2014-07-16  Yvan Roux  <yvan.roux@linaro.org>
[official-gcc.git] / gcc-4_9-branch / gcc / config / aarch64 / aarch64.c
blob: e9bf85e2bb0919cf6c1f5243a235a2acb7c3ecd1
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
66 #include "dumpfile.h"
68 /* Defined for convenience. */
69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
71 /* Classifies an address.
73 ADDRESS_REG_IMM
74 A simple base register plus immediate offset.
76 ADDRESS_REG_WB
77 A base register indexed by immediate offset with writeback.
79 ADDRESS_REG_REG
80 A base register indexed by (optionally scaled) register.
82 ADDRESS_REG_UXTW
83 A base register indexed by (optionally scaled) zero-extended register.
85 ADDRESS_REG_SXTW
86 A base register indexed by (optionally scaled) sign-extended register.
88 ADDRESS_LO_SUM
89 A LO_SUM rtx with a base register and "LO12" symbol relocation.
91 ADDRESS_SYMBOLIC:
92 A constant symbolic address, in pc-relative literal pool. */
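/* For illustration, these classes roughly correspond to assembly
   addressing forms such as:
     ADDRESS_REG_IMM      [x0, #16]
     ADDRESS_REG_WB       [x0, #16]!  or  [x0], #16
     ADDRESS_REG_REG      [x0, x1, lsl #3]
     ADDRESS_REG_UXTW     [x0, w1, uxtw #2]
     ADDRESS_REG_SXTW     [x0, w1, sxtw #2]
     ADDRESS_LO_SUM       [x0, #:lo12:foo]
     ADDRESS_SYMBOLIC     a pc-relative literal, e.g. ldr x0, .Lc0  */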
94 enum aarch64_address_type {
95 ADDRESS_REG_IMM,
96 ADDRESS_REG_WB,
97 ADDRESS_REG_REG,
98 ADDRESS_REG_UXTW,
99 ADDRESS_REG_SXTW,
100 ADDRESS_LO_SUM,
101 ADDRESS_SYMBOLIC
104 struct aarch64_address_info {
105 enum aarch64_address_type type;
106 rtx base;
107 rtx offset;
108 int shift;
109 enum aarch64_symbol_type symbol_type;
112 struct simd_immediate_info
114 rtx value;
115 int shift;
116 int element_width;
117 bool mvn;
118 bool msl;
121 /* The current code model. */
122 enum aarch64_code_model aarch64_cmodel;
124 #ifdef HAVE_AS_TLS
125 #undef TARGET_HAVE_TLS
126 #define TARGET_HAVE_TLS 1
127 #endif
129 static bool aarch64_lra_p (void);
130 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
132 const_tree,
133 enum machine_mode *, int *,
134 bool *);
135 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
137 static void aarch64_override_options_after_change (void);
138 static bool aarch64_vector_mode_supported_p (enum machine_mode);
139 static unsigned bit_count (unsigned HOST_WIDE_INT);
140 static bool aarch64_const_vec_all_same_int_p (rtx,
141 HOST_WIDE_INT, HOST_WIDE_INT);
143 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
144 const unsigned char *sel);
145 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
147 /* The processor for which instructions should be scheduled. */
148 enum aarch64_processor aarch64_tune = cortexa53;
150 /* The current tuning set. */
151 const struct tune_params *aarch64_tune_params;
153 /* Mask to specify which instructions we are allowed to generate. */
154 unsigned long aarch64_isa_flags = 0;
156 /* Mask to specify which instruction scheduling options should be used. */
157 unsigned long aarch64_tune_flags = 0;
159 /* Tuning parameters. */
161 #if HAVE_DESIGNATED_INITIALIZERS
162 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
163 #else
164 #define NAMED_PARAM(NAME, VAL) (VAL)
165 #endif
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168 __extension__
169 #endif
171 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
172 __extension__
173 #endif
174 static const struct cpu_addrcost_table generic_addrcost_table =
176 #if HAVE_DESIGNATED_INITIALIZERS
177 .addr_scale_costs =
178 #endif
180 NAMED_PARAM (qi, 0),
181 NAMED_PARAM (hi, 0),
182 NAMED_PARAM (si, 0),
183 NAMED_PARAM (ti, 0),
185 NAMED_PARAM (pre_modify, 0),
186 NAMED_PARAM (post_modify, 0),
187 NAMED_PARAM (register_offset, 0),
188 NAMED_PARAM (register_extend, 0),
189 NAMED_PARAM (imm_offset, 0)
192 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
193 __extension__
194 #endif
195 static const struct cpu_addrcost_table cortexa57_addrcost_table =
197 #if HAVE_DESIGNATED_INITIALIZERS
198 .addr_scale_costs =
199 #endif
201 NAMED_PARAM (qi, 0),
202 NAMED_PARAM (hi, 1),
203 NAMED_PARAM (si, 0),
204 NAMED_PARAM (ti, 1),
206 NAMED_PARAM (pre_modify, 0),
207 NAMED_PARAM (post_modify, 0),
208 NAMED_PARAM (register_offset, 0),
209 NAMED_PARAM (register_extend, 0),
210 NAMED_PARAM (imm_offset, 0),
213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
214 __extension__
215 #endif
216 static const struct cpu_regmove_cost generic_regmove_cost =
218 NAMED_PARAM (GP2GP, 1),
219 NAMED_PARAM (GP2FP, 2),
220 NAMED_PARAM (FP2GP, 2),
221 /* We currently do not provide direct support for TFmode Q->Q move.
222 Therefore we need to raise the cost above 2 in order to have
223 reload handle the situation. */
224 NAMED_PARAM (FP2FP, 4)
227 /* Generic costs for vector insn classes. */
228 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
229 __extension__
230 #endif
231 static const struct cpu_vector_cost generic_vector_cost =
233 NAMED_PARAM (scalar_stmt_cost, 1),
234 NAMED_PARAM (scalar_load_cost, 1),
235 NAMED_PARAM (scalar_store_cost, 1),
236 NAMED_PARAM (vec_stmt_cost, 1),
237 NAMED_PARAM (vec_to_scalar_cost, 1),
238 NAMED_PARAM (scalar_to_vec_cost, 1),
239 NAMED_PARAM (vec_align_load_cost, 1),
240 NAMED_PARAM (vec_unalign_load_cost, 1),
241 NAMED_PARAM (vec_unalign_store_cost, 1),
242 NAMED_PARAM (vec_store_cost, 1),
243 NAMED_PARAM (cond_taken_branch_cost, 3),
244 NAMED_PARAM (cond_not_taken_branch_cost, 1)
247 /* Cortex-A57 costs for vector insn classes. */
248 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
249 __extension__
250 #endif
251 static const struct cpu_vector_cost cortexa57_vector_cost =
253 NAMED_PARAM (scalar_stmt_cost, 1),
254 NAMED_PARAM (scalar_load_cost, 4),
255 NAMED_PARAM (scalar_store_cost, 1),
256 NAMED_PARAM (vec_stmt_cost, 3),
257 NAMED_PARAM (vec_to_scalar_cost, 8),
258 NAMED_PARAM (scalar_to_vec_cost, 8),
259 NAMED_PARAM (vec_align_load_cost, 5),
260 NAMED_PARAM (vec_unalign_load_cost, 5),
261 NAMED_PARAM (vec_unalign_store_cost, 1),
262 NAMED_PARAM (vec_store_cost, 1),
263 NAMED_PARAM (cond_taken_branch_cost, 1),
264 NAMED_PARAM (cond_not_taken_branch_cost, 1)
267 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
268 __extension__
269 #endif
270 static const struct tune_params generic_tunings =
272 &cortexa57_extra_costs,
273 &generic_addrcost_table,
274 &generic_regmove_cost,
275 &generic_vector_cost,
276 NAMED_PARAM (memmov_cost, 4),
277 NAMED_PARAM (issue_rate, 2)
280 static const struct tune_params cortexa53_tunings =
282 &cortexa53_extra_costs,
283 &generic_addrcost_table,
284 &generic_regmove_cost,
285 &generic_vector_cost,
286 NAMED_PARAM (memmov_cost, 4),
287 NAMED_PARAM (issue_rate, 2)
290 static const struct tune_params cortexa57_tunings =
292 &cortexa57_extra_costs,
293 &cortexa57_addrcost_table,
294 &generic_regmove_cost,
295 &cortexa57_vector_cost,
296 NAMED_PARAM (memmov_cost, 4),
297 NAMED_PARAM (issue_rate, 3)
300 /* A processor implementing AArch64. */
301 struct processor
303 const char *const name;
304 enum aarch64_processor core;
305 const char *arch;
306 const unsigned long flags;
307 const struct tune_params *const tune;
310 /* Processor cores implementing AArch64. */
311 static const struct processor all_cores[] =
313 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
314 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
315 #include "aarch64-cores.def"
316 #undef AARCH64_CORE
317 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
318 {NULL, aarch64_none, NULL, 0, NULL}
321 /* Architectures implementing AArch64. */
322 static const struct processor all_architectures[] =
324 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
325 {NAME, CORE, #ARCH, FLAGS, NULL},
326 #include "aarch64-arches.def"
327 #undef AARCH64_ARCH
328 {NULL, aarch64_none, NULL, 0, NULL}
331 /* Target specification. These are populated as command-line arguments
332 are processed, or NULL if not specified. */
333 static const struct processor *selected_arch;
334 static const struct processor *selected_cpu;
335 static const struct processor *selected_tune;
337 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
339 /* An ISA extension in the co-processor and main instruction set space. */
340 struct aarch64_option_extension
342 const char *const name;
343 const unsigned long flags_on;
344 const unsigned long flags_off;
347 /* ISA extensions in AArch64. */
348 static const struct aarch64_option_extension all_extensions[] =
350 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
351 {NAME, FLAGS_ON, FLAGS_OFF},
352 #include "aarch64-option-extensions.def"
353 #undef AARCH64_OPT_EXTENSION
354 {NULL, 0, 0}
357 /* Used to track the size of an address when generating a pre/post
358 increment address. */
359 static enum machine_mode aarch64_memory_reference_mode;
361 /* Used to force GTY into this file. */
362 static GTY(()) int gty_dummy;
364 /* A table of valid AArch64 "bitmask immediate" values for
365 logical instructions. */
367 #define AARCH64_NUM_BITMASKS 5334
368 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
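/* For illustration: a "bitmask immediate" is a single contiguous run of
   ones (possibly rotated) within an element of 2, 4, 8, 16, 32 or 64
   bits, replicated to fill the register, as accepted by the logical
   instructions AND/ORR/EOR.  For example (as 64-bit values):
     0x00000000000000ff   one run of 8 ones
     0x00ff00ff00ff00ff   the same run replicated every 16 bits
     0x5555555555555555   a 1-bit run replicated every 2 bits
   whereas e.g. 0x0000000000000009 is not encodable.  */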
370 typedef enum aarch64_cond_code
372 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
373 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
374 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
376 aarch64_cc;
378 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
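/* The enumeration above is laid out in inverse pairs, so flipping the
   low bit inverts a condition; for example
   AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) == AARCH64_NE and
   AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) == AARCH64_LT.  */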
380 /* The condition codes of the processor, and the inverse function. */
381 static const char * const aarch64_condition_codes[] =
383 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
384 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
387 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
388 unsigned
389 aarch64_dbx_register_number (unsigned regno)
391 if (GP_REGNUM_P (regno))
392 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
393 else if (regno == SP_REGNUM)
394 return AARCH64_DWARF_SP;
395 else if (FP_REGNUM_P (regno))
396 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
398 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
399 equivalent DWARF register. */
400 return DWARF_FRAME_REGISTERS;
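/* For example, assuming AARCH64_DWARF_R0 == 0, AARCH64_DWARF_SP == 31
   and AARCH64_DWARF_V0 == 64, this maps x0..x30 to 0..30, sp to 31 and
   v0..v31 to 64..95, so x19 -> 19 and v8 -> 72; anything else (e.g. the
   condition flags) gets the out-of-range value DWARF_FRAME_REGISTERS.  */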
403 /* Return TRUE if MODE is any of the large INT modes. */
404 static bool
405 aarch64_vect_struct_mode_p (enum machine_mode mode)
407 return mode == OImode || mode == CImode || mode == XImode;
410 /* Return TRUE if MODE is any of the vector modes. */
411 static bool
412 aarch64_vector_mode_p (enum machine_mode mode)
414 return aarch64_vector_mode_supported_p (mode)
415 || aarch64_vect_struct_mode_p (mode);
418 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
419 static bool
420 aarch64_array_mode_supported_p (enum machine_mode mode,
421 unsigned HOST_WIDE_INT nelems)
423 if (TARGET_SIMD
424 && AARCH64_VALID_SIMD_QREG_MODE (mode)
425 && (nelems >= 2 && nelems <= 4))
426 return true;
428 return false;
431 /* Implement HARD_REGNO_NREGS. */
434 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
436 switch (aarch64_regno_regclass (regno))
438 case FP_REGS:
439 case FP_LO_REGS:
440 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
441 default:
442 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
444 gcc_unreachable ();
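/* For example (with UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16): a
   TImode value needs two X registers but only one V register, and an
   OImode (32-byte) SIMD struct value needs two V registers.  */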
447 /* Implement HARD_REGNO_MODE_OK. */
450 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
452 if (GET_MODE_CLASS (mode) == MODE_CC)
453 return regno == CC_REGNUM;
455 if (regno == SP_REGNUM)
456 /* The purpose of comparing with ptr_mode is to support the
457 global register variable associated with the stack pointer
458 register via the syntax of asm ("wsp") in ILP32. */
459 return mode == Pmode || mode == ptr_mode;
461 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
462 return mode == Pmode;
464 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
465 return 1;
467 if (FP_REGNUM_P (regno))
469 if (aarch64_vect_struct_mode_p (mode))
470 return
471 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
472 else
473 return 1;
476 return 0;
479 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
480 enum machine_mode
481 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
482 enum machine_mode mode)
484 /* Handle modes that fit within single registers. */
485 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
487 if (GET_MODE_SIZE (mode) >= 4)
488 return mode;
489 else
490 return SImode;
492 /* Fall back to generic for multi-reg and very large modes. */
493 else
494 return choose_hard_reg_mode (regno, nregs, false);
497 /* Return true if calls to DECL should be treated as
498 long-calls (i.e. called via a register). */
499 static bool
500 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
502 return false;
505 /* Return true if calls to symbol-ref SYM should be treated as
506 long-calls (i.e. called via a register). */
507 bool
508 aarch64_is_long_call_p (rtx sym)
510 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
513 /* Return true if the offsets to a zero/sign-extract operation
514 represent an expression that matches an extend operation. The
515 operands represent the parameters from
517 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
518 bool
519 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
520 rtx extract_imm)
522 HOST_WIDE_INT mult_val, extract_val;
524 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
525 return false;
527 mult_val = INTVAL (mult_imm);
528 extract_val = INTVAL (extract_imm);
530 if (extract_val > 8
531 && extract_val < GET_MODE_BITSIZE (mode)
532 && exact_log2 (extract_val & ~7) > 0
533 && (extract_val & 7) <= 4
534 && mult_val == (1 << (extract_val & 7)))
535 return true;
537 return false;
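/* Worked example: for DImode, MULT_IMM == 4 and EXTRACT_IMM == 34
   satisfy the test above: 34 > 8, 34 < 64, 34 & ~7 == 32 is a power of
   two, 34 & 7 == 2 <= 4 and 4 == 1 << 2.  This corresponds to a
   zero/sign-extended 32-bit value scaled by 4, as used in extended
   register address arithmetic such as "add x0, x1, w2, uxtw #2".  */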
540 /* Emit an insn that's a simple single-set. Both the operands must be
541 known to be valid. */
542 inline static rtx
543 emit_set_insn (rtx x, rtx y)
545 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
548 /* X and Y are two things to compare using CODE. Emit the compare insn and
549 return the rtx for register 0 in the proper mode. */
551 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
553 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
554 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
556 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
557 return cc_reg;
560 /* Build the SYMBOL_REF for __tls_get_addr. */
562 static GTY(()) rtx tls_get_addr_libfunc;
565 aarch64_tls_get_addr (void)
567 if (!tls_get_addr_libfunc)
568 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
569 return tls_get_addr_libfunc;
572 /* Return the TLS model to use for ADDR. */
574 static enum tls_model
575 tls_symbolic_operand_type (rtx addr)
577 enum tls_model tls_kind = TLS_MODEL_NONE;
578 rtx sym, addend;
580 if (GET_CODE (addr) == CONST)
582 split_const (addr, &sym, &addend);
583 if (GET_CODE (sym) == SYMBOL_REF)
584 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
586 else if (GET_CODE (addr) == SYMBOL_REF)
587 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
589 return tls_kind;
592 /* We'll allow lo_sum's in addresses in our legitimate addresses
593 so that combine can take care of combining addresses where
594 necessary, but for generation purposes, we'll generate the address
595 as:
596 RTL Absolute
597 tmp = hi (symbol_ref); adrp x1, foo
598 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
601 PIC TLS
602 adrp x1, :got:foo adrp tmp, :tlsgd:foo
603 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
604 bl __tls_get_addr
607 Load TLS symbol, depending on TLS mechanism and TLS access model.
609 Global Dynamic - Traditional TLS:
610 adrp tmp, :tlsgd:imm
611 add dest, tmp, #:tlsgd_lo12:imm
612 bl __tls_get_addr
614 Global Dynamic - TLS Descriptors:
615 adrp dest, :tlsdesc:imm
616 ldr tmp, [dest, #:tlsdesc_lo12:imm]
617 add dest, dest, #:tlsdesc_lo12:imm
618 blr tmp
619 mrs tp, tpidr_el0
620 add dest, dest, tp
622 Initial Exec:
623 mrs tp, tpidr_el0
624 adrp tmp, :gottprel:imm
625 ldr dest, [tmp, #:gottprel_lo12:imm]
626 add dest, dest, tp
628 Local Exec:
629 mrs tp, tpidr_el0
630 add t0, tp, #:tprel_hi12:imm
631 add t0, #:tprel_lo12_nc:imm
634 static void
635 aarch64_load_symref_appropriately (rtx dest, rtx imm,
636 enum aarch64_symbol_type type)
638 switch (type)
640 case SYMBOL_SMALL_ABSOLUTE:
642 /* In ILP32, the mode of dest can be either SImode or DImode. */
643 rtx tmp_reg = dest;
644 enum machine_mode mode = GET_MODE (dest);
646 gcc_assert (mode == Pmode || mode == ptr_mode);
648 if (can_create_pseudo_p ())
649 tmp_reg = gen_reg_rtx (mode);
651 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
652 emit_insn (gen_add_losym (dest, tmp_reg, imm));
653 return;
656 case SYMBOL_TINY_ABSOLUTE:
657 emit_insn (gen_rtx_SET (Pmode, dest, imm));
658 return;
660 case SYMBOL_SMALL_GOT:
662 /* In ILP32, the mode of dest can be either SImode or DImode,
663 while the got entry is always of SImode size. The mode of
664 dest depends on how dest is used: if dest is assigned to a
665 pointer (e.g. in the memory), it has SImode; it may have
666 DImode if dest is dereferenced to access the memory.
667 This is why we have to handle three different ldr_got_small
668 patterns here (two patterns for ILP32). */
669 rtx tmp_reg = dest;
670 enum machine_mode mode = GET_MODE (dest);
672 if (can_create_pseudo_p ())
673 tmp_reg = gen_reg_rtx (mode);
675 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
676 if (mode == ptr_mode)
678 if (mode == DImode)
679 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
680 else
681 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
683 else
685 gcc_assert (mode == Pmode);
686 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
689 return;
692 case SYMBOL_SMALL_TLSGD:
694 rtx insns;
695 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
697 start_sequence ();
698 emit_call_insn (gen_tlsgd_small (result, imm));
699 insns = get_insns ();
700 end_sequence ();
702 RTL_CONST_CALL_P (insns) = 1;
703 emit_libcall_block (insns, dest, result, imm);
704 return;
707 case SYMBOL_SMALL_TLSDESC:
709 enum machine_mode mode = GET_MODE (dest);
710 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
711 rtx tp;
713 gcc_assert (mode == Pmode || mode == ptr_mode);
715 /* In ILP32, the got entry is always of SImode size. Unlike
716 small GOT, the dest is fixed at reg 0. */
717 if (TARGET_ILP32)
718 emit_insn (gen_tlsdesc_small_si (imm));
719 else
720 emit_insn (gen_tlsdesc_small_di (imm));
721 tp = aarch64_load_tp (NULL);
723 if (mode != Pmode)
724 tp = gen_lowpart (mode, tp);
726 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
727 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
728 return;
731 case SYMBOL_SMALL_GOTTPREL:
733 /* In ILP32, the mode of dest can be either SImode or DImode,
734 while the got entry is always of SImode size. The mode of
735 dest depends on how dest is used: if dest is assigned to a
736 pointer (e.g. in the memory), it has SImode; it may have
737 DImode if dest is dereferenced to access the memory.
738 This is why we have to handle three different tlsie_small
739 patterns here (two patterns for ILP32). */
740 enum machine_mode mode = GET_MODE (dest);
741 rtx tmp_reg = gen_reg_rtx (mode);
742 rtx tp = aarch64_load_tp (NULL);
744 if (mode == ptr_mode)
746 if (mode == DImode)
747 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
748 else
750 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
751 tp = gen_lowpart (mode, tp);
754 else
756 gcc_assert (mode == Pmode);
757 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
760 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
761 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
762 return;
765 case SYMBOL_SMALL_TPREL:
767 rtx tp = aarch64_load_tp (NULL);
768 emit_insn (gen_tlsle_small (dest, tp, imm));
769 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
770 return;
773 case SYMBOL_TINY_GOT:
774 emit_insn (gen_ldr_got_tiny (dest, imm));
775 return;
777 default:
778 gcc_unreachable ();
782 /* Emit a move from SRC to DEST. Assume that the move expanders can
783 handle all moves if !can_create_pseudo_p (). The distinction is
784 important because, unlike emit_move_insn, the move expanders know
785 how to force Pmode objects into the constant pool even when the
786 constant pool address is not itself legitimate. */
787 static rtx
788 aarch64_emit_move (rtx dest, rtx src)
790 return (can_create_pseudo_p ()
791 ? emit_move_insn (dest, src)
792 : emit_move_insn_1 (dest, src));
795 /* Split a 128-bit move operation into two 64-bit move operations,
796 taking care to handle partial overlap of register to register
797 copies. Special cases are needed when moving between GP regs and
798 FP regs. SRC can be a register, constant or memory; DST a register
799 or memory. If either operand is memory it must not have any side
800 effects. */
801 void
802 aarch64_split_128bit_move (rtx dst, rtx src)
804 rtx dst_lo, dst_hi;
805 rtx src_lo, src_hi;
807 enum machine_mode mode = GET_MODE (dst);
809 gcc_assert (mode == TImode || mode == TFmode);
810 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
811 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
813 if (REG_P (dst) && REG_P (src))
815 int src_regno = REGNO (src);
816 int dst_regno = REGNO (dst);
818 /* Handle FP <-> GP regs. */
819 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
821 src_lo = gen_lowpart (word_mode, src);
822 src_hi = gen_highpart (word_mode, src);
824 if (mode == TImode)
826 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
827 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
829 else
831 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
832 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
834 return;
836 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
838 dst_lo = gen_lowpart (word_mode, dst);
839 dst_hi = gen_highpart (word_mode, dst);
841 if (mode == TImode)
843 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
844 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
846 else
848 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
849 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
851 return;
855 dst_lo = gen_lowpart (word_mode, dst);
856 dst_hi = gen_highpart (word_mode, dst);
857 src_lo = gen_lowpart (word_mode, src);
858 src_hi = gen_highpart_mode (word_mode, mode, src);
860 /* At most one pairing may overlap. */
861 if (reg_overlap_mentioned_p (dst_lo, src_hi))
863 aarch64_emit_move (dst_hi, src_hi);
864 aarch64_emit_move (dst_lo, src_lo);
866 else
868 aarch64_emit_move (dst_lo, src_lo);
869 aarch64_emit_move (dst_hi, src_hi);
873 bool
874 aarch64_split_128bit_move_p (rtx dst, rtx src)
876 return (! REG_P (src)
877 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
880 /* Split a complex SIMD combine. */
882 void
883 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
885 enum machine_mode src_mode = GET_MODE (src1);
886 enum machine_mode dst_mode = GET_MODE (dst);
888 gcc_assert (VECTOR_MODE_P (dst_mode));
890 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
892 rtx (*gen) (rtx, rtx, rtx);
894 switch (src_mode)
896 case V8QImode:
897 gen = gen_aarch64_simd_combinev8qi;
898 break;
899 case V4HImode:
900 gen = gen_aarch64_simd_combinev4hi;
901 break;
902 case V2SImode:
903 gen = gen_aarch64_simd_combinev2si;
904 break;
905 case V2SFmode:
906 gen = gen_aarch64_simd_combinev2sf;
907 break;
908 case DImode:
909 gen = gen_aarch64_simd_combinedi;
910 break;
911 case DFmode:
912 gen = gen_aarch64_simd_combinedf;
913 break;
914 default:
915 gcc_unreachable ();
918 emit_insn (gen (dst, src1, src2));
919 return;
923 /* Split a complex SIMD move. */
925 void
926 aarch64_split_simd_move (rtx dst, rtx src)
928 enum machine_mode src_mode = GET_MODE (src);
929 enum machine_mode dst_mode = GET_MODE (dst);
931 gcc_assert (VECTOR_MODE_P (dst_mode));
933 if (REG_P (dst) && REG_P (src))
935 rtx (*gen) (rtx, rtx);
937 gcc_assert (VECTOR_MODE_P (src_mode));
939 switch (src_mode)
941 case V16QImode:
942 gen = gen_aarch64_split_simd_movv16qi;
943 break;
944 case V8HImode:
945 gen = gen_aarch64_split_simd_movv8hi;
946 break;
947 case V4SImode:
948 gen = gen_aarch64_split_simd_movv4si;
949 break;
950 case V2DImode:
951 gen = gen_aarch64_split_simd_movv2di;
952 break;
953 case V4SFmode:
954 gen = gen_aarch64_split_simd_movv4sf;
955 break;
956 case V2DFmode:
957 gen = gen_aarch64_split_simd_movv2df;
958 break;
959 default:
960 gcc_unreachable ();
963 emit_insn (gen (dst, src));
964 return;
968 static rtx
969 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
971 if (can_create_pseudo_p ())
972 return force_reg (mode, value);
973 else
975 x = aarch64_emit_move (x, value);
976 return x;
981 static rtx
982 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
984 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
986 rtx high;
987 /* Load the full offset into a register. This
988 might be improvable in the future. */
989 high = GEN_INT (offset);
990 offset = 0;
991 high = aarch64_force_temporary (mode, temp, high);
992 reg = aarch64_force_temporary (mode, temp,
993 gen_rtx_PLUS (mode, high, reg));
995 return plus_constant (mode, reg, offset);
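/* Illustrative expansions produced by the function below (register
   names arbitrary; the exact choice depends on the helpers above):
     0x0000000000004567  ->  mov  x0, #0x4567
     0x0123000000004567  ->  mov  x0, #0x4567
                             movk x0, #0x123, lsl #48
   SImode constants that need two instructions are built as a move of
   the low 16 bits followed by an insert (movk) of the high 16 bits.  */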
998 void
999 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1001 enum machine_mode mode = GET_MODE (dest);
1002 unsigned HOST_WIDE_INT mask;
1003 int i;
1004 bool first;
1005 unsigned HOST_WIDE_INT val;
1006 bool subtargets;
1007 rtx subtarget;
1008 int one_match, zero_match;
1010 gcc_assert (mode == SImode || mode == DImode);
1012 /* Check on what type of symbol it is. */
1013 if (GET_CODE (imm) == SYMBOL_REF
1014 || GET_CODE (imm) == LABEL_REF
1015 || GET_CODE (imm) == CONST)
1017 rtx mem, base, offset;
1018 enum aarch64_symbol_type sty;
1020 /* If we have (const (plus symbol offset)), separate out the offset
1021 before we start classifying the symbol. */
1022 split_const (imm, &base, &offset);
1024 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1025 switch (sty)
1027 case SYMBOL_FORCE_TO_MEM:
1028 if (offset != const0_rtx
1029 && targetm.cannot_force_const_mem (mode, imm))
1031 gcc_assert (can_create_pseudo_p ());
1032 base = aarch64_force_temporary (mode, dest, base);
1033 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1034 aarch64_emit_move (dest, base);
1035 return;
1037 mem = force_const_mem (ptr_mode, imm);
1038 gcc_assert (mem);
1039 if (mode != ptr_mode)
1040 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1041 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1042 return;
1044 case SYMBOL_SMALL_TLSGD:
1045 case SYMBOL_SMALL_TLSDESC:
1046 case SYMBOL_SMALL_GOTTPREL:
1047 case SYMBOL_SMALL_GOT:
1048 case SYMBOL_TINY_GOT:
1049 if (offset != const0_rtx)
1051 gcc_assert(can_create_pseudo_p ());
1052 base = aarch64_force_temporary (mode, dest, base);
1053 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1054 aarch64_emit_move (dest, base);
1055 return;
1057 /* FALLTHRU */
1059 case SYMBOL_SMALL_TPREL:
1060 case SYMBOL_SMALL_ABSOLUTE:
1061 case SYMBOL_TINY_ABSOLUTE:
1062 aarch64_load_symref_appropriately (dest, imm, sty);
1063 return;
1065 default:
1066 gcc_unreachable ();
1070 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1072 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1073 return;
1076 if (!CONST_INT_P (imm))
1078 if (GET_CODE (imm) == HIGH)
1079 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1080 else
1082 rtx mem = force_const_mem (mode, imm);
1083 gcc_assert (mem);
1084 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1087 return;
1090 if (mode == SImode)
1092 /* We know we can't do this in 1 insn, and we must be able to do it
1093 in two; so don't mess around looking for sequences that don't buy
1094 us anything. */
1095 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1096 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1097 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1098 return;
1101 /* Remaining cases are all for DImode. */
1103 val = INTVAL (imm);
1104 subtargets = optimize && can_create_pseudo_p ();
1106 one_match = 0;
1107 zero_match = 0;
1108 mask = 0xffff;
1110 for (i = 0; i < 64; i += 16, mask <<= 16)
1112 if ((val & mask) == 0)
1113 zero_match++;
1114 else if ((val & mask) == mask)
1115 one_match++;
1118 if (one_match == 2)
1120 mask = 0xffff;
1121 for (i = 0; i < 64; i += 16, mask <<= 16)
1123 if ((val & mask) != mask)
1125 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1126 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1127 GEN_INT ((val >> i) & 0xffff)));
1128 return;
1131 gcc_unreachable ();
1134 if (zero_match == 2)
1135 goto simple_sequence;
1137 mask = 0x0ffff0000UL;
1138 for (i = 16; i < 64; i += 16, mask <<= 16)
1140 HOST_WIDE_INT comp = mask & ~(mask - 1);
1142 if (aarch64_uimm12_shift (val - (val & mask)))
1144 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1146 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1147 emit_insn (gen_adddi3 (dest, subtarget,
1148 GEN_INT (val - (val & mask))));
1149 return;
1151 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1153 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1155 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1156 GEN_INT ((val + comp) & mask)));
1157 emit_insn (gen_adddi3 (dest, subtarget,
1158 GEN_INT (val - ((val + comp) & mask))));
1159 return;
1161 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1163 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1165 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1166 GEN_INT ((val - comp) | ~mask)));
1167 emit_insn (gen_adddi3 (dest, subtarget,
1168 GEN_INT (val - ((val - comp) | ~mask))));
1169 return;
1171 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1173 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1175 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1176 GEN_INT (val | ~mask)));
1177 emit_insn (gen_adddi3 (dest, subtarget,
1178 GEN_INT (val - (val | ~mask))));
1179 return;
1183 /* See if we can do it by arithmetically combining two
1184 immediates. */
1185 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1187 int j;
1188 mask = 0xffff;
1190 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1191 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1193 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1194 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1195 GEN_INT (aarch64_bitmasks[i])));
1196 emit_insn (gen_adddi3 (dest, subtarget,
1197 GEN_INT (val - aarch64_bitmasks[i])));
1198 return;
1201 for (j = 0; j < 64; j += 16, mask <<= 16)
1203 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1205 emit_insn (gen_rtx_SET (VOIDmode, dest,
1206 GEN_INT (aarch64_bitmasks[i])));
1207 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1208 GEN_INT ((val >> j) & 0xffff)));
1209 return;
1214 /* See if we can do it by logically combining two immediates. */
1215 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1217 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1219 int j;
1221 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1222 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1224 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1225 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1226 GEN_INT (aarch64_bitmasks[i])));
1227 emit_insn (gen_iordi3 (dest, subtarget,
1228 GEN_INT (aarch64_bitmasks[j])));
1229 return;
1232 else if ((val & aarch64_bitmasks[i]) == val)
1234 int j;
1236 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1237 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1240 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1241 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1242 GEN_INT (aarch64_bitmasks[j])));
1243 emit_insn (gen_anddi3 (dest, subtarget,
1244 GEN_INT (aarch64_bitmasks[i])));
1245 return;
1250 simple_sequence:
1251 first = true;
1252 mask = 0xffff;
1253 for (i = 0; i < 64; i += 16, mask <<= 16)
1255 if ((val & mask) != 0)
1257 if (first)
1259 emit_insn (gen_rtx_SET (VOIDmode, dest,
1260 GEN_INT (val & mask)));
1261 first = false;
1263 else
1264 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1265 GEN_INT ((val >> i) & 0xffff)));
1270 static bool
1271 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1273 /* Indirect calls are not currently supported. */
1274 if (decl == NULL)
1275 return false;
1277 /* Cannot tail-call to long-calls, since these are outside of the
1278 range of a branch instruction (we could handle this if we added
1279 support for indirect tail-calls). */
1280 if (aarch64_decl_is_long_call_p (decl))
1281 return false;
1283 return true;
1286 /* Implement TARGET_PASS_BY_REFERENCE. */
1288 static bool
1289 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1290 enum machine_mode mode,
1291 const_tree type,
1292 bool named ATTRIBUTE_UNUSED)
1294 HOST_WIDE_INT size;
1295 enum machine_mode dummymode;
1296 int nregs;
1298 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1299 size = (mode == BLKmode && type)
1300 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1302 /* Aggregates are passed by reference based on their size. */
1303 if (type && AGGREGATE_TYPE_P (type))
1305 size = int_size_in_bytes (type);
1308 /* Variable sized arguments are always returned by reference. */
1309 if (size < 0)
1310 return true;
1312 /* Can this be a candidate to be passed in fp/simd register(s)? */
1313 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1314 &dummymode, &nregs,
1315 NULL))
1316 return false;
1318 /* Arguments which are variable sized or larger than 2 registers are
1319 passed by reference unless they are a homogeneous floating-point
1320 aggregate. */
1321 return size > 2 * UNITS_PER_WORD;
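/* For example, under these rules a structure of four doubles is an HFA
   and so is a candidate for the FP/SIMD registers rather than being
   passed by reference, while a plain 24-byte structure of integers
   (three registers' worth) is passed by reference.  */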
1324 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1325 static bool
1326 aarch64_return_in_msb (const_tree valtype)
1328 enum machine_mode dummy_mode;
1329 int dummy_int;
1331 /* Never happens in little-endian mode. */
1332 if (!BYTES_BIG_ENDIAN)
1333 return false;
1335 /* Only composite types smaller than or equal to 16 bytes can
1336 be potentially returned in registers. */
1337 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1338 || int_size_in_bytes (valtype) <= 0
1339 || int_size_in_bytes (valtype) > 16)
1340 return false;
1342 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1343 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1344 is always passed/returned in the least significant bits of fp/simd
1345 register(s). */
1346 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1347 &dummy_mode, &dummy_int, NULL))
1348 return false;
1350 return true;
1353 /* Implement TARGET_FUNCTION_VALUE.
1354 Define how to find the value returned by a function. */
1356 static rtx
1357 aarch64_function_value (const_tree type, const_tree func,
1358 bool outgoing ATTRIBUTE_UNUSED)
1360 enum machine_mode mode;
1361 int unsignedp;
1362 int count;
1363 enum machine_mode ag_mode;
1365 mode = TYPE_MODE (type);
1366 if (INTEGRAL_TYPE_P (type))
1367 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1369 if (aarch64_return_in_msb (type))
1371 HOST_WIDE_INT size = int_size_in_bytes (type);
1373 if (size % UNITS_PER_WORD != 0)
1375 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1376 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1380 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1381 &ag_mode, &count, NULL))
1383 if (!aarch64_composite_type_p (type, mode))
1385 gcc_assert (count == 1 && mode == ag_mode);
1386 return gen_rtx_REG (mode, V0_REGNUM);
1388 else
1390 int i;
1391 rtx par;
1393 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1394 for (i = 0; i < count; i++)
1396 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1397 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1398 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1399 XVECEXP (par, 0, i) = tmp;
1401 return par;
1404 else
1405 return gen_rtx_REG (mode, R0_REGNUM);
1408 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1409 Return true if REGNO is the number of a hard register in which the values
1410 of called function may come back. */
1412 static bool
1413 aarch64_function_value_regno_p (const unsigned int regno)
1415 /* Maximum of 16 bytes can be returned in the general registers. Examples
1416 of 16-byte return values are: 128-bit integers and 16-byte small
1417 structures (excluding homogeneous floating-point aggregates). */
1418 if (regno == R0_REGNUM || regno == R1_REGNUM)
1419 return true;
1421 /* Up to four fp/simd registers can return a function value, e.g. a
1422 homogeneous floating-point aggregate having four members. */
1423 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1424 return !TARGET_GENERAL_REGS_ONLY;
1426 return false;
1429 /* Implement TARGET_RETURN_IN_MEMORY.
1431 If the type T of the result of a function is such that
1432 void func (T arg)
1433 would require that arg be passed as a value in a register (or set of
1434 registers) according to the parameter passing rules, then the result
1435 is returned in the same registers as would be used for such an
1436 argument. */
1438 static bool
1439 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1441 HOST_WIDE_INT size;
1442 enum machine_mode ag_mode;
1443 int count;
1445 if (!AGGREGATE_TYPE_P (type)
1446 && TREE_CODE (type) != COMPLEX_TYPE
1447 && TREE_CODE (type) != VECTOR_TYPE)
1448 /* Simple scalar types are always returned in registers. */
1449 return false;
1451 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1452 type,
1453 &ag_mode,
1454 &count,
1455 NULL))
1456 return false;
1458 /* Types larger than 2 registers are returned in memory. */
1459 size = int_size_in_bytes (type);
1460 return (size < 0 || size > 2 * UNITS_PER_WORD);
1463 static bool
1464 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1465 const_tree type, int *nregs)
1467 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1468 return aarch64_vfp_is_call_or_return_candidate (mode,
1469 type,
1470 &pcum->aapcs_vfp_rmode,
1471 nregs,
1472 NULL);
1475 /* Given MODE and TYPE of a function argument, return the alignment in
1476 bits. The idea is to suppress any stronger alignment requested by
1477 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1478 This is a helper function for local use only. */
1480 static unsigned int
1481 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1483 unsigned int alignment;
1485 if (type)
1487 if (!integer_zerop (TYPE_SIZE (type)))
1489 if (TYPE_MODE (type) == mode)
1490 alignment = TYPE_ALIGN (type);
1491 else
1492 alignment = GET_MODE_ALIGNMENT (mode);
1494 else
1495 alignment = 0;
1497 else
1498 alignment = GET_MODE_ALIGNMENT (mode);
1500 return alignment;
1503 /* Layout a function argument according to the AAPCS64 rules. The rule
1504 numbers refer to the rule numbers in the AAPCS64. */
1506 static void
1507 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1508 const_tree type,
1509 bool named ATTRIBUTE_UNUSED)
1511 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1512 int ncrn, nvrn, nregs;
1513 bool allocate_ncrn, allocate_nvrn;
1514 HOST_WIDE_INT size;
1516 /* We need to do this once per argument. */
1517 if (pcum->aapcs_arg_processed)
1518 return;
1520 pcum->aapcs_arg_processed = true;
1522 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1523 size
1524 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1525 UNITS_PER_WORD);
1527 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1528 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1529 mode,
1530 type,
1531 &nregs);
1533 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1534 The following code thus handles passing by SIMD/FP registers first. */
1536 nvrn = pcum->aapcs_nvrn;
1538 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1539 and homogeneous short-vector aggregates (HVA). */
1540 if (allocate_nvrn)
1542 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1544 pcum->aapcs_nextnvrn = nvrn + nregs;
1545 if (!aarch64_composite_type_p (type, mode))
1547 gcc_assert (nregs == 1);
1548 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1550 else
1552 rtx par;
1553 int i;
1554 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1555 for (i = 0; i < nregs; i++)
1557 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1558 V0_REGNUM + nvrn + i);
1559 tmp = gen_rtx_EXPR_LIST
1560 (VOIDmode, tmp,
1561 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1562 XVECEXP (par, 0, i) = tmp;
1564 pcum->aapcs_reg = par;
1566 return;
1568 else
1570 /* C.3 NSRN is set to 8. */
1571 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1572 goto on_stack;
1576 ncrn = pcum->aapcs_ncrn;
1577 nregs = size / UNITS_PER_WORD;
1579 /* C6 - C9, though the sign and zero extension semantics are
1580 handled elsewhere. This is the case where the argument fits
1581 entirely in general registers. */
1582 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1584 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1586 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1588 /* C.8 if the argument has an alignment of 16 then the NGRN is
1589 rounded up to the next even number. */
1590 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1592 ++ncrn;
1593 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1595 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1596 A reg is still generated for it, but the caller should be smart
1597 enough not to use it. */
1598 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1600 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1602 else
1604 rtx par;
1605 int i;
1607 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1608 for (i = 0; i < nregs; i++)
1610 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1611 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1612 GEN_INT (i * UNITS_PER_WORD));
1613 XVECEXP (par, 0, i) = tmp;
1615 pcum->aapcs_reg = par;
1618 pcum->aapcs_nextncrn = ncrn + nregs;
1619 return;
1622 /* C.11 */
1623 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1625 /* The argument is passed on stack; record the needed number of words for
1626 this argument and align the total size if necessary. */
1627 on_stack:
1628 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1629 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1630 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1631 16 / UNITS_PER_WORD);
1632 return;
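/* As an example of rule C.8 above: a 16-byte, 16-byte-aligned argument
   (e.g. an __int128) arriving when the next core register number is odd
   is bumped to the next even pair, so with x0 already used it is passed
   in x2/x3 and x1 is left unused.  */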
1635 /* Implement TARGET_FUNCTION_ARG. */
1637 static rtx
1638 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1639 const_tree type, bool named)
1641 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1642 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1644 if (mode == VOIDmode)
1645 return NULL_RTX;
1647 aarch64_layout_arg (pcum_v, mode, type, named);
1648 return pcum->aapcs_reg;
1651 void
1652 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1653 const_tree fntype ATTRIBUTE_UNUSED,
1654 rtx libname ATTRIBUTE_UNUSED,
1655 const_tree fndecl ATTRIBUTE_UNUSED,
1656 unsigned n_named ATTRIBUTE_UNUSED)
1658 pcum->aapcs_ncrn = 0;
1659 pcum->aapcs_nvrn = 0;
1660 pcum->aapcs_nextncrn = 0;
1661 pcum->aapcs_nextnvrn = 0;
1662 pcum->pcs_variant = ARM_PCS_AAPCS64;
1663 pcum->aapcs_reg = NULL_RTX;
1664 pcum->aapcs_arg_processed = false;
1665 pcum->aapcs_stack_words = 0;
1666 pcum->aapcs_stack_size = 0;
1668 return;
1671 static void
1672 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1673 enum machine_mode mode,
1674 const_tree type,
1675 bool named)
1677 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1678 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1680 aarch64_layout_arg (pcum_v, mode, type, named);
1681 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1682 != (pcum->aapcs_stack_words != 0));
1683 pcum->aapcs_arg_processed = false;
1684 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1685 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1686 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1687 pcum->aapcs_stack_words = 0;
1688 pcum->aapcs_reg = NULL_RTX;
1692 bool
1693 aarch64_function_arg_regno_p (unsigned regno)
1695 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1696 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1699 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1700 PARM_BOUNDARY bits of alignment, but will be given anything up
1701 to STACK_BOUNDARY bits if the type requires it. This makes sure
1702 that both before and after the layout of each argument, the Next
1703 Stacked Argument Address (NSAA) will have a minimum alignment of
1704 8 bytes. */
1706 static unsigned int
1707 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1709 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1711 if (alignment < PARM_BOUNDARY)
1712 alignment = PARM_BOUNDARY;
1713 if (alignment > STACK_BOUNDARY)
1714 alignment = STACK_BOUNDARY;
1715 return alignment;
1718 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1720 Return true if an argument passed on the stack should be padded upwards,
1721 i.e. if the least-significant byte of the stack slot has useful data.
1723 Small aggregate types are placed in the lowest memory address.
1725 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1727 bool
1728 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1730 /* On little-endian targets, the least significant byte of every stack
1731 argument is passed at the lowest byte address of the stack slot. */
1732 if (!BYTES_BIG_ENDIAN)
1733 return true;
1735 /* Otherwise, integral, floating-point and pointer types are padded downward:
1736 the least significant byte of a stack argument is passed at the highest
1737 byte address of the stack slot. */
1738 if (type
1739 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1740 || POINTER_TYPE_P (type))
1741 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1742 return false;
1744 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1745 return true;
1748 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1750 It specifies padding for the last (may also be the only)
1751 element of a block move between registers and memory. Assuming
1752 the block is in memory, padding upward means that the last
1753 element is padded after its most significant byte, while with
1754 downward padding the last element is padded at its least
1755 significant byte side.
1757 Small aggregates and small complex types are always padded
1758 upwards.
1760 We don't need to worry about homogeneous floating-point or
1761 short-vector aggregates; their move is not affected by the
1762 padding direction determined here. Regardless of endianness,
1763 each element of such an aggregate is put in the least
1764 significant bits of a fp/simd register.
1766 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1767 register has useful data, and return the opposite if the most
1768 significant byte does. */
1770 bool
1771 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1772 bool first ATTRIBUTE_UNUSED)
1775 /* Small composite types are always padded upward. */
1776 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1778 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1779 : GET_MODE_SIZE (mode));
1780 if (size < 2 * UNITS_PER_WORD)
1781 return true;
1784 /* Otherwise, use the default padding. */
1785 return !BYTES_BIG_ENDIAN;
1788 static enum machine_mode
1789 aarch64_libgcc_cmp_return_mode (void)
1791 return SImode;
1794 static bool
1795 aarch64_frame_pointer_required (void)
1797 /* If the function contains dynamic stack allocations, we need to
1798 use the frame pointer to access the static parts of the frame. */
1799 if (cfun->calls_alloca)
1800 return true;
1802 /* In aarch64_override_options_after_change
1803 flag_omit_leaf_frame_pointer turns off the frame pointer by
1804 default. Turn it back on now if we've not got a leaf
1805 function. */
1806 if (flag_omit_leaf_frame_pointer
1807 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1808 return true;
1810 return false;
1813 /* Mark the registers that need to be saved by the callee and calculate
1814 the size of the callee-saved registers area and frame record (both FP
1815 and LR may be omitted). */
1816 static void
1817 aarch64_layout_frame (void)
1819 HOST_WIDE_INT offset = 0;
1820 int regno;
1822 if (reload_completed && cfun->machine->frame.laid_out)
1823 return;
1825 /* First mark all the registers that really need to be saved... */
1826 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1827 cfun->machine->frame.reg_offset[regno] = -1;
1829 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1830 cfun->machine->frame.reg_offset[regno] = -1;
1832 /* ... that includes the eh data registers (if needed)... */
1833 if (crtl->calls_eh_return)
1834 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1835 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1837 /* ... and any callee saved register that dataflow says is live. */
1838 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1839 if (df_regs_ever_live_p (regno)
1840 && !call_used_regs[regno])
1841 cfun->machine->frame.reg_offset[regno] = 0;
1843 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1844 if (df_regs_ever_live_p (regno)
1845 && !call_used_regs[regno])
1846 cfun->machine->frame.reg_offset[regno] = 0;
1848 if (frame_pointer_needed)
1850 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1851 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1852 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1855 /* Now assign stack slots for them. */
1856 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1857 if (cfun->machine->frame.reg_offset[regno] != -1)
1859 cfun->machine->frame.reg_offset[regno] = offset;
1860 offset += UNITS_PER_WORD;
1863 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1864 if (cfun->machine->frame.reg_offset[regno] != -1)
1866 cfun->machine->frame.reg_offset[regno] = offset;
1867 offset += UNITS_PER_WORD;
1870 if (frame_pointer_needed)
1872 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1873 offset += UNITS_PER_WORD;
1876 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1878 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1879 offset += UNITS_PER_WORD;
1882 cfun->machine->frame.padding0 =
1883 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1884 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1886 cfun->machine->frame.saved_regs_size = offset;
1887 cfun->machine->frame.laid_out = true;
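/* A small worked example of the layout above: a function that needs a
   frame pointer and also saves x19, x20 and d8 gets reg_offset[19] == 0,
   reg_offset[20] == 8, reg_offset[V8_REGNUM] == 16, reg_offset[29] == 24
   and reg_offset[30] == 32; offset is then 40, so padding0 == 8 and
   saved_regs_size == 48 (rounded up to the 16-byte stack boundary).  */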
1890 /* Make the last instruction frame-related and note that it performs
1891 the operation described by FRAME_PATTERN. */
1893 static void
1894 aarch64_set_frame_expr (rtx frame_pattern)
1896 rtx insn;
1898 insn = get_last_insn ();
1899 RTX_FRAME_RELATED_P (insn) = 1;
1900 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1901 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1902 frame_pattern,
1903 REG_NOTES (insn));
1906 static bool
1907 aarch64_register_saved_on_entry (int regno)
1909 return cfun->machine->frame.reg_offset[regno] != -1;
1913 static void
1914 aarch64_save_or_restore_fprs (int start_offset, int increment,
1915 bool restore, rtx base_rtx)
1918 unsigned regno;
1919 unsigned regno2;
1920 rtx insn;
1921 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1922 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1924 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1926 if (aarch64_register_saved_on_entry (regno))
1928 rtx mem;
1929 mem = gen_mem_ref (DFmode,
1930 plus_constant (Pmode,
1931 base_rtx,
1932 start_offset));
1934 for (regno2 = regno + 1;
1935 regno2 <= V31_REGNUM
1936 && !aarch64_register_saved_on_entry (regno2);
1937 regno2++)
1939 /* Empty loop. */
1942 if (regno2 <= V31_REGNUM &&
1943 aarch64_register_saved_on_entry (regno2))
1945 rtx mem2;
1947 /* Next highest register to be saved. */
1948 mem2 = gen_mem_ref (DFmode,
1949 plus_constant
1950 (Pmode,
1951 base_rtx,
1952 start_offset + increment));
1953 if (restore == false)
1955 insn = emit_insn
1956 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1957 mem2, gen_rtx_REG (DFmode, regno2)));
1960 else
1962 insn = emit_insn
1963 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1964 gen_rtx_REG (DFmode, regno2), mem2));
1966 add_reg_note (insn, REG_CFA_RESTORE,
1967 gen_rtx_REG (DFmode, regno));
1968 add_reg_note (insn, REG_CFA_RESTORE,
1969 gen_rtx_REG (DFmode, regno2));
1972 /* The first part of a frame-related parallel insn is
1973 always assumed to be relevant to the frame
1974 calculations; subsequent parts, are only
1975 frame-related if explicitly marked. */
1976 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1977 regno = regno2;
1978 start_offset += increment * 2;
1980 else
1982 if (restore == false)
1983 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1984 else
1986 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1987 add_reg_note (insn, REG_CFA_RESTORE,
1988 gen_rtx_REG (DFmode, regno));
1990 start_offset += increment;
1992 RTX_FRAME_RELATED_P (insn) = 1;
1998 /* Offset from the stack pointer at which the saves and
1999 restores have to happen. */
2000 static void
2001 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
2002 bool restore)
2004 rtx insn;
2005 rtx base_rtx = stack_pointer_rtx;
2006 HOST_WIDE_INT start_offset = offset;
2007 HOST_WIDE_INT increment = UNITS_PER_WORD;
2008 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
2009 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
2010 unsigned regno;
2011 unsigned regno2;
2013 for (regno = R0_REGNUM; regno <= limit; regno++)
2015 if (aarch64_register_saved_on_entry (regno))
2017 rtx mem;
2018 mem = gen_mem_ref (Pmode,
2019 plus_constant (Pmode,
2020 base_rtx,
2021 start_offset));
2023 for (regno2 = regno + 1;
2024 regno2 <= limit
2025 && !aarch64_register_saved_on_entry (regno2);
2026 regno2++)
2028 /* Empty loop. */
2030 if (regno2 <= limit &&
2031 aarch64_register_saved_on_entry (regno2))
2033 rtx mem2;
2035 /* Next highest register to be saved. */
2036 mem2 = gen_mem_ref (Pmode,
2037 plus_constant
2038 (Pmode,
2039 base_rtx,
2040 start_offset + increment));
2041 if (restore == false)
2043 insn = emit_insn
2044 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
2045 mem2, gen_rtx_REG (DImode, regno2)));
2048 else
2050 insn = emit_insn
2051 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
2052 gen_rtx_REG (DImode, regno2), mem2));
2054 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2055 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
2058 /* The first part of a frame-related parallel insn is
2059 always assumed to be relevant to the frame
2060 calculations; subsequent parts, are only
2061 frame-related if explicitly marked. */
2062 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2063 regno = regno2;
2064 start_offset += increment * 2;
2066 else
2068 if (restore == false)
2069 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
2070 else
2072 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
2073 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2075 start_offset += increment;
2077 RTX_FRAME_RELATED_P (insn) = 1;
2081 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
2084 /* AArch64 stack frames generated by this compiler look like:
2086 +-------------------------------+
2088 | incoming stack arguments |
2090 +-------------------------------+
2091 | | <-- incoming stack pointer (aligned)
2092 | callee-allocated save area |
2093 | for register varargs |
2095 +-------------------------------+
2096 | local variables | <-- frame_pointer_rtx
2098 +-------------------------------+
2099 | padding0 | \
2100 +-------------------------------+ |
2101 | callee-saved registers | | frame.saved_regs_size
2102 +-------------------------------+ |
2103 | LR' | |
2104 +-------------------------------+ |
2105 | FP' | / <- hard_frame_pointer_rtx (aligned)
2106 +-------------------------------+
2107 | dynamic allocation |
2108 +-------------------------------+
2109 | padding |
2110 +-------------------------------+
2111 | outgoing stack arguments | <-- arg_pointer
2113 +-------------------------------+
2114 | | <-- stack_pointer_rtx (aligned)
2116 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2117 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2118 unchanged. */
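/* Illustrative aside (not part of the original source): a worked example
   of the offset arithmetic used by the prologue below.  Assume a
   hypothetical function with 24 bytes of local variables, no varargs
   save area, FP and LR as the only callee saves (saved_regs_size = 16)
   and no outgoing arguments.  Then

       original_frame_size = 24
       frame_size = ROUND_UP (24 + 16 + 0, 16) = 48
       fp_offset  = 48 - 24 - 16 = 8

   so the prologue drops SP by 48, stores FP' and LR' at [sp, 8] and
   [sp, 16], points the hard frame pointer at sp + 8, and the locals
   occupy [sp, 24] .. [sp, 48), matching the diagram above.  All the
   numbers are invented purely for illustration.  */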
2120 /* Generate the prologue instructions for entry into a function.
2121 Establish the stack frame by decreasing the stack pointer with a
2122 properly calculated size and, if necessary, create a frame record
2123 filled with the values of LR and previous frame pointer. The
2124 current FP is also set up if it is in use. */
2126 void
2127 aarch64_expand_prologue (void)
2129 /* sub sp, sp, #<frame_size>
2130 stp {fp, lr}, [sp, #<frame_size> - 16]
2131 add fp, sp, #<frame_size> - hardfp_offset
2132 stp {cs_reg}, [fp, #-16] etc.
2134 sub sp, sp, <final_adjustment_if_any>
2136 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2137 HOST_WIDE_INT frame_size, offset;
2138 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2139 rtx insn;
2141 aarch64_layout_frame ();
2142 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2143 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2144 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2145 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2146 + crtl->outgoing_args_size);
2147 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2148 STACK_BOUNDARY / BITS_PER_UNIT);
2150 if (flag_stack_usage_info)
2151 current_function_static_stack_size = frame_size;
2153 fp_offset = (offset
2154 - original_frame_size
2155 - cfun->machine->frame.saved_regs_size);
2157 /* Store pairs and load pairs have an offset range of only -512 to 504. */
2158 if (offset >= 512)
2160 /* When the frame has a large size, an initial decrease is done on
2161 the stack pointer to jump over the callee-allocated save area for
2162 register varargs, the local variable area and/or the callee-saved
2163 register area. This will allow the pre-index write-back
2164 store pair instructions to be used for setting up the stack frame
2165 efficiently. */
2166 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2167 if (offset >= 512)
2168 offset = cfun->machine->frame.saved_regs_size;
2170 frame_size -= (offset + crtl->outgoing_args_size);
2171 fp_offset = 0;
2173 if (frame_size >= 0x1000000)
2175 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2176 emit_move_insn (op0, GEN_INT (-frame_size));
2177 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2178 aarch64_set_frame_expr (gen_rtx_SET
2179 (Pmode, stack_pointer_rtx,
2180 plus_constant (Pmode,
2181 stack_pointer_rtx,
2182 -frame_size)));
2184 else if (frame_size > 0)
2186 if ((frame_size & 0xfff) != frame_size)
2188 insn = emit_insn (gen_add2_insn
2189 (stack_pointer_rtx,
2190 GEN_INT (-(frame_size
2191 & ~(HOST_WIDE_INT)0xfff))));
2192 RTX_FRAME_RELATED_P (insn) = 1;
2194 if ((frame_size & 0xfff) != 0)
2196 insn = emit_insn (gen_add2_insn
2197 (stack_pointer_rtx,
2198 GEN_INT (-(frame_size
2199 & (HOST_WIDE_INT)0xfff))));
2200 RTX_FRAME_RELATED_P (insn) = 1;
2204 else
2205 frame_size = -1;
2207 if (offset > 0)
2209 /* If the frame pointer is needed, save it and LR first.
2210 Make the frame pointer point to the location of the
2211 old frame pointer on the stack. */
2212 if (frame_pointer_needed)
2214 rtx mem_fp, mem_lr;
2216 if (fp_offset)
2218 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2219 GEN_INT (-offset)));
2220 RTX_FRAME_RELATED_P (insn) = 1;
2221 aarch64_set_frame_expr (gen_rtx_SET
2222 (Pmode, stack_pointer_rtx,
2223 gen_rtx_MINUS (Pmode,
2224 stack_pointer_rtx,
2225 GEN_INT (offset))));
2226 mem_fp = gen_frame_mem (DImode,
2227 plus_constant (Pmode,
2228 stack_pointer_rtx,
2229 fp_offset));
2230 mem_lr = gen_frame_mem (DImode,
2231 plus_constant (Pmode,
2232 stack_pointer_rtx,
2233 fp_offset
2234 + UNITS_PER_WORD));
2235 insn = emit_insn (gen_store_pairdi (mem_fp,
2236 hard_frame_pointer_rtx,
2237 mem_lr,
2238 gen_rtx_REG (DImode,
2239 LR_REGNUM)));
2241 else
2243 insn = emit_insn (gen_storewb_pairdi_di
2244 (stack_pointer_rtx, stack_pointer_rtx,
2245 hard_frame_pointer_rtx,
2246 gen_rtx_REG (DImode, LR_REGNUM),
2247 GEN_INT (-offset),
2248 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2249 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2252 /* The first part of a frame-related parallel insn is always
2253 assumed to be relevant to the frame calculations;
2254 subsequent parts are only frame-related if explicitly
2255 marked. */
2256 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2257 RTX_FRAME_RELATED_P (insn) = 1;
2259 /* Set up frame pointer to point to the location of the
2260 previous frame pointer on the stack. */
2261 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2262 stack_pointer_rtx,
2263 GEN_INT (fp_offset)));
2264 aarch64_set_frame_expr (gen_rtx_SET
2265 (Pmode, hard_frame_pointer_rtx,
2266 plus_constant (Pmode,
2267 stack_pointer_rtx,
2268 fp_offset)));
2269 RTX_FRAME_RELATED_P (insn) = 1;
2270 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2271 hard_frame_pointer_rtx));
2273 else
2275 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2276 GEN_INT (-offset)));
2277 RTX_FRAME_RELATED_P (insn) = 1;
2280 aarch64_save_or_restore_callee_save_registers
2281 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2284 /* When offset >= 512,
2285 sub sp, sp, #<outgoing_args_size> */
2286 if (frame_size > -1)
2288 if (crtl->outgoing_args_size > 0)
2290 insn = emit_insn (gen_add2_insn
2291 (stack_pointer_rtx,
2292 GEN_INT (- crtl->outgoing_args_size)));
2293 RTX_FRAME_RELATED_P (insn) = 1;
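/* Illustrative sketch, not part of the original source: how a frame
   size that does not fit a single 12-bit ADD/SUB immediate (but is
   below 0x1000000) is split into the two stack-pointer adjustments
   emitted above.  The helper name is invented for illustration only.
   For example, 0x12345 splits into 0x12000 (a shifted 12-bit
   immediate) plus 0x345.  */

static void ATTRIBUTE_UNUSED
aarch64_example_split_frame_size (HOST_WIDE_INT frame_size,
                                  HOST_WIDE_INT *high, HOST_WIDE_INT *low)
{
  /* High part, subtracted from SP first.  */
  *high = frame_size & ~(HOST_WIDE_INT) 0xfff;
  /* Low part, subtracted from SP second (may be zero).  */
  *low = frame_size & (HOST_WIDE_INT) 0xfff;
}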
2298 /* Generate the epilogue instructions for returning from a function. */
2299 void
2300 aarch64_expand_epilogue (bool for_sibcall)
2302 HOST_WIDE_INT original_frame_size, frame_size, offset;
2303 HOST_WIDE_INT fp_offset;
2304 rtx insn;
2305 rtx cfa_reg;
2307 aarch64_layout_frame ();
2308 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2309 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2310 + crtl->outgoing_args_size);
2311 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2312 STACK_BOUNDARY / BITS_PER_UNIT);
2314 fp_offset = (offset
2315 - original_frame_size
2316 - cfun->machine->frame.saved_regs_size);
2318 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2320 /* Store pairs and load pairs have an offset range of only -512 to 504. */
2321 if (offset >= 512)
2323 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2324 if (offset >= 512)
2325 offset = cfun->machine->frame.saved_regs_size;
2327 frame_size -= (offset + crtl->outgoing_args_size);
2328 fp_offset = 0;
2329 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2331 insn = emit_insn (gen_add2_insn
2332 (stack_pointer_rtx,
2333 GEN_INT (crtl->outgoing_args_size)));
2334 RTX_FRAME_RELATED_P (insn) = 1;
2337 else
2338 frame_size = -1;
2340 /* If there were outgoing arguments or we've done dynamic stack
2341 allocation, then restore the stack pointer from the frame
2342 pointer. This is at most one insn and more efficient than using
2343 GCC's internal mechanism. */
2344 if (frame_pointer_needed
2345 && (crtl->outgoing_args_size || cfun->calls_alloca))
2347 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2348 hard_frame_pointer_rtx,
2349 GEN_INT (- fp_offset)));
2350 RTX_FRAME_RELATED_P (insn) = 1;
2351 /* As SP is set to (FP - fp_offset), according to the rules in
2352 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2353 from the value of SP from now on. */
2354 cfa_reg = stack_pointer_rtx;
2357 aarch64_save_or_restore_callee_save_registers
2358 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2360 /* Restore the frame pointer and lr if the frame pointer is needed. */
2361 if (offset > 0)
2363 if (frame_pointer_needed)
2365 rtx mem_fp, mem_lr;
2367 if (fp_offset)
2369 mem_fp = gen_frame_mem (DImode,
2370 plus_constant (Pmode,
2371 stack_pointer_rtx,
2372 fp_offset));
2373 mem_lr = gen_frame_mem (DImode,
2374 plus_constant (Pmode,
2375 stack_pointer_rtx,
2376 fp_offset
2377 + UNITS_PER_WORD));
2378 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2379 mem_fp,
2380 gen_rtx_REG (DImode,
2381 LR_REGNUM),
2382 mem_lr));
2384 else
2386 insn = emit_insn (gen_loadwb_pairdi_di
2387 (stack_pointer_rtx,
2388 stack_pointer_rtx,
2389 hard_frame_pointer_rtx,
2390 gen_rtx_REG (DImode, LR_REGNUM),
2391 GEN_INT (offset),
2392 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2393 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2394 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2395 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2396 plus_constant (Pmode, cfa_reg,
2397 offset))));
2400 /* The first part of a frame-related parallel insn
2401 is always assumed to be relevant to the frame
2402 calculations; subsequent parts are only
2403 frame-related if explicitly marked. */
2404 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2405 RTX_FRAME_RELATED_P (insn) = 1;
2406 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2407 add_reg_note (insn, REG_CFA_RESTORE,
2408 gen_rtx_REG (DImode, LR_REGNUM));
2410 if (fp_offset)
2412 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2413 GEN_INT (offset)));
2414 RTX_FRAME_RELATED_P (insn) = 1;
2417 else
2419 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2420 GEN_INT (offset)));
2421 RTX_FRAME_RELATED_P (insn) = 1;
2425 /* Stack adjustment for exception handler. */
2426 if (crtl->calls_eh_return)
2428 /* We need to unwind the stack by the offset computed by
2429 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2430 based on SP. Ideally we would update the SP and define the
2431 CFA along the lines of:
2433 SP = SP + EH_RETURN_STACKADJ_RTX
2434 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2436 However the dwarf emitter only understands a constant
2437 register offset.
2439 The solution chosen here is to use the otherwise unused IP0
2440 as a temporary register to hold the current SP value. The
2441 CFA is described using IP0 then SP is modified. */
2443 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2445 insn = emit_move_insn (ip0, stack_pointer_rtx);
2446 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2447 RTX_FRAME_RELATED_P (insn) = 1;
2449 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2451 /* Ensure the assignment to IP0 does not get optimized away. */
2452 emit_use (ip0);
2455 if (frame_size > -1)
2457 if (frame_size >= 0x1000000)
2459 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2460 emit_move_insn (op0, GEN_INT (frame_size));
2461 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2462 aarch64_set_frame_expr (gen_rtx_SET
2463 (Pmode, stack_pointer_rtx,
2464 plus_constant (Pmode,
2465 stack_pointer_rtx,
2466 frame_size)));
2468 else if (frame_size > 0)
2470 if ((frame_size & 0xfff) != 0)
2472 insn = emit_insn (gen_add2_insn
2473 (stack_pointer_rtx,
2474 GEN_INT ((frame_size
2475 & (HOST_WIDE_INT) 0xfff))));
2476 RTX_FRAME_RELATED_P (insn) = 1;
2478 if ((frame_size & 0xfff) != frame_size)
2480 insn = emit_insn (gen_add2_insn
2481 (stack_pointer_rtx,
2482 GEN_INT ((frame_size
2483 & ~ (HOST_WIDE_INT) 0xfff))));
2484 RTX_FRAME_RELATED_P (insn) = 1;
2488 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2489 plus_constant (Pmode,
2490 stack_pointer_rtx,
2491 offset)));
2494 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2495 if (!for_sibcall)
2496 emit_jump_insn (ret_rtx);
2499 /* Return the place to copy the exception unwinding return address to.
2500 This will probably be a stack slot, but could (in theory) be the
2501 return register. */
2503 aarch64_final_eh_return_addr (void)
2505 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2506 aarch64_layout_frame ();
2507 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2508 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2509 + crtl->outgoing_args_size);
2510 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2511 STACK_BOUNDARY / BITS_PER_UNIT);
2512 fp_offset = offset
2513 - original_frame_size
2514 - cfun->machine->frame.saved_regs_size;
2516 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2517 return gen_rtx_REG (DImode, LR_REGNUM);
2519 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2520 result in a store to save LR introduced by builtin_eh_return () being
2521 incorrectly deleted because the alias is not detected.
2522 So in the calculation of the address to copy the exception unwinding
2523 return address to, we note 2 cases.
2524 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2525 we return a SP-relative location since all the addresses are SP-relative
2526 in this case. This prevents the store from being optimized away.
2527 If the fp_offset is not 0, then the addresses will be FP-relative and
2528 therefore we return a FP-relative location. */
2530 if (frame_pointer_needed)
2532 if (fp_offset)
2533 return gen_frame_mem (DImode,
2534 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2535 else
2536 return gen_frame_mem (DImode,
2537 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2540 /* If FP is not needed, we calculate the location of LR, which would be
2541 at the top of the saved registers block. */
2543 return gen_frame_mem (DImode,
2544 plus_constant (Pmode,
2545 stack_pointer_rtx,
2546 fp_offset
2547 + cfun->machine->frame.saved_regs_size
2548 - 2 * UNITS_PER_WORD));
2551 /* Possibly output code to build up a constant in a register. For
2552 the benefit of the costs infrastructure, returns the number of
2553 instructions which would be emitted. GENERATE inhibits or
2554 enables code generation. */
2556 static int
2557 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2559 int insns = 0;
2561 if (aarch64_bitmask_imm (val, DImode))
2563 if (generate)
2564 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2565 insns = 1;
2567 else
2569 int i;
2570 int ncount = 0;
2571 int zcount = 0;
2572 HOST_WIDE_INT valp = val >> 16;
2573 HOST_WIDE_INT valm;
2574 HOST_WIDE_INT tval;
2576 for (i = 16; i < 64; i += 16)
2578 valm = (valp & 0xffff);
2580 if (valm != 0)
2581 ++ zcount;
2583 if (valm != 0xffff)
2584 ++ ncount;
2586 valp >>= 16;
2589 /* zcount contains the number of additional MOVK instructions
2590 required if the constant is built up with an initial MOVZ instruction,
2591 while ncount is the number of MOVK instructions required if starting
2592 with a MOVN instruction. Choose the sequence that needs the smaller
2593 number of instructions, preferring MOVZ instructions when the two
2594 counts are equal. */
2595 if (ncount < zcount)
2597 if (generate)
2598 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2599 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2600 tval = 0xffff;
2601 insns++;
2603 else
2605 if (generate)
2606 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2607 GEN_INT (val & 0xffff));
2608 tval = 0;
2609 insns++;
2612 val >>= 16;
2614 for (i = 16; i < 64; i += 16)
2616 if ((val & 0xffff) != tval)
2618 if (generate)
2619 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2620 GEN_INT (i),
2621 GEN_INT (val & 0xffff)));
2622 insns++;
2624 val >>= 16;
2627 return insns;
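/* Illustrative sketch, not part of the original source: the chunk
   counting performed above, for the non-bitmask case.  The helper name
   is invented for illustration.  A 16-bit chunk equal to 0 is free
   after an initial MOVZ, one equal to 0xffff is free after an initial
   MOVN; the low 16 bits are always handled by the initial instruction.
   For example 0xffffffffffff1234 costs 1 instruction via MOVN but 4
   via MOVZ.  */

static int ATTRIBUTE_UNUSED
aarch64_example_build_constant_cost (unsigned HOST_WIDE_INT val)
{
  int i, zcount = 0, ncount = 0;

  for (i = 16; i < 64; i += 16)
    {
      unsigned HOST_WIDE_INT chunk = (val >> i) & 0xffff;
      if (chunk != 0)
        zcount++;               /* MOVK needed after an initial MOVZ.  */
      if (chunk != 0xffff)
        ncount++;               /* MOVK needed after an initial MOVN.  */
    }

  /* The initial MOVZ/MOVN plus the cheaper number of MOVKs.  */
  return 1 + (ncount < zcount ? ncount : zcount);
}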
2630 static void
2631 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2633 HOST_WIDE_INT mdelta = delta;
2634 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2635 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2637 if (mdelta < 0)
2638 mdelta = -mdelta;
2640 if (mdelta >= 4096 * 4096)
2642 (void) aarch64_build_constant (scratchreg, delta, true);
2643 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2645 else if (mdelta > 0)
2647 if (mdelta >= 4096)
2649 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2650 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2651 if (delta < 0)
2652 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2653 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2654 else
2655 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2656 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2658 if (mdelta % 4096 != 0)
2660 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2661 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2662 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2667 /* Output code to add DELTA to the first argument, and then jump
2668 to FUNCTION. Used for C++ multiple inheritance. */
2669 static void
2670 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2671 HOST_WIDE_INT delta,
2672 HOST_WIDE_INT vcall_offset,
2673 tree function)
2675 /* The this pointer is always in x0. Note that this differs from
2676 Arm where the this pointer may be bumped to r1 if r0 is required
2677 to return a pointer to an aggregate. On AArch64 a result value
2678 pointer will be in x8. */
2679 int this_regno = R0_REGNUM;
2680 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2682 reload_completed = 1;
2683 emit_note (NOTE_INSN_PROLOGUE_END);
2685 if (vcall_offset == 0)
2686 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2687 else
2689 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2691 this_rtx = gen_rtx_REG (Pmode, this_regno);
2692 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2693 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2695 addr = this_rtx;
2696 if (delta != 0)
2698 if (delta >= -256 && delta < 256)
2699 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2700 plus_constant (Pmode, this_rtx, delta));
2701 else
2702 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2705 if (Pmode == ptr_mode)
2706 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2707 else
2708 aarch64_emit_move (temp0,
2709 gen_rtx_ZERO_EXTEND (Pmode,
2710 gen_rtx_MEM (ptr_mode, addr)));
2712 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2713 addr = plus_constant (Pmode, temp0, vcall_offset);
2714 else
2716 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2717 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2720 if (Pmode == ptr_mode)
2721 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2722 else
2723 aarch64_emit_move (temp1,
2724 gen_rtx_SIGN_EXTEND (Pmode,
2725 gen_rtx_MEM (ptr_mode, addr)));
2727 emit_insn (gen_add2_insn (this_rtx, temp1));
2730 /* Generate a tail call to the target function. */
2731 if (!TREE_USED (function))
2733 assemble_external (function);
2734 TREE_USED (function) = 1;
2736 funexp = XEXP (DECL_RTL (function), 0);
2737 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2738 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2739 SIBLING_CALL_P (insn) = 1;
2741 insn = get_insns ();
2742 shorten_branches (insn);
2743 final_start_function (insn, file, 1);
2744 final (insn, file, 1);
2745 final_end_function ();
2747 /* Stop pretending to be a post-reload pass. */
2748 reload_completed = 0;
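/* Illustrative aside (not part of the original source): for a
   non-virtual thunk, i.e. vcall_offset == 0 and a delta small enough
   for an ADD immediate, the code emitted above amounts to roughly

        add     x0, x0, #<delta>
        b       <function>

   With a non-zero vcall_offset the thunk additionally loads the vtable
   pointer through x0 into ip0, loads the adjustment found at
   <vcall_offset> from it into ip1, and adds that to x0 before the tail
   call.  */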
2751 static int
2752 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2754 if (GET_CODE (*x) == SYMBOL_REF)
2755 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2757 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2758 TLS offsets, not real symbol references. */
2759 if (GET_CODE (*x) == UNSPEC
2760 && XINT (*x, 1) == UNSPEC_TLS)
2761 return -1;
2763 return 0;
2766 static bool
2767 aarch64_tls_referenced_p (rtx x)
2769 if (!TARGET_HAVE_TLS)
2770 return false;
2772 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2776 static int
2777 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2779 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2780 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2782 if (*imm1 < *imm2)
2783 return -1;
2784 if (*imm1 > *imm2)
2785 return +1;
2786 return 0;
2790 static void
2791 aarch64_build_bitmask_table (void)
2793 unsigned HOST_WIDE_INT mask, imm;
2794 unsigned int log_e, e, s, r;
2795 unsigned int nimms = 0;
2797 for (log_e = 1; log_e <= 6; log_e++)
2799 e = 1 << log_e;
2800 if (e == 64)
2801 mask = ~(HOST_WIDE_INT) 0;
2802 else
2803 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2804 for (s = 1; s < e; s++)
2806 for (r = 0; r < e; r++)
2808 /* Set S consecutive bits to 1 (S < 64). */
2809 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2810 /* Rotate right by R. */
2811 if (r != 0)
2812 imm = ((imm >> r) | (imm << (e - r))) & mask;
2813 /* Replicate the constant depending on SIMD size. */
2814 switch (log_e) {
2815 case 1: imm |= (imm << 2);
2816 case 2: imm |= (imm << 4);
2817 case 3: imm |= (imm << 8);
2818 case 4: imm |= (imm << 16);
2819 case 5: imm |= (imm << 32);
2820 case 6:
2821 break;
2822 default:
2823 gcc_unreachable ();
2825 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2826 aarch64_bitmasks[nimms++] = imm;
2831 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2832 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2833 aarch64_bitmasks_cmp);
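/* Illustrative sketch, not part of the original source: building one
   table entry by hand.  The helper name is invented for illustration.
   With element size E = 8, run length S = 3 and rotation R = 1, the
   8-bit element is 0x83 (0b00000111 rotated right by one), and
   replication gives the 64-bit bitmask immediate 0x8383838383838383.  */

static unsigned HOST_WIDE_INT ATTRIBUTE_UNUSED
aarch64_example_bitmask_pattern (void)
{
  unsigned HOST_WIDE_INT imm = ((unsigned HOST_WIDE_INT) 1 << 3) - 1;
  unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << 8) - 1;

  /* Rotate right by one within the 8-bit element.  */
  imm = ((imm >> 1) | (imm << (8 - 1))) & mask;         /* 0x83 */

  /* Replicate the 8-bit element across all 64 bits.  */
  imm |= imm << 8;
  imm |= imm << 16;
  imm |= imm << 32;
  return imm;                                           /* 0x8383838383838383 */
}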
2837 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2838 a left shift of 0 or 12 bits. */
2839 bool
2840 aarch64_uimm12_shift (HOST_WIDE_INT val)
2842 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2843 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
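/* Illustrative aside (not part of the original source): under the
   check above, 0xabc is accepted (a plain 12-bit immediate) and
   0xabc000 is accepted (the same value shifted left by 12), while
   0xabc00 is rejected because it straddles the two positions.  */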
2848 /* Return true if val is an immediate that can be loaded into a
2849 register by a MOVZ instruction. */
2850 static bool
2851 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2853 if (GET_MODE_SIZE (mode) > 4)
2855 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2856 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2857 return 1;
2859 else
2861 /* Ignore sign extension. */
2862 val &= (HOST_WIDE_INT) 0xffffffff;
2864 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2865 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2869 /* Return true if val is a valid bitmask immediate. */
2870 bool
2871 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2873 if (GET_MODE_SIZE (mode) < 8)
2875 /* Replicate bit pattern. */
2876 val &= (HOST_WIDE_INT) 0xffffffff;
2877 val |= val << 32;
2879 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2880 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2884 /* Return true if val is an immediate that can be loaded into a
2885 register in a single instruction. */
2886 bool
2887 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2889 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2890 return 1;
2891 return aarch64_bitmask_imm (val, mode);
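/* Illustrative aside (not part of the original source): for DImode,
   0x12340000 (a 16-bit chunk at bit 16) is a single MOVZ,
   0xffffffffffff1234 is a single MOVN (its complement is a 16-bit
   chunk at bit 0), and 0x0101010101010101 is a single MOV with a
   bitmask immediate (an alias of ORR against XZR).  Anything else
   needs a multi-instruction sequence such as the MOVZ/MOVN plus MOVK
   one built by aarch64_build_constant above.  */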
2894 static bool
2895 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2897 rtx base, offset;
2899 if (GET_CODE (x) == HIGH)
2900 return true;
2902 split_const (x, &base, &offset);
2903 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2905 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2906 != SYMBOL_FORCE_TO_MEM)
2907 return true;
2908 else
2909 /* Avoid generating a 64-bit relocation in ILP32; leave it
2910 to aarch64_expand_mov_immediate to handle properly. */
2911 return mode != ptr_mode;
2914 return aarch64_tls_referenced_p (x);
2917 /* Return true if register REGNO is a valid index register.
2918 STRICT_P is true if REG_OK_STRICT is in effect. */
2920 bool
2921 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2923 if (!HARD_REGISTER_NUM_P (regno))
2925 if (!strict_p)
2926 return true;
2928 if (!reg_renumber)
2929 return false;
2931 regno = reg_renumber[regno];
2933 return GP_REGNUM_P (regno);
2936 /* Return true if register REGNO is a valid base register.
2937 STRICT_P is true if REG_OK_STRICT is in effect. */
2939 bool
2940 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2942 if (!HARD_REGISTER_NUM_P (regno))
2944 if (!strict_p)
2945 return true;
2947 if (!reg_renumber)
2948 return false;
2950 regno = reg_renumber[regno];
2953 /* The fake registers will be eliminated to either the stack or
2954 hard frame pointer, both of which are usually valid base registers.
2955 Reload deals with the cases where the eliminated form isn't valid. */
2956 return (GP_REGNUM_P (regno)
2957 || regno == SP_REGNUM
2958 || regno == FRAME_POINTER_REGNUM
2959 || regno == ARG_POINTER_REGNUM);
2962 /* Return true if X is a valid base register.
2963 STRICT_P is true if REG_OK_STRICT is in effect. */
2965 static bool
2966 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2968 if (!strict_p && GET_CODE (x) == SUBREG)
2969 x = SUBREG_REG (x);
2971 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2974 /* Return true if address offset is a valid index. If it is, fill in INFO
2975 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2977 static bool
2978 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2979 enum machine_mode mode, bool strict_p)
2981 enum aarch64_address_type type;
2982 rtx index;
2983 int shift;
2985 /* (reg:P) */
2986 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2987 && GET_MODE (x) == Pmode)
2989 type = ADDRESS_REG_REG;
2990 index = x;
2991 shift = 0;
2993 /* (sign_extend:DI (reg:SI)) */
2994 else if ((GET_CODE (x) == SIGN_EXTEND
2995 || GET_CODE (x) == ZERO_EXTEND)
2996 && GET_MODE (x) == DImode
2997 && GET_MODE (XEXP (x, 0)) == SImode)
2999 type = (GET_CODE (x) == SIGN_EXTEND)
3000 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3001 index = XEXP (x, 0);
3002 shift = 0;
3004 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3005 else if (GET_CODE (x) == MULT
3006 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3007 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3008 && GET_MODE (XEXP (x, 0)) == DImode
3009 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3010 && CONST_INT_P (XEXP (x, 1)))
3012 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3013 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3014 index = XEXP (XEXP (x, 0), 0);
3015 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3017 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3018 else if (GET_CODE (x) == ASHIFT
3019 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3020 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3021 && GET_MODE (XEXP (x, 0)) == DImode
3022 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3023 && CONST_INT_P (XEXP (x, 1)))
3025 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3026 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3027 index = XEXP (XEXP (x, 0), 0);
3028 shift = INTVAL (XEXP (x, 1));
3030 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3031 else if ((GET_CODE (x) == SIGN_EXTRACT
3032 || GET_CODE (x) == ZERO_EXTRACT)
3033 && GET_MODE (x) == DImode
3034 && GET_CODE (XEXP (x, 0)) == MULT
3035 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3036 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3038 type = (GET_CODE (x) == SIGN_EXTRACT)
3039 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3040 index = XEXP (XEXP (x, 0), 0);
3041 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3042 if (INTVAL (XEXP (x, 1)) != 32 + shift
3043 || INTVAL (XEXP (x, 2)) != 0)
3044 shift = -1;
3046 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3047 (const_int 0xffffffff<<shift)) */
3048 else if (GET_CODE (x) == AND
3049 && GET_MODE (x) == DImode
3050 && GET_CODE (XEXP (x, 0)) == MULT
3051 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3052 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3053 && CONST_INT_P (XEXP (x, 1)))
3055 type = ADDRESS_REG_UXTW;
3056 index = XEXP (XEXP (x, 0), 0);
3057 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3058 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3059 shift = -1;
3061 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3062 else if ((GET_CODE (x) == SIGN_EXTRACT
3063 || GET_CODE (x) == ZERO_EXTRACT)
3064 && GET_MODE (x) == DImode
3065 && GET_CODE (XEXP (x, 0)) == ASHIFT
3066 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3067 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3069 type = (GET_CODE (x) == SIGN_EXTRACT)
3070 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3071 index = XEXP (XEXP (x, 0), 0);
3072 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3073 if (INTVAL (XEXP (x, 1)) != 32 + shift
3074 || INTVAL (XEXP (x, 2)) != 0)
3075 shift = -1;
3077 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3078 (const_int 0xffffffff<<shift)) */
3079 else if (GET_CODE (x) == AND
3080 && GET_MODE (x) == DImode
3081 && GET_CODE (XEXP (x, 0)) == ASHIFT
3082 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3083 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3084 && CONST_INT_P (XEXP (x, 1)))
3086 type = ADDRESS_REG_UXTW;
3087 index = XEXP (XEXP (x, 0), 0);
3088 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3089 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3090 shift = -1;
3092 /* (mult:P (reg:P) (const_int scale)) */
3093 else if (GET_CODE (x) == MULT
3094 && GET_MODE (x) == Pmode
3095 && GET_MODE (XEXP (x, 0)) == Pmode
3096 && CONST_INT_P (XEXP (x, 1)))
3098 type = ADDRESS_REG_REG;
3099 index = XEXP (x, 0);
3100 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3102 /* (ashift:P (reg:P) (const_int shift)) */
3103 else if (GET_CODE (x) == ASHIFT
3104 && GET_MODE (x) == Pmode
3105 && GET_MODE (XEXP (x, 0)) == Pmode
3106 && CONST_INT_P (XEXP (x, 1)))
3108 type = ADDRESS_REG_REG;
3109 index = XEXP (x, 0);
3110 shift = INTVAL (XEXP (x, 1));
3112 else
3113 return false;
3115 if (GET_CODE (index) == SUBREG)
3116 index = SUBREG_REG (index);
3118 if ((shift == 0
3119 || (shift > 0 && shift <= 3
3120 && (1 << shift) == GET_MODE_SIZE (mode)))
3121 && REG_P (index)
3122 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3124 info->type = type;
3125 info->offset = index;
3126 info->shift = shift;
3127 return true;
3130 return false;
3133 static inline bool
3134 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3136 return (offset >= -64 * GET_MODE_SIZE (mode)
3137 && offset < 64 * GET_MODE_SIZE (mode)
3138 && offset % GET_MODE_SIZE (mode) == 0);
3141 static inline bool
3142 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3143 HOST_WIDE_INT offset)
3145 return offset >= -256 && offset < 256;
3148 static inline bool
3149 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3151 return (offset >= 0
3152 && offset < 4096 * GET_MODE_SIZE (mode)
3153 && offset % GET_MODE_SIZE (mode) == 0);
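/* Illustrative aside (not part of the original source): for an 8-byte
   (DImode) access the three predicates above accept, respectively:

     offset_7bit_signed_scaled_p     -512 .. 504, multiples of 8
     offset_9bit_signed_unscaled_p   -256 .. 255, any byte offset
     offset_12bit_unsigned_scaled_p  0 .. 32760, multiples of 8

   corresponding to the LDP/STP, unscaled LDUR/STUR and scaled LDR/STR
   immediate forms.  */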
3156 /* Return true if X is a valid address for machine mode MODE. If it is,
3157 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3158 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3160 static bool
3161 aarch64_classify_address (struct aarch64_address_info *info,
3162 rtx x, enum machine_mode mode,
3163 RTX_CODE outer_code, bool strict_p)
3165 enum rtx_code code = GET_CODE (x);
3166 rtx op0, op1;
3167 bool allow_reg_index_p =
3168 outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3169 || aarch64_vector_mode_supported_p (mode));
3170 /* Don't support anything other than POST_INC or REG addressing for
3171 AdvSIMD. */
3172 if (aarch64_vect_struct_mode_p (mode)
3173 && (code != POST_INC && code != REG))
3174 return false;
3176 switch (code)
3178 case REG:
3179 case SUBREG:
3180 info->type = ADDRESS_REG_IMM;
3181 info->base = x;
3182 info->offset = const0_rtx;
3183 return aarch64_base_register_rtx_p (x, strict_p);
3185 case PLUS:
3186 op0 = XEXP (x, 0);
3187 op1 = XEXP (x, 1);
3188 if (GET_MODE_SIZE (mode) != 0
3189 && CONST_INT_P (op1)
3190 && aarch64_base_register_rtx_p (op0, strict_p))
3192 HOST_WIDE_INT offset = INTVAL (op1);
3194 info->type = ADDRESS_REG_IMM;
3195 info->base = op0;
3196 info->offset = op1;
3198 /* TImode and TFmode values are allowed in both pairs of X
3199 registers and individual Q registers. The available
3200 address modes are:
3201 X,X: 7-bit signed scaled offset
3202 Q: 9-bit signed offset
3203 We conservatively require an offset representable in either mode.
3205 if (mode == TImode || mode == TFmode)
3206 return (offset_7bit_signed_scaled_p (mode, offset)
3207 && offset_9bit_signed_unscaled_p (mode, offset));
3209 if (outer_code == PARALLEL)
3210 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3211 && offset_7bit_signed_scaled_p (mode, offset));
3212 else
3213 return (offset_9bit_signed_unscaled_p (mode, offset)
3214 || offset_12bit_unsigned_scaled_p (mode, offset));
3217 if (allow_reg_index_p)
3219 /* Look for base + (scaled/extended) index register. */
3220 if (aarch64_base_register_rtx_p (op0, strict_p)
3221 && aarch64_classify_index (info, op1, mode, strict_p))
3223 info->base = op0;
3224 return true;
3226 if (aarch64_base_register_rtx_p (op1, strict_p)
3227 && aarch64_classify_index (info, op0, mode, strict_p))
3229 info->base = op1;
3230 return true;
3234 return false;
3236 case POST_INC:
3237 case POST_DEC:
3238 case PRE_INC:
3239 case PRE_DEC:
3240 info->type = ADDRESS_REG_WB;
3241 info->base = XEXP (x, 0);
3242 info->offset = NULL_RTX;
3243 return aarch64_base_register_rtx_p (info->base, strict_p);
3245 case POST_MODIFY:
3246 case PRE_MODIFY:
3247 info->type = ADDRESS_REG_WB;
3248 info->base = XEXP (x, 0);
3249 if (GET_CODE (XEXP (x, 1)) == PLUS
3250 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3251 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3252 && aarch64_base_register_rtx_p (info->base, strict_p))
3254 HOST_WIDE_INT offset;
3255 info->offset = XEXP (XEXP (x, 1), 1);
3256 offset = INTVAL (info->offset);
3258 /* TImode and TFmode values are allowed in both pairs of X
3259 registers and individual Q registers. The available
3260 address modes are:
3261 X,X: 7-bit signed scaled offset
3262 Q: 9-bit signed offset
3263 We conservatively require an offset representable in either mode.
3265 if (mode == TImode || mode == TFmode)
3266 return (offset_7bit_signed_scaled_p (mode, offset)
3267 && offset_9bit_signed_unscaled_p (mode, offset));
3269 if (outer_code == PARALLEL)
3270 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3271 && offset_7bit_signed_scaled_p (mode, offset));
3272 else
3273 return offset_9bit_signed_unscaled_p (mode, offset);
3275 return false;
3277 case CONST:
3278 case SYMBOL_REF:
3279 case LABEL_REF:
3280 /* load literal: pc-relative constant pool entry. Only supported
3281 for SI mode or larger. */
3282 info->type = ADDRESS_SYMBOLIC;
3283 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3285 rtx sym, addend;
3287 split_const (x, &sym, &addend);
3288 return (GET_CODE (sym) == LABEL_REF
3289 || (GET_CODE (sym) == SYMBOL_REF
3290 && CONSTANT_POOL_ADDRESS_P (sym)));
3292 return false;
3294 case LO_SUM:
3295 info->type = ADDRESS_LO_SUM;
3296 info->base = XEXP (x, 0);
3297 info->offset = XEXP (x, 1);
3298 if (allow_reg_index_p
3299 && aarch64_base_register_rtx_p (info->base, strict_p))
3301 rtx sym, offs;
3302 split_const (info->offset, &sym, &offs);
3303 if (GET_CODE (sym) == SYMBOL_REF
3304 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3305 == SYMBOL_SMALL_ABSOLUTE))
3307 /* The symbol and offset must be aligned to the access size. */
3308 unsigned int align;
3309 unsigned int ref_size;
3311 if (CONSTANT_POOL_ADDRESS_P (sym))
3312 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3313 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3315 tree exp = SYMBOL_REF_DECL (sym);
3316 align = TYPE_ALIGN (TREE_TYPE (exp));
3317 align = CONSTANT_ALIGNMENT (exp, align);
3319 else if (SYMBOL_REF_DECL (sym))
3320 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3321 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3322 && SYMBOL_REF_BLOCK (sym) != NULL)
3323 align = SYMBOL_REF_BLOCK (sym)->alignment;
3324 else
3325 align = BITS_PER_UNIT;
3327 ref_size = GET_MODE_SIZE (mode);
3328 if (ref_size == 0)
3329 ref_size = GET_MODE_SIZE (DImode);
3331 return ((INTVAL (offs) & (ref_size - 1)) == 0
3332 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3335 return false;
3337 default:
3338 return false;
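/* Illustrative aside (not part of the original source): some example
   addresses, written in assembly syntax, and the classification the
   function above gives their RTL forms:

     [x1]                ADDRESS_REG_IMM, offset 0
     [x1, 16]            ADDRESS_REG_IMM
     [x1, x2]            ADDRESS_REG_REG, shift 0
     [x1, x2, lsl 3]     ADDRESS_REG_REG, shift 3 (8-byte access)
     [x1, w2, sxtw 2]    ADDRESS_REG_SXTW (4-byte access)
     [x1, 16]!           ADDRESS_REG_WB (PRE_MODIFY)
     [x1, #:lo12:sym]    ADDRESS_LO_SUM
     pc-relative literal ADDRESS_SYMBOLIC  */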
3342 bool
3343 aarch64_symbolic_address_p (rtx x)
3345 rtx offset;
3347 split_const (x, &x, &offset);
3348 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3351 /* Classify the base of symbolic expression X, given that X appears in
3352 context CONTEXT. */
3354 enum aarch64_symbol_type
3355 aarch64_classify_symbolic_expression (rtx x,
3356 enum aarch64_symbol_context context)
3358 rtx offset;
3360 split_const (x, &x, &offset);
3361 return aarch64_classify_symbol (x, context);
3365 /* Return TRUE if X is a legitimate address for accessing memory in
3366 mode MODE. */
3367 static bool
3368 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3370 struct aarch64_address_info addr;
3372 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3375 /* Return TRUE if X is a legitimate address for accessing memory in
3376 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3377 pair operation. */
3378 bool
3379 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3380 RTX_CODE outer_code, bool strict_p)
3382 struct aarch64_address_info addr;
3384 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3387 /* Return TRUE if rtx X is immediate constant 0.0 */
3388 bool
3389 aarch64_float_const_zero_rtx_p (rtx x)
3391 REAL_VALUE_TYPE r;
3393 if (GET_MODE (x) == VOIDmode)
3394 return false;
3396 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3397 if (REAL_VALUE_MINUS_ZERO (r))
3398 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3399 return REAL_VALUES_EQUAL (r, dconst0);
3402 /* Return the fixed registers used for condition codes. */
3404 static bool
3405 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3407 *p1 = CC_REGNUM;
3408 *p2 = INVALID_REGNUM;
3409 return true;
3412 enum machine_mode
3413 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3415 /* All floating point compares return CCFP if it is an equality
3416 comparison, and CCFPE otherwise. */
3417 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3419 switch (code)
3421 case EQ:
3422 case NE:
3423 case UNORDERED:
3424 case ORDERED:
3425 case UNLT:
3426 case UNLE:
3427 case UNGT:
3428 case UNGE:
3429 case UNEQ:
3430 case LTGT:
3431 return CCFPmode;
3433 case LT:
3434 case LE:
3435 case GT:
3436 case GE:
3437 return CCFPEmode;
3439 default:
3440 gcc_unreachable ();
3444 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3445 && y == const0_rtx
3446 && (code == EQ || code == NE || code == LT || code == GE)
3447 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3448 || GET_CODE (x) == NEG))
3449 return CC_NZmode;
3451 /* A compare with a shifted operand. Because of canonicalization,
3452 the comparison will have to be swapped when we emit the assembly
3453 code. */
3454 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3455 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3456 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3457 || GET_CODE (x) == LSHIFTRT
3458 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3459 return CC_SWPmode;
3461 /* Similarly for a negated operand, but we can only do this for
3462 equalities. */
3463 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3464 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3465 && (code == EQ || code == NE)
3466 && GET_CODE (x) == NEG)
3467 return CC_Zmode;
3469 /* A compare of a mode narrower than SI mode against zero can be done
3470 by extending the value in the comparison. */
3471 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3472 && y == const0_rtx)
3473 /* Only use sign-extension if we really need it. */
3474 return ((code == GT || code == GE || code == LE || code == LT)
3475 ? CC_SESWPmode : CC_ZESWPmode);
3477 /* For everything else, return CCmode. */
3478 return CCmode;
3481 static unsigned
3482 aarch64_get_condition_code (rtx x)
3484 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3485 enum rtx_code comp_code = GET_CODE (x);
3487 if (GET_MODE_CLASS (mode) != MODE_CC)
3488 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3490 switch (mode)
3492 case CCFPmode:
3493 case CCFPEmode:
3494 switch (comp_code)
3496 case GE: return AARCH64_GE;
3497 case GT: return AARCH64_GT;
3498 case LE: return AARCH64_LS;
3499 case LT: return AARCH64_MI;
3500 case NE: return AARCH64_NE;
3501 case EQ: return AARCH64_EQ;
3502 case ORDERED: return AARCH64_VC;
3503 case UNORDERED: return AARCH64_VS;
3504 case UNLT: return AARCH64_LT;
3505 case UNLE: return AARCH64_LE;
3506 case UNGT: return AARCH64_HI;
3507 case UNGE: return AARCH64_PL;
3508 default: gcc_unreachable ();
3510 break;
3512 case CCmode:
3513 switch (comp_code)
3515 case NE: return AARCH64_NE;
3516 case EQ: return AARCH64_EQ;
3517 case GE: return AARCH64_GE;
3518 case GT: return AARCH64_GT;
3519 case LE: return AARCH64_LE;
3520 case LT: return AARCH64_LT;
3521 case GEU: return AARCH64_CS;
3522 case GTU: return AARCH64_HI;
3523 case LEU: return AARCH64_LS;
3524 case LTU: return AARCH64_CC;
3525 default: gcc_unreachable ();
3527 break;
3529 case CC_SWPmode:
3530 case CC_ZESWPmode:
3531 case CC_SESWPmode:
3532 switch (comp_code)
3534 case NE: return AARCH64_NE;
3535 case EQ: return AARCH64_EQ;
3536 case GE: return AARCH64_LE;
3537 case GT: return AARCH64_LT;
3538 case LE: return AARCH64_GE;
3539 case LT: return AARCH64_GT;
3540 case GEU: return AARCH64_LS;
3541 case GTU: return AARCH64_CC;
3542 case LEU: return AARCH64_CS;
3543 case LTU: return AARCH64_HI;
3544 default: gcc_unreachable ();
3546 break;
3548 case CC_NZmode:
3549 switch (comp_code)
3551 case NE: return AARCH64_NE;
3552 case EQ: return AARCH64_EQ;
3553 case GE: return AARCH64_PL;
3554 case LT: return AARCH64_MI;
3555 default: gcc_unreachable ();
3557 break;
3559 case CC_Zmode:
3560 switch (comp_code)
3562 case NE: return AARCH64_NE;
3563 case EQ: return AARCH64_EQ;
3564 default: gcc_unreachable ();
3566 break;
3568 default:
3569 gcc_unreachable ();
3570 break;
3574 static unsigned
3575 bit_count (unsigned HOST_WIDE_INT value)
3577 unsigned count = 0;
3579 while (value)
3581 count++;
3582 value &= value - 1;
3585 return count;
3588 void
3589 aarch64_print_operand (FILE *f, rtx x, char code)
3591 switch (code)
3593 /* An integer or symbol address without a preceding # sign. */
3594 case 'c':
3595 switch (GET_CODE (x))
3597 case CONST_INT:
3598 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3599 break;
3601 case SYMBOL_REF:
3602 output_addr_const (f, x);
3603 break;
3605 case CONST:
3606 if (GET_CODE (XEXP (x, 0)) == PLUS
3607 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3609 output_addr_const (f, x);
3610 break;
3612 /* Fall through. */
3614 default:
3615 output_operand_lossage ("Unsupported operand for code '%c'", code);
3617 break;
3619 case 'e':
3620 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3622 int n;
3624 if (GET_CODE (x) != CONST_INT
3625 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3627 output_operand_lossage ("invalid operand for '%%%c'", code);
3628 return;
3631 switch (n)
3633 case 3:
3634 fputc ('b', f);
3635 break;
3636 case 4:
3637 fputc ('h', f);
3638 break;
3639 case 5:
3640 fputc ('w', f);
3641 break;
3642 default:
3643 output_operand_lossage ("invalid operand for '%%%c'", code);
3644 return;
3647 break;
3649 case 'p':
3651 int n;
3653 /* Print N such that 2^N == X. */
3654 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3656 output_operand_lossage ("invalid operand for '%%%c'", code);
3657 return;
3660 asm_fprintf (f, "%d", n);
3662 break;
3664 case 'P':
3665 /* Print the number of non-zero bits in X (a const_int). */
3666 if (GET_CODE (x) != CONST_INT)
3668 output_operand_lossage ("invalid operand for '%%%c'", code);
3669 return;
3672 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3673 break;
3675 case 'H':
3676 /* Print the higher numbered register of a pair (TImode) of regs. */
3677 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3679 output_operand_lossage ("invalid operand for '%%%c'", code);
3680 return;
3683 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3684 break;
3686 case 'm':
3687 /* Print a condition (eq, ne, etc). */
3689 /* CONST_TRUE_RTX means always -- that's the default. */
3690 if (x == const_true_rtx)
3691 return;
3693 if (!COMPARISON_P (x))
3695 output_operand_lossage ("invalid operand for '%%%c'", code);
3696 return;
3699 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3700 break;
3702 case 'M':
3703 /* Print the inverse of a condition (eq <-> ne, etc). */
3705 /* CONST_TRUE_RTX means never -- that's the default. */
3706 if (x == const_true_rtx)
3708 fputs ("nv", f);
3709 return;
3712 if (!COMPARISON_P (x))
3714 output_operand_lossage ("invalid operand for '%%%c'", code);
3715 return;
3718 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3719 (aarch64_get_condition_code (x))], f);
3720 break;
3722 case 'b':
3723 case 'h':
3724 case 's':
3725 case 'd':
3726 case 'q':
3727 /* Print a scalar FP/SIMD register name. */
3728 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3730 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3731 return;
3733 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3734 break;
3736 case 'S':
3737 case 'T':
3738 case 'U':
3739 case 'V':
3740 /* Print the first FP/SIMD register name in a list. */
3741 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3743 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3744 return;
3746 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3747 break;
3749 case 'X':
3750 /* Print bottom 16 bits of integer constant in hex. */
3751 if (GET_CODE (x) != CONST_INT)
3753 output_operand_lossage ("invalid operand for '%%%c'", code);
3754 return;
3756 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3757 break;
3759 case 'w':
3760 case 'x':
3761 /* Print a general register name or the zero register (32-bit or
3762 64-bit). */
3763 if (x == const0_rtx
3764 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3766 asm_fprintf (f, "%czr", code);
3767 break;
3770 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3772 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3773 break;
3776 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3778 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3779 break;
3782 /* Fall through */
3784 case 0:
3785 /* Print a normal operand, if it's a general register, then we
3786 assume DImode. */
3787 if (x == NULL)
3789 output_operand_lossage ("missing operand");
3790 return;
3793 switch (GET_CODE (x))
3795 case REG:
3796 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3797 break;
3799 case MEM:
3800 aarch64_memory_reference_mode = GET_MODE (x);
3801 output_address (XEXP (x, 0));
3802 break;
3804 case LABEL_REF:
3805 case SYMBOL_REF:
3806 output_addr_const (asm_out_file, x);
3807 break;
3809 case CONST_INT:
3810 asm_fprintf (f, "%wd", INTVAL (x));
3811 break;
3813 case CONST_VECTOR:
3814 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3816 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3817 HOST_WIDE_INT_MIN,
3818 HOST_WIDE_INT_MAX));
3819 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3821 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3823 fputc ('0', f);
3825 else
3826 gcc_unreachable ();
3827 break;
3829 case CONST_DOUBLE:
3830 /* CONST_DOUBLE can represent a double-width integer.
3831 In this case, the mode of x is VOIDmode. */
3832 if (GET_MODE (x) == VOIDmode)
3833 ; /* Do Nothing. */
3834 else if (aarch64_float_const_zero_rtx_p (x))
3836 fputc ('0', f);
3837 break;
3839 else if (aarch64_float_const_representable_p (x))
3841 #define buf_size 20
3842 char float_buf[buf_size] = {'\0'};
3843 REAL_VALUE_TYPE r;
3844 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3845 real_to_decimal_for_mode (float_buf, &r,
3846 buf_size, buf_size,
3847 1, GET_MODE (x));
3848 asm_fprintf (asm_out_file, "%s", float_buf);
3849 break;
3850 #undef buf_size
3852 output_operand_lossage ("invalid constant");
3853 return;
3854 default:
3855 output_operand_lossage ("invalid operand");
3856 return;
3858 break;
3860 case 'A':
3861 if (GET_CODE (x) == HIGH)
3862 x = XEXP (x, 0);
3864 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3866 case SYMBOL_SMALL_GOT:
3867 asm_fprintf (asm_out_file, ":got:");
3868 break;
3870 case SYMBOL_SMALL_TLSGD:
3871 asm_fprintf (asm_out_file, ":tlsgd:");
3872 break;
3874 case SYMBOL_SMALL_TLSDESC:
3875 asm_fprintf (asm_out_file, ":tlsdesc:");
3876 break;
3878 case SYMBOL_SMALL_GOTTPREL:
3879 asm_fprintf (asm_out_file, ":gottprel:");
3880 break;
3882 case SYMBOL_SMALL_TPREL:
3883 asm_fprintf (asm_out_file, ":tprel:");
3884 break;
3886 case SYMBOL_TINY_GOT:
3887 gcc_unreachable ();
3888 break;
3890 default:
3891 break;
3893 output_addr_const (asm_out_file, x);
3894 break;
3896 case 'L':
3897 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3899 case SYMBOL_SMALL_GOT:
3900 asm_fprintf (asm_out_file, ":lo12:");
3901 break;
3903 case SYMBOL_SMALL_TLSGD:
3904 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3905 break;
3907 case SYMBOL_SMALL_TLSDESC:
3908 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3909 break;
3911 case SYMBOL_SMALL_GOTTPREL:
3912 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3913 break;
3915 case SYMBOL_SMALL_TPREL:
3916 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3917 break;
3919 case SYMBOL_TINY_GOT:
3920 asm_fprintf (asm_out_file, ":got:");
3921 break;
3923 default:
3924 break;
3926 output_addr_const (asm_out_file, x);
3927 break;
3929 case 'G':
3931 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3933 case SYMBOL_SMALL_TPREL:
3934 asm_fprintf (asm_out_file, ":tprel_hi12:");
3935 break;
3936 default:
3937 break;
3939 output_addr_const (asm_out_file, x);
3940 break;
3942 default:
3943 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3944 return;
3948 void
3949 aarch64_print_operand_address (FILE *f, rtx x)
3951 struct aarch64_address_info addr;
3953 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3954 MEM, true))
3955 switch (addr.type)
3957 case ADDRESS_REG_IMM:
3958 if (addr.offset == const0_rtx)
3959 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3960 else
3961 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
3962 INTVAL (addr.offset));
3963 return;
3965 case ADDRESS_REG_REG:
3966 if (addr.shift == 0)
3967 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
3968 reg_names [REGNO (addr.offset)]);
3969 else
3970 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
3971 reg_names [REGNO (addr.offset)], addr.shift);
3972 return;
3974 case ADDRESS_REG_UXTW:
3975 if (addr.shift == 0)
3976 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
3977 REGNO (addr.offset) - R0_REGNUM);
3978 else
3979 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
3980 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3981 return;
3983 case ADDRESS_REG_SXTW:
3984 if (addr.shift == 0)
3985 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
3986 REGNO (addr.offset) - R0_REGNUM);
3987 else
3988 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
3989 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3990 return;
3992 case ADDRESS_REG_WB:
3993 switch (GET_CODE (x))
3995 case PRE_INC:
3996 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
3997 GET_MODE_SIZE (aarch64_memory_reference_mode));
3998 return;
3999 case POST_INC:
4000 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4001 GET_MODE_SIZE (aarch64_memory_reference_mode));
4002 return;
4003 case PRE_DEC:
4004 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4005 GET_MODE_SIZE (aarch64_memory_reference_mode));
4006 return;
4007 case POST_DEC:
4008 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4009 GET_MODE_SIZE (aarch64_memory_reference_mode));
4010 return;
4011 case PRE_MODIFY:
4012 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4013 INTVAL (addr.offset));
4014 return;
4015 case POST_MODIFY:
4016 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4017 INTVAL (addr.offset));
4018 return;
4019 default:
4020 break;
4022 break;
4024 case ADDRESS_LO_SUM:
4025 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4026 output_addr_const (f, addr.offset);
4027 asm_fprintf (f, "]");
4028 return;
4030 case ADDRESS_SYMBOLIC:
4031 break;
4034 output_addr_const (f, x);
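/* Illustrative aside (not part of the original source): the address
   syntax produced above for each address type, e.g.:

     ADDRESS_REG_IMM     [x0]  or  [x0, 16]
     ADDRESS_REG_REG     [x0, x1]  or  [x0, x1, lsl 3]
     ADDRESS_REG_UXTW    [x0, w1, uxtw 2]
     ADDRESS_REG_SXTW    [x0, w1, sxtw 2]
     ADDRESS_REG_WB      [x0, 8]!  (pre)  or  [x0], 8  (post)
     ADDRESS_LO_SUM      [x0, #:lo12:sym]
     ADDRESS_SYMBOLIC    the bare symbolic constant  */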
4037 bool
4038 aarch64_label_mentioned_p (rtx x)
4040 const char *fmt;
4041 int i;
4043 if (GET_CODE (x) == LABEL_REF)
4044 return true;
4046 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4047 referencing instruction, but they are constant offsets, not
4048 symbols. */
4049 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4050 return false;
4052 fmt = GET_RTX_FORMAT (GET_CODE (x));
4053 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4055 if (fmt[i] == 'E')
4057 int j;
4059 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4060 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4061 return 1;
4063 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4064 return 1;
4067 return 0;
4070 /* Implement REGNO_REG_CLASS. */
4072 enum reg_class
4073 aarch64_regno_regclass (unsigned regno)
4075 if (GP_REGNUM_P (regno))
4076 return CORE_REGS;
4078 if (regno == SP_REGNUM)
4079 return STACK_REG;
4081 if (regno == FRAME_POINTER_REGNUM
4082 || regno == ARG_POINTER_REGNUM)
4083 return POINTER_REGS;
4085 if (FP_REGNUM_P (regno))
4086 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4088 return NO_REGS;
4091 /* Try a machine-dependent way of reloading an illegitimate address
4092 operand. If we find one, push the reload and return the new rtx. */
4095 aarch64_legitimize_reload_address (rtx *x_p,
4096 enum machine_mode mode,
4097 int opnum, int type,
4098 int ind_levels ATTRIBUTE_UNUSED)
4100 rtx x = *x_p;
4102 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4103 if (aarch64_vect_struct_mode_p (mode)
4104 && GET_CODE (x) == PLUS
4105 && REG_P (XEXP (x, 0))
4106 && CONST_INT_P (XEXP (x, 1)))
4108 rtx orig_rtx = x;
4109 x = copy_rtx (x);
4110 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4111 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4112 opnum, (enum reload_type) type);
4113 return x;
4116 /* We must recognize output that we have already generated ourselves. */
4117 if (GET_CODE (x) == PLUS
4118 && GET_CODE (XEXP (x, 0)) == PLUS
4119 && REG_P (XEXP (XEXP (x, 0), 0))
4120 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4121 && CONST_INT_P (XEXP (x, 1)))
4123 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4124 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4125 opnum, (enum reload_type) type);
4126 return x;
4129 /* We wish to handle large displacements off a base register by splitting
4130 the addend across an add and the mem insn. This can cut the number of
4131 extra insns needed from 3 to 1. It is only useful for load/store of a
4132 single register with a 12-bit offset field. */
4133 if (GET_CODE (x) == PLUS
4134 && REG_P (XEXP (x, 0))
4135 && CONST_INT_P (XEXP (x, 1))
4136 && HARD_REGISTER_P (XEXP (x, 0))
4137 && mode != TImode
4138 && mode != TFmode
4139 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4141 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4142 HOST_WIDE_INT low = val & 0xfff;
4143 HOST_WIDE_INT high = val - low;
4144 HOST_WIDE_INT offs;
4145 rtx cst;
4146 enum machine_mode xmode = GET_MODE (x);
4148 /* In ILP32, xmode can be either DImode or SImode. */
4149 gcc_assert (xmode == DImode || xmode == SImode);
4151 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4152 BLKmode alignment. */
4153 if (GET_MODE_SIZE (mode) == 0)
4154 return NULL_RTX;
4156 offs = low % GET_MODE_SIZE (mode);
4158 /* Align misaligned offset by adjusting high part to compensate. */
4159 if (offs != 0)
4161 if (aarch64_uimm12_shift (high + offs))
4163 /* Align down. */
4164 low = low - offs;
4165 high = high + offs;
4167 else
4169 /* Align up. */
4170 offs = GET_MODE_SIZE (mode) - offs;
4171 low = low + offs;
4172 high = high + (low & 0x1000) - offs;
4173 low &= 0xfff;
4177 /* Check for overflow. */
4178 if (high + low != val)
4179 return NULL_RTX;
4181 cst = GEN_INT (high);
4182 if (!aarch64_uimm12_shift (high))
4183 cst = force_const_mem (xmode, cst);
4185 /* Reload high part into base reg, leaving the low part
4186 in the mem instruction.
4187 Note that replacing this gen_rtx_PLUS with plus_constant is
4188 wrong in this case because we rely on the
4189 (plus (plus reg c1) c2) structure being preserved so that
4190 XEXP (*p, 0) in push_reload below uses the correct term. */
4191 x = gen_rtx_PLUS (xmode,
4192 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4193 GEN_INT (low));
4195 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4196 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4197 opnum, (enum reload_type) type);
4198 return x;
4201 return NULL_RTX;
4205 static reg_class_t
4206 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4207 reg_class_t rclass,
4208 enum machine_mode mode,
4209 secondary_reload_info *sri)
4211 /* Without the TARGET_SIMD instructions we cannot move a Q register
4212 to a Q register directly. We need a scratch. */
4213 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4214 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4215 && reg_class_subset_p (rclass, FP_REGS))
4217 if (mode == TFmode)
4218 sri->icode = CODE_FOR_aarch64_reload_movtf;
4219 else if (mode == TImode)
4220 sri->icode = CODE_FOR_aarch64_reload_movti;
4221 return NO_REGS;
4224 /* A TFmode or TImode memory access should be handled via FP_REGS
4225 because AArch64 has richer addressing modes for LDR/STR instructions
4226 than LDP/STP instructions. */
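   /* Illustrative note (not in the original sources): a 16-byte value kept
      in CORE_REGS would be accessed with LDP/STP, whose immediate-offset
      addressing is comparatively restricted, whereas a single LDR/STR of a
      Q register can use register, extended-register and larger scaled
      offsets.  Hence the preference for FP_REGS below.  */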
4227 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4228 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4229 return FP_REGS;
4231 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4232 return CORE_REGS;
4234 return NO_REGS;
4237 static bool
4238 aarch64_can_eliminate (const int from, const int to)
4240 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4241 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4243 if (frame_pointer_needed)
4245 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4246 return true;
4247 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4248 return false;
4249 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4250 && !cfun->calls_alloca)
4251 return true;
4252 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4253 return true;
4255 return false;
4258 return true;
4261 HOST_WIDE_INT
4262 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4264 HOST_WIDE_INT frame_size;
4265 HOST_WIDE_INT offset;
4267 aarch64_layout_frame ();
4268 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4269 + crtl->outgoing_args_size
4270 + cfun->machine->saved_varargs_size);
4272 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4273 offset = frame_size;
4275 if (to == HARD_FRAME_POINTER_REGNUM)
4277 if (from == ARG_POINTER_REGNUM)
4278 return offset - crtl->outgoing_args_size;
4280 if (from == FRAME_POINTER_REGNUM)
4281 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4284 if (to == STACK_POINTER_REGNUM)
4286 if (from == FRAME_POINTER_REGNUM)
4288 HOST_WIDE_INT elim = crtl->outgoing_args_size
4289 + cfun->machine->frame.saved_regs_size
4290 + get_frame_size ();
4291 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4292 return elim;
4296 return offset;
4300 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4301 previous frame. */
4304 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4306 if (count != 0)
4307 return const0_rtx;
4308 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4312 static void
4313 aarch64_asm_trampoline_template (FILE *f)
4315 if (TARGET_ILP32)
4317 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4318 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4320 else
4322 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4323 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4325 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4326 assemble_aligned_integer (4, const0_rtx);
4327 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4328 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
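   /* Rough layout of the template above in the LP64 case (illustrative,
      offsets inferred from the code): bytes 0-11 hold the two LDRs and the
      BR, bytes 12-15 are the zero pad, bytes 16-23 receive the target
      function address and bytes 24-31 the static chain value; the two
      literal loads use PC-relative offsets .+16 and .+20 to reach those
      slots.  aarch64_trampoline_init below fills in the last two words.  */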
4331 static void
4332 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4334 rtx fnaddr, mem, a_tramp;
4335 const int tramp_code_sz = 16;
4337 /* Don't need to copy the trailing D-words; we fill those in below. */
4338 emit_block_move (m_tramp, assemble_trampoline_template (),
4339 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4340 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4341 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4342 if (GET_MODE (fnaddr) != ptr_mode)
4343 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4344 emit_move_insn (mem, fnaddr);
4346 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4347 emit_move_insn (mem, chain_value);
4349 /* XXX We should really define a "clear_cache" pattern and use
4350 gen_clear_cache(). */
4351 a_tramp = XEXP (m_tramp, 0);
4352 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4353 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4354 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4355 ptr_mode);
4358 static unsigned char
4359 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4361 switch (regclass)
4363 case CORE_REGS:
4364 case POINTER_REGS:
4365 case GENERAL_REGS:
4366 case ALL_REGS:
4367 case FP_REGS:
4368 case FP_LO_REGS:
4369 return
4370 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4371 (GET_MODE_SIZE (mode) + 7) / 8;
4372 case STACK_REG:
4373 return 1;
4375 case NO_REGS:
4376 return 0;
4378 default:
4379 break;
4381 gcc_unreachable ();
4384 static reg_class_t
4385 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4387 if (regclass == POINTER_REGS)
4388 return GENERAL_REGS;
4390 if (regclass == STACK_REG)
4392 if (REG_P(x)
4393 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4394 return regclass;
4396 return NO_REGS;
4399 /* If it's an integer immediate that MOVI can't handle, then
4400 FP_REGS is not an option, so we return NO_REGS instead. */
4401 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4402 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4403 return NO_REGS;
4405 /* Register elimination can result in a request for
4406 SP+constant->FP_REGS.  We cannot support such operations, which
4407 use SP as source and an FP_REG as destination, so reject them
4408 right away. */
4409 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4411 rtx lhs = XEXP (x, 0);
4413 /* Look through a possible SUBREG introduced by ILP32. */
4414 if (GET_CODE (lhs) == SUBREG)
4415 lhs = SUBREG_REG (lhs);
4417 gcc_assert (REG_P (lhs));
4418 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4419 POINTER_REGS));
4420 return NO_REGS;
4423 return regclass;
4426 void
4427 aarch64_asm_output_labelref (FILE* f, const char *name)
4429 asm_fprintf (f, "%U%s", name);
4432 static void
4433 aarch64_elf_asm_constructor (rtx symbol, int priority)
4435 if (priority == DEFAULT_INIT_PRIORITY)
4436 default_ctor_section_asm_out_constructor (symbol, priority);
4437 else
4439 section *s;
4440 char buf[18];
4441 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4442 s = get_section (buf, SECTION_WRITE, NULL);
4443 switch_to_section (s);
4444 assemble_align (POINTER_SIZE);
4445 assemble_aligned_integer (POINTER_BYTES, symbol);
4449 static void
4450 aarch64_elf_asm_destructor (rtx symbol, int priority)
4452 if (priority == DEFAULT_INIT_PRIORITY)
4453 default_dtor_section_asm_out_destructor (symbol, priority);
4454 else
4456 section *s;
4457 char buf[18];
4458 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4459 s = get_section (buf, SECTION_WRITE, NULL);
4460 switch_to_section (s);
4461 assemble_align (POINTER_SIZE);
4462 assemble_aligned_integer (POINTER_BYTES, symbol);
4466 const char*
4467 aarch64_output_casesi (rtx *operands)
4469 char buf[100];
4470 char label[100];
4471 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4472 int index;
4473 static const char *const patterns[4][2] =
4476 "ldrb\t%w3, [%0,%w1,uxtw]",
4477 "add\t%3, %4, %w3, sxtb #2"
4480 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4481 "add\t%3, %4, %w3, sxth #2"
4484 "ldr\t%w3, [%0,%w1,uxtw #2]",
4485 "add\t%3, %4, %w3, sxtw #2"
4487 /* We assume that DImode is only generated when not optimizing and
4488 that we don't really need 64-bit address offsets. That would
4489 imply an object file with 8GB of code in a single function! */
4491 "ldr\t%w3, [%0,%w1,uxtw #2]",
4492 "add\t%3, %4, %w3, sxtw #2"
4496 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4498 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4500 gcc_assert (index >= 0 && index <= 3);
4502 /* Need to implement table size reduction, by changing the code below. */
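   /* For illustration (not from the original sources), the QImode variant
      of the sequence emitted below looks like:
         ldrb    w3, [x0, w1, uxtw]
         adr     x4, .Lrtx<N>
         add     x3, x4, w3, sxtb #2
         br      x3
      .Lrtx<N>:
      i.e. the table entry is loaded, scaled by 4 and added to the address
      of the label that follows the dispatch code.  */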
4503 output_asm_insn (patterns[index][0], operands);
4504 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4505 snprintf (buf, sizeof (buf),
4506 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4507 output_asm_insn (buf, operands);
4508 output_asm_insn (patterns[index][1], operands);
4509 output_asm_insn ("br\t%3", operands);
4510 assemble_label (asm_out_file, label);
4511 return "";
4515 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4516 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4517 operator. */
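   /* Illustrative examples (not part of the original comment): with
      shift == 0 a mask of 0xff, 0xffff or 0xffffffff yields 8, 16 or 32
      respectively; with shift == 2 a mask of 0x3fc (0xff << 2) still
      yields 8.  Any other combination returns 0, meaning no UXTB/UXTH/UXTW
      form applies.  */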
4520 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4522 if (shift >= 0 && shift <= 3)
4524 int size;
4525 for (size = 8; size <= 32; size *= 2)
4527 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4528 if (mask == bits << shift)
4529 return size;
4532 return 0;
4535 static bool
4536 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4537 const_rtx x ATTRIBUTE_UNUSED)
4539 /* We can't use blocks for constants when we're using a per-function
4540 constant pool. */
4541 return false;
4544 static section *
4545 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4546 rtx x ATTRIBUTE_UNUSED,
4547 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4549 /* Force all constant pool entries into the current function section. */
4550 return function_section (current_function_decl);
4554 /* Costs. */
4556 /* Helper function for rtx cost calculation. Strip a shift expression
4557 from X. Returns the inner operand if successful, or the original
4558 expression on failure. */
4559 static rtx
4560 aarch64_strip_shift (rtx x)
4562 rtx op = x;
4564 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4565 we can convert both to ROR during final output. */
4566 if ((GET_CODE (op) == ASHIFT
4567 || GET_CODE (op) == ASHIFTRT
4568 || GET_CODE (op) == LSHIFTRT
4569 || GET_CODE (op) == ROTATERT
4570 || GET_CODE (op) == ROTATE)
4571 && CONST_INT_P (XEXP (op, 1)))
4572 return XEXP (op, 0);
4574 if (GET_CODE (op) == MULT
4575 && CONST_INT_P (XEXP (op, 1))
4576 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4577 return XEXP (op, 0);
4579 return x;
4582 /* Helper function for rtx cost calculation. Strip an extend
4583 expression from X. Returns the inner operand if successful, or the
4584 original expression on failure. We deal with a number of possible
4585 canonicalization variations here. */
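   /* Shapes handled below, shown informally (illustrative only):
        (zero_extract (mult reg 2^n) size 0)        extend-in-extract form
        (and (mult reg 2^n) mask)                   zero-extend as an AND
        (ashift (any_extend reg) c), c in 1..4      extended register + shift
        (any_extend reg)                            plain extend
      In each case the inner operand is returned.  */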
4586 static rtx
4587 aarch64_strip_extend (rtx x)
4589 rtx op = x;
4591 /* Zero and sign extraction of a widened value. */
4592 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4593 && XEXP (op, 2) == const0_rtx
4594 && GET_CODE (XEXP (op, 0)) == MULT
4595 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4596 XEXP (op, 1)))
4597 return XEXP (XEXP (op, 0), 0);
4599 /* It can also be represented (for zero-extend) as an AND with an
4600 immediate. */
4601 if (GET_CODE (op) == AND
4602 && GET_CODE (XEXP (op, 0)) == MULT
4603 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4604 && CONST_INT_P (XEXP (op, 1))
4605 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4606 INTVAL (XEXP (op, 1))) != 0)
4607 return XEXP (XEXP (op, 0), 0);
4609 /* Now handle extended register, as this may also have an optional
4610 left shift by 1..4. */
4611 if (GET_CODE (op) == ASHIFT
4612 && CONST_INT_P (XEXP (op, 1))
4613 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4614 op = XEXP (op, 0);
4616 if (GET_CODE (op) == ZERO_EXTEND
4617 || GET_CODE (op) == SIGN_EXTEND)
4618 op = XEXP (op, 0);
4620 if (op != x)
4621 return op;
4623 return x;
4626 /* Helper function for rtx cost calculation. Calculate the cost of
4627 a MULT, which may be part of a multiply-accumulate rtx. Return
4628 the calculated cost of the expression, recursing manually into
4629 operands where needed. */
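   /* Example of the costing below (illustrative): for
      (plus (mult (reg x1) (const_int 8)) (reg x2)) the multiply is treated
      as a shift that fuses with the addition, so it is costed as a single
      ADD (shifted register) rather than as MUL + ADD; a genuine
      (plus (mult (reg) (reg)) (reg)) is costed as MADD instead.  */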
4631 static int
4632 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4634 rtx op0, op1;
4635 const struct cpu_cost_table *extra_cost
4636 = aarch64_tune_params->insn_extra_cost;
4637 int cost = 0;
4638 bool maybe_fma = (outer == PLUS || outer == MINUS);
4639 enum machine_mode mode = GET_MODE (x);
4641 gcc_checking_assert (code == MULT);
4643 op0 = XEXP (x, 0);
4644 op1 = XEXP (x, 1);
4646 if (VECTOR_MODE_P (mode))
4647 mode = GET_MODE_INNER (mode);
4649 /* Integer multiply/fma. */
4650 if (GET_MODE_CLASS (mode) == MODE_INT)
4652 /* The multiply will be canonicalized as a shift, cost it as such. */
4653 if (CONST_INT_P (op1)
4654 && exact_log2 (INTVAL (op1)) > 0)
4656 if (speed)
4658 if (maybe_fma)
4659 /* ADD (shifted register). */
4660 cost += extra_cost->alu.arith_shift;
4661 else
4662 /* LSL (immediate). */
4663 cost += extra_cost->alu.shift;
4666 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4668 return cost;
4671 /* Integer multiplies or FMAs have zero/sign extending variants. */
4672 if ((GET_CODE (op0) == ZERO_EXTEND
4673 && GET_CODE (op1) == ZERO_EXTEND)
4674 || (GET_CODE (op0) == SIGN_EXTEND
4675 && GET_CODE (op1) == SIGN_EXTEND))
4677 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4678 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4680 if (speed)
4682 if (maybe_fma)
4683 /* MADD/SMADDL/UMADDL. */
4684 cost += extra_cost->mult[0].extend_add;
4685 else
4686 /* MUL/SMULL/UMULL. */
4687 cost += extra_cost->mult[0].extend;
4690 return cost;
4693 /* This is either an integer multiply or an FMA. In both cases
4694 we want to recurse and cost the operands. */
4695 cost += rtx_cost (op0, MULT, 0, speed)
4696 + rtx_cost (op1, MULT, 1, speed);
4698 if (speed)
4700 if (maybe_fma)
4701 /* MADD. */
4702 cost += extra_cost->mult[mode == DImode].add;
4703 else
4704 /* MUL. */
4705 cost += extra_cost->mult[mode == DImode].simple;
4708 return cost;
4710 else
4712 if (speed)
4714 /* Floating-point FMA/FMUL can also support negations of the
4715 operands. */
4716 if (GET_CODE (op0) == NEG)
4717 op0 = XEXP (op0, 0);
4718 if (GET_CODE (op1) == NEG)
4719 op1 = XEXP (op1, 0);
4721 if (maybe_fma)
4722 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4723 cost += extra_cost->fp[mode == DFmode].fma;
4724 else
4725 /* FMUL/FNMUL. */
4726 cost += extra_cost->fp[mode == DFmode].mult;
4729 cost += rtx_cost (op0, MULT, 0, speed)
4730 + rtx_cost (op1, MULT, 1, speed);
4731 return cost;
4735 static int
4736 aarch64_address_cost (rtx x,
4737 enum machine_mode mode,
4738 addr_space_t as ATTRIBUTE_UNUSED,
4739 bool speed)
4741 enum rtx_code c = GET_CODE (x);
4742 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4743 struct aarch64_address_info info;
4744 int cost = 0;
4745 info.shift = 0;
4747 if (!aarch64_classify_address (&info, x, mode, c, false))
4749 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4751 /* This is a CONST or SYMBOL ref which will be split
4752 in a different way depending on the code model in use.
4753 Cost it through the generic infrastructure. */
4754 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4755 /* Divide through by the cost of one instruction to
4756 bring it to the same units as the address costs. */
4757 cost_symbol_ref /= COSTS_N_INSNS (1);
4758 /* The cost is then the cost of preparing the address,
4759 followed by an immediate (possibly 0) offset. */
4760 return cost_symbol_ref + addr_cost->imm_offset;
4762 else
4764 /* This is most likely a jump table from a case
4765 statement. */
4766 return addr_cost->register_offset;
4770 switch (info.type)
4772 case ADDRESS_LO_SUM:
4773 case ADDRESS_SYMBOLIC:
4774 case ADDRESS_REG_IMM:
4775 cost += addr_cost->imm_offset;
4776 break;
4778 case ADDRESS_REG_WB:
4779 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4780 cost += addr_cost->pre_modify;
4781 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4782 cost += addr_cost->post_modify;
4783 else
4784 gcc_unreachable ();
4786 break;
4788 case ADDRESS_REG_REG:
4789 cost += addr_cost->register_offset;
4790 break;
4792 case ADDRESS_REG_UXTW:
4793 case ADDRESS_REG_SXTW:
4794 cost += addr_cost->register_extend;
4795 break;
4797 default:
4798 gcc_unreachable ();
4802 if (info.shift > 0)
4804 /* For the sake of calculating the cost of the shifted register
4805 component, we can treat same sized modes in the same way. */
4806 switch (GET_MODE_BITSIZE (mode))
4808 case 16:
4809 cost += addr_cost->addr_scale_costs.hi;
4810 break;
4812 case 32:
4813 cost += addr_cost->addr_scale_costs.si;
4814 break;
4816 case 64:
4817 cost += addr_cost->addr_scale_costs.di;
4818 break;
4820 /* We can't tell, or this is a 128-bit vector. */
4821 default:
4822 cost += addr_cost->addr_scale_costs.ti;
4823 break;
4827 return cost;
4830 /* Return true if the RTX X in mode MODE is a zero or sign extract
4831 usable in an ADD or SUB (extended register) instruction. */
4832 static bool
4833 aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
4835 /* Catch add with a sign extract.
4836 This is add_<optab><mode>_multp2. */
4837 if (GET_CODE (x) == SIGN_EXTRACT
4838 || GET_CODE (x) == ZERO_EXTRACT)
4840 rtx op0 = XEXP (x, 0);
4841 rtx op1 = XEXP (x, 1);
4842 rtx op2 = XEXP (x, 2);
4844 if (GET_CODE (op0) == MULT
4845 && CONST_INT_P (op1)
4846 && op2 == const0_rtx
4847 && CONST_INT_P (XEXP (op0, 1))
4848 && aarch64_is_extend_from_extract (mode,
4849 XEXP (op0, 1),
4850 op1))
4852 return true;
4856 return false;
4859 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
4860 storing it in *COST. Result is true if the total cost of the operation
4861 has now been calculated. */
4862 static bool
4863 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
4865 rtx inner;
4866 rtx comparator;
4867 enum rtx_code cmpcode;
4869 if (COMPARISON_P (op0))
4871 inner = XEXP (op0, 0);
4872 comparator = XEXP (op0, 1);
4873 cmpcode = GET_CODE (op0);
4875 else
4877 inner = op0;
4878 comparator = const0_rtx;
4879 cmpcode = NE;
4882 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
4884 /* Conditional branch. */
4885 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
4886 return true;
4887 else
4889 if (cmpcode == NE || cmpcode == EQ)
4891 if (comparator == const0_rtx)
4893 /* TBZ/TBNZ/CBZ/CBNZ. */
4894 if (GET_CODE (inner) == ZERO_EXTRACT)
4895 /* TBZ/TBNZ. */
4896 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
4897 0, speed);
4898 else
4899 /* CBZ/CBNZ. */
4900 *cost += rtx_cost (inner, cmpcode, 0, speed);
4902 return true;
4905 else if (cmpcode == LT || cmpcode == GE)
4907 /* TBZ/TBNZ. */
4908 if (comparator == const0_rtx)
4909 return true;
4913 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
4915 /* It's a conditional operation based on the status flags,
4916 so it must be some flavor of CSEL. */
4918 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
4919 if (GET_CODE (op1) == NEG
4920 || GET_CODE (op1) == NOT
4921 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
4922 op1 = XEXP (op1, 0);
4924 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
4925 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
4926 return true;
4929 /* We don't know what this is; cost all the operands. */
4930 return false;
4933 /* Calculate the cost of calculating X, storing it in *COST. Result
4934 is true if the total cost of the operation has now been calculated. */
4935 static bool
4936 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4937 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4939 rtx op0, op1, op2;
4940 const struct cpu_cost_table *extra_cost
4941 = aarch64_tune_params->insn_extra_cost;
4942 enum machine_mode mode = GET_MODE (x);
4944 /* By default, assume that everything has equivalent cost to the
4945 cheapest instruction. Any additional costs are applied as a delta
4946 above this default. */
4947 *cost = COSTS_N_INSNS (1);
4949 /* TODO: The cost infrastructure currently does not handle
4950 vector operations. Assume that all vector operations
4951 are equally expensive. */
4952 if (VECTOR_MODE_P (mode))
4954 if (speed)
4955 *cost += extra_cost->vect.alu;
4956 return true;
4959 switch (code)
4961 case SET:
4962 /* The cost depends entirely on the operands to SET. */
4963 *cost = 0;
4964 op0 = SET_DEST (x);
4965 op1 = SET_SRC (x);
4967 switch (GET_CODE (op0))
4969 case MEM:
4970 if (speed)
4972 rtx address = XEXP (op0, 0);
4973 if (GET_MODE_CLASS (mode) == MODE_INT)
4974 *cost += extra_cost->ldst.store;
4975 else if (mode == SFmode)
4976 *cost += extra_cost->ldst.storef;
4977 else if (mode == DFmode)
4978 *cost += extra_cost->ldst.stored;
4980 *cost +=
4981 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4982 0, speed));
4985 *cost += rtx_cost (op1, SET, 1, speed);
4986 return true;
4988 case SUBREG:
4989 if (! REG_P (SUBREG_REG (op0)))
4990 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4992 /* Fall through. */
4993 case REG:
4994 /* const0_rtx is in general free, but we will use an
4995 instruction to set a register to 0. */
4996 if (REG_P (op1) || op1 == const0_rtx)
4998 /* The cost is 1 per register copied. */
4999 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5000 / UNITS_PER_WORD;
5001 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5003 else
5004 /* Cost is just the cost of the RHS of the set. */
5005 *cost += rtx_cost (op1, SET, 1, speed);
5006 return true;
5008 case ZERO_EXTRACT:
5009 case SIGN_EXTRACT:
5010 /* Bit-field insertion. Strip any redundant widening of
5011 the RHS to meet the width of the target. */
5012 if (GET_CODE (op1) == SUBREG)
5013 op1 = SUBREG_REG (op1);
5014 if ((GET_CODE (op1) == ZERO_EXTEND
5015 || GET_CODE (op1) == SIGN_EXTEND)
5016 && GET_CODE (XEXP (op0, 1)) == CONST_INT
5017 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5018 >= INTVAL (XEXP (op0, 1))))
5019 op1 = XEXP (op1, 0);
5021 if (CONST_INT_P (op1))
5023 /* MOV immediate is assumed to always be cheap. */
5024 *cost = COSTS_N_INSNS (1);
5026 else
5028 /* BFM. */
5029 if (speed)
5030 *cost += extra_cost->alu.bfi;
5031 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5034 return true;
5036 default:
5037 /* We can't make sense of this; assume the default cost. */
5038 *cost = COSTS_N_INSNS (1);
5039 break;
5041 return false;
5043 case CONST_INT:
5044 /* If an instruction can incorporate a constant directly, its
5045 pattern avoids calling
5046 rtx_cost() on the constant. If rtx_cost() is called on a
5047 constant, then it is usually because the constant must be
5048 moved into a register by one or more instructions.
5050 The exception is constant 0, which can be expressed
5051 as XZR/WZR and is therefore free. The exception to this is
5052 if we have (set (reg) (const0_rtx)) in which case we must cost
5053 the move. However, we can catch that when we cost the SET, so
5054 we don't need to consider that here. */
5055 if (x == const0_rtx)
5056 *cost = 0;
5057 else
5059 /* To an approximation, building any other constant is
5060 proportionally expensive to the number of instructions
5061 required to build that constant. This is true whether we
5062 are compiling for SPEED or otherwise. */
5063 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
5064 INTVAL (x),
5065 false));
5067 return true;
5069 case CONST_DOUBLE:
5070 if (speed)
5072 /* mov[df,sf]_aarch64. */
5073 if (aarch64_float_const_representable_p (x))
5074 /* FMOV (scalar immediate). */
5075 *cost += extra_cost->fp[mode == DFmode].fpconst;
5076 else if (!aarch64_float_const_zero_rtx_p (x))
5078 /* This will be a load from memory. */
5079 if (mode == DFmode)
5080 *cost += extra_cost->ldst.loadd;
5081 else
5082 *cost += extra_cost->ldst.loadf;
5084 else
5085 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5086 or MOV v0.s[0], wzr - neither of which is modeled by the
5087 cost tables. Just use the default cost. */
5092 return true;
5094 case MEM:
5095 if (speed)
5097 /* For loads we want the base cost of a load, plus an
5098 approximation for the additional cost of the addressing
5099 mode. */
5100 rtx address = XEXP (x, 0);
5101 if (GET_MODE_CLASS (mode) == MODE_INT)
5102 *cost += extra_cost->ldst.load;
5103 else if (mode == SFmode)
5104 *cost += extra_cost->ldst.loadf;
5105 else if (mode == DFmode)
5106 *cost += extra_cost->ldst.loadd;
5108 *cost +=
5109 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5110 0, speed));
5113 return true;
5115 case NEG:
5116 op0 = XEXP (x, 0);
5118 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5120 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5121 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5123 /* CSETM. */
5124 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5125 return true;
5128 /* Cost this as SUB wzr, X. */
5129 op0 = CONST0_RTX (GET_MODE (x));
5130 op1 = XEXP (x, 0);
5131 goto cost_minus;
5134 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5136 /* Support (neg(fma...)) as a single instruction only if
5137 sign of zeros is unimportant. This matches the decision
5138 making in aarch64.md. */
5139 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5141 /* FNMADD. */
5142 *cost = rtx_cost (op0, NEG, 0, speed);
5143 return true;
5145 if (speed)
5146 /* FNEG. */
5147 *cost += extra_cost->fp[mode == DFmode].neg;
5148 return false;
5151 return false;
5153 case COMPARE:
5154 op0 = XEXP (x, 0);
5155 op1 = XEXP (x, 1);
5157 if (op1 == const0_rtx
5158 && GET_CODE (op0) == AND)
5160 x = op0;
5161 goto cost_logic;
5164 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5166 /* TODO: A write to the CC flags possibly costs extra, this
5167 needs encoding in the cost tables. */
5169 /* CC_ZESWPmode supports zero extend for free. */
5170 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5171 op0 = XEXP (op0, 0);
5173 /* ANDS. */
5174 if (GET_CODE (op0) == AND)
5176 x = op0;
5177 goto cost_logic;
5180 if (GET_CODE (op0) == PLUS)
5182 /* ADDS (and CMN alias). */
5183 x = op0;
5184 goto cost_plus;
5187 if (GET_CODE (op0) == MINUS)
5189 /* SUBS. */
5190 x = op0;
5191 goto cost_minus;
5194 if (GET_CODE (op1) == NEG)
5196 /* CMN. */
5197 if (speed)
5198 *cost += extra_cost->alu.arith;
5200 *cost += rtx_cost (op0, COMPARE, 0, speed);
5201 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5202 return true;
5205 /* CMP.
5207 Compare can freely swap the order of operands, and
5208 canonicalization puts the more complex operation first.
5209 But the integer MINUS logic expects the shift/extend
5210 operation in op1. */
5211 if (! (REG_P (op0)
5212 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5214 op0 = XEXP (x, 1);
5215 op1 = XEXP (x, 0);
5217 goto cost_minus;
5220 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5222 /* FCMP. */
5223 if (speed)
5224 *cost += extra_cost->fp[mode == DFmode].compare;
5226 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5228 /* FCMP supports constant 0.0 for no extra cost. */
5229 return true;
5231 return false;
5234 return false;
5236 case MINUS:
5238 op0 = XEXP (x, 0);
5239 op1 = XEXP (x, 1);
5241 cost_minus:
5242 /* Detect valid immediates. */
5243 if ((GET_MODE_CLASS (mode) == MODE_INT
5244 || (GET_MODE_CLASS (mode) == MODE_CC
5245 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5246 && CONST_INT_P (op1)
5247 && aarch64_uimm12_shift (INTVAL (op1)))
5249 *cost += rtx_cost (op0, MINUS, 0, speed);
5251 if (speed)
5252 /* SUB(S) (immediate). */
5253 *cost += extra_cost->alu.arith;
5254 return true;
5258 /* Look for SUB (extended register). */
5259 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5261 if (speed)
5262 *cost += extra_cost->alu.arith_shift;
5264 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5265 (enum rtx_code) GET_CODE (op1),
5266 0, speed);
5267 return true;
5270 rtx new_op1 = aarch64_strip_extend (op1);
5272 /* Cost this as an FMA-alike operation. */
5273 if ((GET_CODE (new_op1) == MULT
5274 || GET_CODE (new_op1) == ASHIFT)
5275 && code != COMPARE)
5277 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5278 (enum rtx_code) code,
5279 speed);
5280 *cost += rtx_cost (op0, MINUS, 0, speed);
5281 return true;
5284 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5286 if (speed)
5288 if (GET_MODE_CLASS (mode) == MODE_INT)
5289 /* SUB(S). */
5290 *cost += extra_cost->alu.arith;
5291 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5292 /* FSUB. */
5293 *cost += extra_cost->fp[mode == DFmode].addsub;
5295 return true;
5298 case PLUS:
5300 rtx new_op0;
5302 op0 = XEXP (x, 0);
5303 op1 = XEXP (x, 1);
5305 cost_plus:
5306 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5307 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5309 /* CSINC. */
5310 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5311 *cost += rtx_cost (op1, PLUS, 1, speed);
5312 return true;
5315 if (GET_MODE_CLASS (mode) == MODE_INT
5316 && CONST_INT_P (op1)
5317 && aarch64_uimm12_shift (INTVAL (op1)))
5319 *cost += rtx_cost (op0, PLUS, 0, speed);
5321 if (speed)
5322 /* ADD (immediate). */
5323 *cost += extra_cost->alu.arith;
5324 return true;
5327 /* Look for ADD (extended register). */
5328 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5330 if (speed)
5331 *cost += extra_cost->alu.arith_shift;
5333 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5334 (enum rtx_code) GET_CODE (op0),
5335 0, speed);
5336 return true;
5339 /* Strip any extend, leave shifts behind as we will
5340 cost them through mult_cost. */
5341 new_op0 = aarch64_strip_extend (op0);
5343 if (GET_CODE (new_op0) == MULT
5344 || GET_CODE (new_op0) == ASHIFT)
5346 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5347 speed);
5348 *cost += rtx_cost (op1, PLUS, 1, speed);
5349 return true;
5352 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5353 + rtx_cost (op1, PLUS, 1, speed));
5355 if (speed)
5357 if (GET_MODE_CLASS (mode) == MODE_INT)
5358 /* ADD. */
5359 *cost += extra_cost->alu.arith;
5360 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5361 /* FADD. */
5362 *cost += extra_cost->fp[mode == DFmode].addsub;
5364 return true;
5367 case BSWAP:
5368 *cost = COSTS_N_INSNS (1);
5370 if (speed)
5371 *cost += extra_cost->alu.rev;
5373 return false;
5375 case IOR:
5376 if (aarch_rev16_p (x))
5378 *cost = COSTS_N_INSNS (1);
5380 if (speed)
5381 *cost += extra_cost->alu.rev;
5383 return true;
5385 /* Fall through. */
5386 case XOR:
5387 case AND:
5388 cost_logic:
5389 op0 = XEXP (x, 0);
5390 op1 = XEXP (x, 1);
5392 if (code == AND
5393 && GET_CODE (op0) == MULT
5394 && CONST_INT_P (XEXP (op0, 1))
5395 && CONST_INT_P (op1)
5396 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5397 INTVAL (op1)) != 0)
5399 /* This is a UBFM/SBFM. */
5400 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5401 if (speed)
5402 *cost += extra_cost->alu.bfx;
5403 return true;
5406 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5408 /* We possibly get the immediate for free; this is not
5409 modelled. */
5410 if (CONST_INT_P (op1)
5411 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5413 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5415 if (speed)
5416 *cost += extra_cost->alu.logical;
5418 return true;
5420 else
5422 rtx new_op0 = op0;
5424 /* Handle ORN, EON, or BIC. */
5425 if (GET_CODE (op0) == NOT)
5426 op0 = XEXP (op0, 0);
5428 new_op0 = aarch64_strip_shift (op0);
5430 /* If we had a shift on op0 then this is a logical-shift-
5431 by-register/immediate operation. Otherwise, this is just
5432 a logical operation. */
5433 if (speed)
5435 if (new_op0 != op0)
5437 /* Shift by immediate. */
5438 if (CONST_INT_P (XEXP (op0, 1)))
5439 *cost += extra_cost->alu.log_shift;
5440 else
5441 *cost += extra_cost->alu.log_shift_reg;
5443 else
5444 *cost += extra_cost->alu.logical;
5447 /* In both cases we want to cost both operands. */
5448 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5449 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5451 return true;
5454 return false;
5456 case NOT:
5457 /* MVN. */
5458 if (speed)
5459 *cost += extra_cost->alu.logical;
5461 /* The logical instruction could have the shifted register form,
5462 but the cost is the same if the shift is processed as a separate
5463 instruction, so we don't bother with it here. */
5464 return false;
5466 case ZERO_EXTEND:
5468 op0 = XEXP (x, 0);
5469 /* If a value is written in SI mode, then zero extended to DI
5470 mode, the operation will in general be free as a write to
5471 a 'w' register implicitly zeroes the upper bits of an 'x'
5472 register. However, if this is
5474 (set (reg) (zero_extend (reg)))
5476 we must cost the explicit register move. */
5477 if (mode == DImode
5478 && GET_MODE (op0) == SImode
5479 && outer == SET)
5481 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5483 if (!op_cost && speed)
5484 /* MOV. */
5485 *cost += extra_cost->alu.extend;
5486 else
5487 /* Free, the cost is that of the SI mode operation. */
5488 *cost = op_cost;
5490 return true;
5492 else if (MEM_P (XEXP (x, 0)))
5494 /* All loads can zero extend to any size for free. */
5495 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5496 return true;
5499 /* UXTB/UXTH. */
5500 if (speed)
5501 *cost += extra_cost->alu.extend;
5503 return false;
5505 case SIGN_EXTEND:
5506 if (MEM_P (XEXP (x, 0)))
5508 /* LDRSH. */
5509 if (speed)
5511 rtx address = XEXP (XEXP (x, 0), 0);
5512 *cost += extra_cost->ldst.load_sign_extend;
5514 *cost +=
5515 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5516 0, speed));
5518 return true;
5521 if (speed)
5522 *cost += extra_cost->alu.extend;
5523 return false;
5525 case ASHIFT:
5526 op0 = XEXP (x, 0);
5527 op1 = XEXP (x, 1);
5529 if (CONST_INT_P (op1))
5531 /* LSL (immediate), UBFM, UBFIZ and friends.  These are all
5532 aliases. */
5533 if (speed)
5534 *cost += extra_cost->alu.shift;
5536 /* We can incorporate zero/sign extend for free. */
5537 if (GET_CODE (op0) == ZERO_EXTEND
5538 || GET_CODE (op0) == SIGN_EXTEND)
5539 op0 = XEXP (op0, 0);
5541 *cost += rtx_cost (op0, ASHIFT, 0, speed);
5542 return true;
5544 else
5546 /* LSLV. */
5547 if (speed)
5548 *cost += extra_cost->alu.shift_reg;
5550 return false; /* All arguments need to be in registers. */
5553 case ROTATE:
5554 case ROTATERT:
5555 case LSHIFTRT:
5556 case ASHIFTRT:
5557 op0 = XEXP (x, 0);
5558 op1 = XEXP (x, 1);
5560 if (CONST_INT_P (op1))
5562 /* ASR (immediate) and friends. */
5563 if (speed)
5564 *cost += extra_cost->alu.shift;
5566 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5567 return true;
5569 else
5572 /* ASR (register) and friends. */
5573 if (speed)
5574 *cost += extra_cost->alu.shift_reg;
5576 return false; /* All arguments need to be in registers. */
5579 case SYMBOL_REF:
5581 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5583 /* LDR. */
5584 if (speed)
5585 *cost += extra_cost->ldst.load;
5587 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5588 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5590 /* ADRP, followed by ADD. */
5591 *cost += COSTS_N_INSNS (1);
5592 if (speed)
5593 *cost += 2 * extra_cost->alu.arith;
5595 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5596 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5598 /* ADR. */
5599 if (speed)
5600 *cost += extra_cost->alu.arith;
5603 if (flag_pic)
5605 /* One extra load instruction, after accessing the GOT. */
5606 *cost += COSTS_N_INSNS (1);
5607 if (speed)
5608 *cost += extra_cost->ldst.load;
5610 return true;
5612 case HIGH:
5613 case LO_SUM:
5614 /* ADRP/ADD (immediate). */
5615 if (speed)
5616 *cost += extra_cost->alu.arith;
5617 return true;
5619 case ZERO_EXTRACT:
5620 case SIGN_EXTRACT:
5621 /* UBFX/SBFX. */
5622 if (speed)
5623 *cost += extra_cost->alu.bfx;
5625 /* We can trust that the immediates used will be correct (there
5626 are no by-register forms), so we need only cost op0. */
5627 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
5628 return true;
5630 case MULT:
5631 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5632 /* aarch64_rtx_mult_cost always handles recursion to its
5633 operands. */
5634 return true;
5636 case MOD:
5637 case UMOD:
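 /* Descriptive note (not in the original sources): AArch64 has no integer
    remainder instruction; a % b is expanded as a division followed by
    MSUB, which is why the integer cost below sums a multiply-add and a
    divide.  */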
5638 if (speed)
5640 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5641 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5642 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5643 else if (GET_MODE (x) == DFmode)
5644 *cost += (extra_cost->fp[1].mult
5645 + extra_cost->fp[1].div);
5646 else if (GET_MODE (x) == SFmode)
5647 *cost += (extra_cost->fp[0].mult
5648 + extra_cost->fp[0].div);
5650 return false; /* All arguments need to be in registers. */
5652 case DIV:
5653 case UDIV:
5654 case SQRT:
5655 if (speed)
5657 if (GET_MODE_CLASS (mode) == MODE_INT)
5658 /* There is no integer SQRT, so only DIV and UDIV can get
5659 here. */
5660 *cost += extra_cost->mult[mode == DImode].idiv;
5661 else
5662 *cost += extra_cost->fp[mode == DFmode].div;
5664 return false; /* All arguments need to be in registers. */
5666 case IF_THEN_ELSE:
5667 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
5668 XEXP (x, 2), cost, speed);
5670 case EQ:
5671 case NE:
5672 case GT:
5673 case GTU:
5674 case LT:
5675 case LTU:
5676 case GE:
5677 case GEU:
5678 case LE:
5679 case LEU:
5681 return false; /* All arguments must be in registers. */
5683 case FMA:
5684 op0 = XEXP (x, 0);
5685 op1 = XEXP (x, 1);
5686 op2 = XEXP (x, 2);
5688 if (speed)
5689 *cost += extra_cost->fp[mode == DFmode].fma;
5691 /* FMSUB, FNMADD, and FNMSUB are free. */
5692 if (GET_CODE (op0) == NEG)
5693 op0 = XEXP (op0, 0);
5695 if (GET_CODE (op2) == NEG)
5696 op2 = XEXP (op2, 0);
5698 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5699 and the by-element operand as operand 0. */
5700 if (GET_CODE (op1) == NEG)
5701 op1 = XEXP (op1, 0);
5703 /* Catch vector-by-element operations. The by-element operand can
5704 either be (vec_duplicate (vec_select (x))) or just
5705 (vec_select (x)), depending on whether we are multiplying by
5706 a vector or a scalar.
5708 Canonicalization is not very good in these cases: FMA4 will put the
5709 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
5710 if (GET_CODE (op0) == VEC_DUPLICATE)
5711 op0 = XEXP (op0, 0);
5712 else if (GET_CODE (op1) == VEC_DUPLICATE)
5713 op1 = XEXP (op1, 0);
5715 if (GET_CODE (op0) == VEC_SELECT)
5716 op0 = XEXP (op0, 0);
5717 else if (GET_CODE (op1) == VEC_SELECT)
5718 op1 = XEXP (op1, 0);
5720 /* If the remaining parameters are not registers,
5721 get the cost to put them into registers. */
5722 *cost += rtx_cost (op0, FMA, 0, speed);
5723 *cost += rtx_cost (op1, FMA, 1, speed);
5724 *cost += rtx_cost (op2, FMA, 2, speed);
5725 return true;
5727 case FLOAT_EXTEND:
5728 if (speed)
5729 *cost += extra_cost->fp[mode == DFmode].widen;
5730 return false;
5732 case FLOAT_TRUNCATE:
5733 if (speed)
5734 *cost += extra_cost->fp[mode == DFmode].narrow;
5735 return false;
5737 case ABS:
5738 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5740 /* FABS and FNEG are analogous. */
5741 if (speed)
5742 *cost += extra_cost->fp[mode == DFmode].neg;
5744 else
5746 /* Integer ABS will either be split to
5747 two arithmetic instructions, or will be an ABS
5748 (scalar), which we don't model. */
5749 *cost = COSTS_N_INSNS (2);
5750 if (speed)
5751 *cost += 2 * extra_cost->alu.arith;
5753 return false;
5755 case SMAX:
5756 case SMIN:
5757 if (speed)
5759 /* FMAXNM/FMINNM/FMAX/FMIN.
5760 TODO: This may not be accurate for all implementations, but
5761 we do not model this in the cost tables. */
5762 *cost += extra_cost->fp[mode == DFmode].addsub;
5764 return false;
5766 case TRUNCATE:
5768 /* Decompose <su>muldi3_highpart. */
5769 if (/* (truncate:DI */
5770 mode == DImode
5771 /* (lshiftrt:TI */
5772 && GET_MODE (XEXP (x, 0)) == TImode
5773 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5774 /* (mult:TI */
5775 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5776 /* (ANY_EXTEND:TI (reg:DI))
5777 (ANY_EXTEND:TI (reg:DI))) */
5778 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5779 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5780 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5781 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5782 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5783 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5784 /* (const_int 64) */
5785 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5786 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5788 /* UMULH/SMULH. */
5789 if (speed)
5790 *cost += extra_cost->mult[mode == DImode].extend;
5791 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5792 MULT, 0, speed);
5793 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5794 MULT, 1, speed);
5795 return true;
5798 /* Fall through. */
5799 default:
5800 if (dump_file && (dump_flags & TDF_DETAILS))
5801 fprintf (dump_file,
5802 "\nFailed to cost RTX. Assuming default cost.\n");
5804 return true;
5806 return false;
5809 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5810 calculated for X. This cost is stored in *COST. Returns true
5811 if the total cost of X was calculated. */
5812 static bool
5813 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5814 int param, int *cost, bool speed)
5816 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5818 if (dump_file && (dump_flags & TDF_DETAILS))
5820 print_rtl_single (dump_file, x);
5821 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5822 speed ? "Hot" : "Cold",
5823 *cost, result ? "final" : "partial");
5826 return result;
5829 static int
5830 aarch64_register_move_cost (enum machine_mode mode,
5831 reg_class_t from_i, reg_class_t to_i)
5833 enum reg_class from = (enum reg_class) from_i;
5834 enum reg_class to = (enum reg_class) to_i;
5835 const struct cpu_regmove_cost *regmove_cost
5836 = aarch64_tune_params->regmove_cost;
5838 /* The cost of moving between GPRs and the stack register is the same as GP2GP. */
5839 if ((from == GENERAL_REGS && to == STACK_REG)
5840 || (to == GENERAL_REGS && from == STACK_REG))
5841 return regmove_cost->GP2GP;
5843 /* To/From the stack register, we move via the gprs. */
5844 if (to == STACK_REG || from == STACK_REG)
5845 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5846 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5848 if (from == GENERAL_REGS && to == GENERAL_REGS)
5849 return regmove_cost->GP2GP;
5850 else if (from == GENERAL_REGS)
5851 return regmove_cost->GP2FP;
5852 else if (to == GENERAL_REGS)
5853 return regmove_cost->FP2GP;
5855 /* When AdvSIMD instructions are disabled it is not possible to move
5856 a 128-bit value directly between Q registers. This is handled in
5857 secondary reload. A general register is used as a scratch to move
5858 the upper DI value and the lower DI value is moved directly,
5859 hence the cost is the sum of three moves. */
5860 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
5861 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5863 return regmove_cost->FP2FP;
5866 static int
5867 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5868 reg_class_t rclass ATTRIBUTE_UNUSED,
5869 bool in ATTRIBUTE_UNUSED)
5871 return aarch64_tune_params->memmov_cost;
5874 /* Return the number of instructions that can be issued per cycle. */
5875 static int
5876 aarch64_sched_issue_rate (void)
5878 return aarch64_tune_params->issue_rate;
5881 /* Vectorizer cost model target hooks. */
5883 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5884 static int
5885 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5886 tree vectype,
5887 int misalign ATTRIBUTE_UNUSED)
5889 unsigned elements;
5891 switch (type_of_cost)
5893 case scalar_stmt:
5894 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5896 case scalar_load:
5897 return aarch64_tune_params->vec_costs->scalar_load_cost;
5899 case scalar_store:
5900 return aarch64_tune_params->vec_costs->scalar_store_cost;
5902 case vector_stmt:
5903 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5905 case vector_load:
5906 return aarch64_tune_params->vec_costs->vec_align_load_cost;
5908 case vector_store:
5909 return aarch64_tune_params->vec_costs->vec_store_cost;
5911 case vec_to_scalar:
5912 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5914 case scalar_to_vec:
5915 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5917 case unaligned_load:
5918 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5920 case unaligned_store:
5921 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5923 case cond_branch_taken:
5924 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5926 case cond_branch_not_taken:
5927 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5929 case vec_perm:
5930 case vec_promote_demote:
5931 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5933 case vec_construct:
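 /* Descriptive note (not in the original sources): the N/2 + 1 formula
    below approximates assembling an N-element vector from scalars as
    roughly one statement per pair of elements plus setup.  */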
5934 elements = TYPE_VECTOR_SUBPARTS (vectype);
5935 return elements / 2 + 1;
5937 default:
5938 gcc_unreachable ();
5942 /* Implement targetm.vectorize.add_stmt_cost. */
5943 static unsigned
5944 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5945 struct _stmt_vec_info *stmt_info, int misalign,
5946 enum vect_cost_model_location where)
5948 unsigned *cost = (unsigned *) data;
5949 unsigned retval = 0;
5951 if (flag_vect_cost_model)
5953 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5954 int stmt_cost =
5955 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5957 /* Statements in an inner loop relative to the loop being
5958 vectorized are weighted more heavily. The value here is
5959 a function (linear for now) of the loop nest level. */
5960 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5962 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5963 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
5964 unsigned nest_level = loop_depth (loop);
5966 count *= nest_level;
5969 retval = (unsigned) (count * stmt_cost);
5970 cost[where] += retval;
5973 return retval;
5976 static void initialize_aarch64_code_model (void);
5978 /* Parse the architecture extension string. */
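 /* Illustrative walk-through (hypothetical input): given the suffix
    "+crc+nocrypto", the loop first matches "crc" and ORs in its flags_on
    bits, then sees the "no" prefix on "crypto" and clears that entry's
    flags_off bits from aarch64_isa_flags.  */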
5980 static void
5981 aarch64_parse_extension (char *str)
5983 /* The extension string is parsed left to right. */
5984 const struct aarch64_option_extension *opt = NULL;
5986 /* Flag to say whether we are adding or removing an extension. */
5987 int adding_ext = -1;
5989 while (str != NULL && *str != 0)
5991 char *ext;
5992 size_t len;
5994 str++;
5995 ext = strchr (str, '+');
5997 if (ext != NULL)
5998 len = ext - str;
5999 else
6000 len = strlen (str);
6002 if (len >= 2 && strncmp (str, "no", 2) == 0)
6004 adding_ext = 0;
6005 len -= 2;
6006 str += 2;
6008 else if (len > 0)
6009 adding_ext = 1;
6011 if (len == 0)
6013 error ("missing feature modifier after %qs", "+no");
6014 return;
6017 /* Scan over the extensions table trying to find an exact match. */
6018 for (opt = all_extensions; opt->name != NULL; opt++)
6020 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6022 /* Add or remove the extension. */
6023 if (adding_ext)
6024 aarch64_isa_flags |= opt->flags_on;
6025 else
6026 aarch64_isa_flags &= ~(opt->flags_off);
6027 break;
6031 if (opt->name == NULL)
6033 /* Extension not found in list. */
6034 error ("unknown feature modifier %qs", str);
6035 return;
6038 str = ext;
6041 return;
6044 /* Parse the ARCH string. */
6046 static void
6047 aarch64_parse_arch (void)
6049 char *ext;
6050 const struct processor *arch;
6051 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6052 size_t len;
6054 strcpy (str, aarch64_arch_string);
6056 ext = strchr (str, '+');
6058 if (ext != NULL)
6059 len = ext - str;
6060 else
6061 len = strlen (str);
6063 if (len == 0)
6065 error ("missing arch name in -march=%qs", str);
6066 return;
6069 /* Loop through the list of supported ARCHs to find a match. */
6070 for (arch = all_architectures; arch->name != NULL; arch++)
6072 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6074 selected_arch = arch;
6075 aarch64_isa_flags = selected_arch->flags;
6077 if (!selected_cpu)
6078 selected_cpu = &all_cores[selected_arch->core];
6080 if (ext != NULL)
6082 /* ARCH string contains at least one extension. */
6083 aarch64_parse_extension (ext);
6086 if (strcmp (selected_arch->arch, selected_cpu->arch))
6088 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6089 selected_cpu->name, selected_arch->name);
6092 return;
6096 /* ARCH name not found in list. */
6097 error ("unknown value %qs for -march", str);
6098 return;
6101 /* Parse the CPU string. */
6103 static void
6104 aarch64_parse_cpu (void)
6106 char *ext;
6107 const struct processor *cpu;
6108 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6109 size_t len;
6111 strcpy (str, aarch64_cpu_string);
6113 ext = strchr (str, '+');
6115 if (ext != NULL)
6116 len = ext - str;
6117 else
6118 len = strlen (str);
6120 if (len == 0)
6122 error ("missing cpu name in -mcpu=%qs", str);
6123 return;
6126 /* Loop through the list of supported CPUs to find a match. */
6127 for (cpu = all_cores; cpu->name != NULL; cpu++)
6129 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6131 selected_cpu = cpu;
6132 selected_tune = cpu;
6133 aarch64_isa_flags = selected_cpu->flags;
6135 if (ext != NULL)
6137 /* CPU string contains at least one extension. */
6138 aarch64_parse_extension (ext);
6141 return;
6145 /* CPU name not found in list. */
6146 error ("unknown value %qs for -mcpu", str);
6147 return;
6150 /* Parse the TUNE string. */
6152 static void
6153 aarch64_parse_tune (void)
6155 const struct processor *cpu;
6156 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6157 strcpy (str, aarch64_tune_string);
6159 /* Loop through the list of supported CPUs to find a match. */
6160 for (cpu = all_cores; cpu->name != NULL; cpu++)
6162 if (strcmp (cpu->name, str) == 0)
6164 selected_tune = cpu;
6165 return;
6169 /* CPU name not found in list. */
6170 error ("unknown value %qs for -mtune", str);
6171 return;
6175 /* Implement TARGET_OPTION_OVERRIDE. */
6177 static void
6178 aarch64_override_options (void)
6180 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6181 If either of -march or -mtune is given, they override their
6182 respective component of -mcpu.
6184 So, first parse AARCH64_CPU_STRING, then the others; be careful
6185 with -march because, if -mcpu is not present on the command line, -march
6186 must set a sensible default CPU. */
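 /* For example (illustrative): "-mcpu=cortex-a57 -mtune=cortex-a53" takes
    the architecture and ISA flags from cortex-a57 but the tuning tables
    from cortex-a53; adding an explicit -march would override the
    architecture component as well.  */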
6187 if (aarch64_cpu_string)
6189 aarch64_parse_cpu ();
6192 if (aarch64_arch_string)
6194 aarch64_parse_arch ();
6197 if (aarch64_tune_string)
6199 aarch64_parse_tune ();
6202 #ifndef HAVE_AS_MABI_OPTION
6203 /* The compiler may have been configured with 2.23.* binutils, which does
6204 not have support for ILP32. */
6205 if (TARGET_ILP32)
6206 error ("Assembler does not support -mabi=ilp32");
6207 #endif
6209 initialize_aarch64_code_model ();
6211 aarch64_build_bitmask_table ();
6213 /* This target defaults to strict volatile bitfields. */
6214 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6215 flag_strict_volatile_bitfields = 1;
6217 /* If the user did not specify a processor, choose the default
6218 one for them. This will be the CPU set during configuration using
6219 --with-cpu, otherwise it is "generic". */
6220 if (!selected_cpu)
6222 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6223 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6226 gcc_assert (selected_cpu);
6228 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
6229 if (!selected_tune)
6230 selected_tune = &all_cores[selected_cpu->core];
6232 aarch64_tune_flags = selected_tune->flags;
6233 aarch64_tune = selected_tune->core;
6234 aarch64_tune_params = selected_tune->tune;
6236 aarch64_override_options_after_change ();
6239 /* Implement targetm.override_options_after_change. */
6241 static void
6242 aarch64_override_options_after_change (void)
6244 if (flag_omit_frame_pointer)
6245 flag_omit_leaf_frame_pointer = false;
6246 else if (flag_omit_leaf_frame_pointer)
6247 flag_omit_frame_pointer = true;
6250 static struct machine_function *
6251 aarch64_init_machine_status (void)
6253 struct machine_function *machine;
6254 machine = ggc_alloc_cleared_machine_function ();
6255 return machine;
6258 void
6259 aarch64_init_expanders (void)
6261 init_machine_status = aarch64_init_machine_status;
6264 /* Select the final code model, adjusting the requested model for PIC and rejecting unsupported combinations. */
6265 static void
6266 initialize_aarch64_code_model (void)
6268 if (flag_pic)
6270 switch (aarch64_cmodel_var)
6272 case AARCH64_CMODEL_TINY:
6273 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6274 break;
6275 case AARCH64_CMODEL_SMALL:
6276 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6277 break;
6278 case AARCH64_CMODEL_LARGE:
6279 sorry ("code model %qs with -f%s", "large",
6280 flag_pic > 1 ? "PIC" : "pic");
6281 default:
6282 gcc_unreachable ();
6285 else
6286 aarch64_cmodel = aarch64_cmodel_var;
6289 /* Return true if SYMBOL_REF X binds locally. */
6291 static bool
6292 aarch64_symbol_binds_local_p (const_rtx x)
6294 return (SYMBOL_REF_DECL (x)
6295 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6296 : SYMBOL_REF_LOCAL_P (x));
6299 /* Return true if SYMBOL_REF X is thread local */
6300 static bool
6301 aarch64_tls_symbol_p (rtx x)
6303 if (! TARGET_HAVE_TLS)
6304 return false;
6306 if (GET_CODE (x) != SYMBOL_REF)
6307 return false;
6309 return SYMBOL_REF_TLS_MODEL (x) != 0;
6312 /* Classify a TLS symbol into one of the TLS kinds. */
6313 enum aarch64_symbol_type
6314 aarch64_classify_tls_symbol (rtx x)
6316 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6318 switch (tls_kind)
6320 case TLS_MODEL_GLOBAL_DYNAMIC:
6321 case TLS_MODEL_LOCAL_DYNAMIC:
6322 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6324 case TLS_MODEL_INITIAL_EXEC:
6325 return SYMBOL_SMALL_GOTTPREL;
6327 case TLS_MODEL_LOCAL_EXEC:
6328 return SYMBOL_SMALL_TPREL;
6330 case TLS_MODEL_EMULATED:
6331 case TLS_MODEL_NONE:
6332 return SYMBOL_FORCE_TO_MEM;
6334 default:
6335 gcc_unreachable ();
6339 /* Return the method that should be used to access SYMBOL_REF or
6340 LABEL_REF X in context CONTEXT. */
6342 enum aarch64_symbol_type
6343 aarch64_classify_symbol (rtx x,
6344 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6346 if (GET_CODE (x) == LABEL_REF)
6348 switch (aarch64_cmodel)
6350 case AARCH64_CMODEL_LARGE:
6351 return SYMBOL_FORCE_TO_MEM;
6353 case AARCH64_CMODEL_TINY_PIC:
6354 case AARCH64_CMODEL_TINY:
6355 return SYMBOL_TINY_ABSOLUTE;
6357 case AARCH64_CMODEL_SMALL_PIC:
6358 case AARCH64_CMODEL_SMALL:
6359 return SYMBOL_SMALL_ABSOLUTE;
6361 default:
6362 gcc_unreachable ();
6366 if (GET_CODE (x) == SYMBOL_REF)
6368 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6369 return SYMBOL_FORCE_TO_MEM;
6371 if (aarch64_tls_symbol_p (x))
6372 return aarch64_classify_tls_symbol (x);
6374 switch (aarch64_cmodel)
6376 case AARCH64_CMODEL_TINY:
6377 if (SYMBOL_REF_WEAK (x))
6378 return SYMBOL_FORCE_TO_MEM;
6379 return SYMBOL_TINY_ABSOLUTE;
6381 case AARCH64_CMODEL_SMALL:
6382 if (SYMBOL_REF_WEAK (x))
6383 return SYMBOL_FORCE_TO_MEM;
6384 return SYMBOL_SMALL_ABSOLUTE;
6386 case AARCH64_CMODEL_TINY_PIC:
6387 if (!aarch64_symbol_binds_local_p (x))
6388 return SYMBOL_TINY_GOT;
6389 return SYMBOL_TINY_ABSOLUTE;
6391 case AARCH64_CMODEL_SMALL_PIC:
6392 if (!aarch64_symbol_binds_local_p (x))
6393 return SYMBOL_SMALL_GOT;
6394 return SYMBOL_SMALL_ABSOLUTE;
6396 default:
6397 gcc_unreachable ();
6401 /* By default push everything into the constant pool. */
6402 return SYMBOL_FORCE_TO_MEM;
6405 bool
6406 aarch64_constant_address_p (rtx x)
6408 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6411 bool
6412 aarch64_legitimate_pic_operand_p (rtx x)
6414 if (GET_CODE (x) == SYMBOL_REF
6415 || (GET_CODE (x) == CONST
6416 && GET_CODE (XEXP (x, 0)) == PLUS
6417 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6418 return false;
6420 return true;
6423 /* Return true if X holds either a quarter-precision floating-point
6424 constant or the floating-point constant +0.0. */
6425 static bool
6426 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6428 if (!CONST_DOUBLE_P (x))
6429 return false;
6431 /* TODO: We could handle moving 0.0 to a TFmode register,
6432 but first we would like to refactor the movtf_aarch64
6433 pattern to be more amenable to splitting moves properly and
6434 to correctly gate on TARGET_SIMD. For now, reject all
6435 constants that are not destined for SFmode or DFmode registers. */
6436 if (!(mode == SFmode || mode == DFmode))
6437 return false;
6439 if (aarch64_float_const_zero_rtx_p (x))
6440 return true;
6441 return aarch64_float_const_representable_p (x);
6444 static bool
6445 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6447 /* Do not allow vector struct mode constants. We could support
6448 0 and -1 easily, but they need support in aarch64-simd.md. */
6449 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6450 return false;
6452 /* This could probably go away because
6453 we now decompose CONST_INTs according to expand_mov_immediate. */
6454 if ((GET_CODE (x) == CONST_VECTOR
6455 && aarch64_simd_valid_immediate (x, mode, false, NULL))
6456 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6457 return !targetm.cannot_force_const_mem (mode, x);
6459 if (GET_CODE (x) == HIGH
6460 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6461 return true;
6463 return aarch64_constant_address_p (x);
6466 static rtx
6467 aarch64_load_tp (rtx target)
6469 if (!target
6470 || GET_MODE (target) != Pmode
6471 || !register_operand (target, Pmode))
6472 target = gen_reg_rtx (Pmode);
6474 /* Can return in any reg. */
6475 emit_insn (gen_aarch64_load_tp_hard (target));
6476 return target;
6479 /* On AAPCS systems, this is the "struct __va_list". */
6480 static GTY(()) tree va_list_type;
6482 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6483 Return the type to use as __builtin_va_list.
6485 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6487 struct __va_list
6489 void *__stack;
6490 void *__gr_top;
6491 void *__vr_top;
6492 int __gr_offs;
6493 int __vr_offs;
6494 }; */
6496 static tree
6497 aarch64_build_builtin_va_list (void)
6499 tree va_list_name;
6500 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6502 /* Create the type. */
6503 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6504 /* Give it the required name. */
6505 va_list_name = build_decl (BUILTINS_LOCATION,
6506 TYPE_DECL,
6507 get_identifier ("__va_list"),
6508 va_list_type);
6509 DECL_ARTIFICIAL (va_list_name) = 1;
6510 TYPE_NAME (va_list_type) = va_list_name;
6511 TYPE_STUB_DECL (va_list_type) = va_list_name;
6513 /* Create the fields. */
6514 f_stack = build_decl (BUILTINS_LOCATION,
6515 FIELD_DECL, get_identifier ("__stack"),
6516 ptr_type_node);
6517 f_grtop = build_decl (BUILTINS_LOCATION,
6518 FIELD_DECL, get_identifier ("__gr_top"),
6519 ptr_type_node);
6520 f_vrtop = build_decl (BUILTINS_LOCATION,
6521 FIELD_DECL, get_identifier ("__vr_top"),
6522 ptr_type_node);
6523 f_groff = build_decl (BUILTINS_LOCATION,
6524 FIELD_DECL, get_identifier ("__gr_offs"),
6525 integer_type_node);
6526 f_vroff = build_decl (BUILTINS_LOCATION,
6527 FIELD_DECL, get_identifier ("__vr_offs"),
6528 integer_type_node);
6530 DECL_ARTIFICIAL (f_stack) = 1;
6531 DECL_ARTIFICIAL (f_grtop) = 1;
6532 DECL_ARTIFICIAL (f_vrtop) = 1;
6533 DECL_ARTIFICIAL (f_groff) = 1;
6534 DECL_ARTIFICIAL (f_vroff) = 1;
6536 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6537 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6538 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6539 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6540 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6542 TYPE_FIELDS (va_list_type) = f_stack;
6543 DECL_CHAIN (f_stack) = f_grtop;
6544 DECL_CHAIN (f_grtop) = f_vrtop;
6545 DECL_CHAIN (f_vrtop) = f_groff;
6546 DECL_CHAIN (f_groff) = f_vroff;
6548 /* Compute its layout. */
6549 layout_type (va_list_type);
6551 return va_list_type;
6554 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6555 static void
6556 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6558 const CUMULATIVE_ARGS *cum;
6559 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6560 tree stack, grtop, vrtop, groff, vroff;
6561 tree t;
6562 int gr_save_area_size;
6563 int vr_save_area_size;
6564 int vr_offset;
6566 cum = &crtl->args.info;
6567 gr_save_area_size
6568 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6569 vr_save_area_size
6570 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6572 if (TARGET_GENERAL_REGS_ONLY)
6574 if (cum->aapcs_nvrn > 0)
6575 sorry ("%qs and floating point or vector arguments",
6576 "-mgeneral-regs-only");
6577 vr_save_area_size = 0;
6580 f_stack = TYPE_FIELDS (va_list_type_node);
6581 f_grtop = DECL_CHAIN (f_stack);
6582 f_vrtop = DECL_CHAIN (f_grtop);
6583 f_groff = DECL_CHAIN (f_vrtop);
6584 f_vroff = DECL_CHAIN (f_groff);
6586 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6587 NULL_TREE);
6588 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6589 NULL_TREE);
6590 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6591 NULL_TREE);
6592 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6593 NULL_TREE);
6594 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6595 NULL_TREE);
6597 /* Emit code to initialize STACK, which points to the next varargs stack
6598 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6599 by named arguments. STACK is 8-byte aligned. */
6600 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6601 if (cum->aapcs_stack_size > 0)
6602 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6603 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6604 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6606 /* Emit code to initialize GRTOP, the top of the GR save area.
6607 virtual_incoming_args_rtx should have been 16 byte aligned. */
6608 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6609 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6610 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6612 /* Emit code to initialize VRTOP, the top of the VR save area.
6613 This address is gr_save_area_bytes below GRTOP, rounded
6614 down to the next 16-byte boundary. */
6615 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6616 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6617 STACK_BOUNDARY / BITS_PER_UNIT);
6619 if (vr_offset)
6620 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6621 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6622 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6624 /* Emit code to initialize GROFF, the offset from GRTOP of the
6625 next GPR argument. */
6626 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6627 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6628 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6630 /* Likewise emit code to initialize VROFF, the offset from VRTOP
6631 of the next VR argument. */
6632 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6633 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6634 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
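/* Informative sketch (editorial, in C-like pseudo-code) of the effect of the
   expansion above, for a callee whose named arguments consumed NCRN of the 8
   general and NVRN of the 8 vector argument registers:

     ap.__stack   = incoming_args + named_stack_bytes;
     ap.__gr_top  = incoming_args;
     ap.__vr_top  = incoming_args - ROUND_UP ((8 - NCRN) * 8, 16);
     ap.__gr_offs = -(8 - NCRN) * 8;
     ap.__vr_offs = -(8 - NVRN) * 16;

   NCRN, NVRN and named_stack_bytes stand for cum->aapcs_ncrn,
   cum->aapcs_nvrn and cum->aapcs_stack_size * UNITS_PER_WORD.  */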
6637 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6639 static tree
6640 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6641 gimple_seq *post_p ATTRIBUTE_UNUSED)
6643 tree addr;
6644 bool indirect_p;
6645 bool is_ha; /* is HFA or HVA. */
6646 bool dw_align; /* double-word align. */
6647 enum machine_mode ag_mode = VOIDmode;
6648 int nregs;
6649 enum machine_mode mode;
6651 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6652 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6653 HOST_WIDE_INT size, rsize, adjust, align;
6654 tree t, u, cond1, cond2;
6656 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6657 if (indirect_p)
6658 type = build_pointer_type (type);
6660 mode = TYPE_MODE (type);
6662 f_stack = TYPE_FIELDS (va_list_type_node);
6663 f_grtop = DECL_CHAIN (f_stack);
6664 f_vrtop = DECL_CHAIN (f_grtop);
6665 f_groff = DECL_CHAIN (f_vrtop);
6666 f_vroff = DECL_CHAIN (f_groff);
6668 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6669 f_stack, NULL_TREE);
6670 size = int_size_in_bytes (type);
6671 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6673 dw_align = false;
6674 adjust = 0;
6675 if (aarch64_vfp_is_call_or_return_candidate (mode,
6676 type,
6677 &ag_mode,
6678 &nregs,
6679 &is_ha))
6681 /* TYPE passed in fp/simd registers. */
6682 if (TARGET_GENERAL_REGS_ONLY)
6683 sorry ("%qs and floating point or vector arguments",
6684 "-mgeneral-regs-only");
6686 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6687 unshare_expr (valist), f_vrtop, NULL_TREE);
6688 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6689 unshare_expr (valist), f_vroff, NULL_TREE);
6691 rsize = nregs * UNITS_PER_VREG;
6693 if (is_ha)
6695 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6696 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6698 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6699 && size < UNITS_PER_VREG)
6701 adjust = UNITS_PER_VREG - size;
6704 else
6706 /* TYPE passed in general registers. */
6707 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6708 unshare_expr (valist), f_grtop, NULL_TREE);
6709 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6710 unshare_expr (valist), f_groff, NULL_TREE);
6711 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6712 nregs = rsize / UNITS_PER_WORD;
6714 if (align > 8)
6715 dw_align = true;
6717 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6718 && size < UNITS_PER_WORD)
6720 adjust = UNITS_PER_WORD - size;
6724 /* Get a local temporary for the field value. */
6725 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6727 /* Emit code to branch if off >= 0. */
6728 t = build2 (GE_EXPR, boolean_type_node, off,
6729 build_int_cst (TREE_TYPE (off), 0));
6730 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6732 if (dw_align)
6734 /* Emit: offs = (offs + 15) & -16. */
6735 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6736 build_int_cst (TREE_TYPE (off), 15));
6737 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6738 build_int_cst (TREE_TYPE (off), -16));
6739 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6741 else
6742 roundup = NULL;
6744 /* Update ap.__[g|v]r_offs */
6745 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6746 build_int_cst (TREE_TYPE (off), rsize));
6747 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6749 /* String up. */
6750 if (roundup)
6751 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6753 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6754 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6755 build_int_cst (TREE_TYPE (f_off), 0));
6756 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6758 /* String up: make sure the assignment happens before the use. */
6759 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6760 COND_EXPR_ELSE (cond1) = t;
6762 /* Prepare the trees handling the argument that is passed on the stack;
6763 the top level node will store in ON_STACK. */
6764 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6765 if (align > 8)
6767 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6768 t = fold_convert (intDI_type_node, arg);
6769 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6770 build_int_cst (TREE_TYPE (t), 15));
6771 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6772 build_int_cst (TREE_TYPE (t), -16));
6773 t = fold_convert (TREE_TYPE (arg), t);
6774 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6776 else
6777 roundup = NULL;
6778 /* Advance ap.__stack */
6779 t = fold_convert (intDI_type_node, arg);
6780 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6781 build_int_cst (TREE_TYPE (t), size + 7));
6782 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6783 build_int_cst (TREE_TYPE (t), -8));
6784 t = fold_convert (TREE_TYPE (arg), t);
6785 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6786 /* String up roundup and advance. */
6787 if (roundup)
6788 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6789 /* String up with arg */
6790 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6791 /* Big-endianness related address adjustment. */
6792 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6793 && size < UNITS_PER_WORD)
6795 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6796 size_int (UNITS_PER_WORD - size));
6797 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6800 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6801 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6803 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6804 t = off;
6805 if (adjust)
6806 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6807 build_int_cst (TREE_TYPE (off), adjust));
6809 t = fold_convert (sizetype, t);
6810 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6812 if (is_ha)
6814 /* type ha; // treat as "struct {ftype field[n];}"
6815 ... [computing offs]
6816 for (i = 0; i < nregs; ++i, offs += 16)
6817 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6818 return ha; */
6819 int i;
6820 tree tmp_ha, field_t, field_ptr_t;
6822 /* Declare a local variable. */
6823 tmp_ha = create_tmp_var_raw (type, "ha");
6824 gimple_add_tmp_var (tmp_ha);
6826 /* Establish the base type. */
6827 switch (ag_mode)
6829 case SFmode:
6830 field_t = float_type_node;
6831 field_ptr_t = float_ptr_type_node;
6832 break;
6833 case DFmode:
6834 field_t = double_type_node;
6835 field_ptr_t = double_ptr_type_node;
6836 break;
6837 case TFmode:
6838 field_t = long_double_type_node;
6839 field_ptr_t = long_double_ptr_type_node;
6840 break;
6841 /* Half-precision and quad-precision types are not fully supported yet.
6842 Enable the following code once that support is complete; the correct
6843 type node for __fp16 * still needs to be found. */
6844 #if 0
6845 case HFmode:
6846 field_t = float_type_node;
6847 field_ptr_t = float_ptr_type_node;
6848 break;
6849 #endif
6850 case V2SImode:
6851 case V4SImode:
6853 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6854 field_t = build_vector_type_for_mode (innertype, ag_mode);
6855 field_ptr_t = build_pointer_type (field_t);
6857 break;
6858 default:
6859 gcc_assert (0);
6862 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
6863 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6864 addr = t;
6865 t = fold_convert (field_ptr_t, addr);
6866 t = build2 (MODIFY_EXPR, field_t,
6867 build1 (INDIRECT_REF, field_t, tmp_ha),
6868 build1 (INDIRECT_REF, field_t, t));
6870 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6871 for (i = 1; i < nregs; ++i)
6873 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6874 u = fold_convert (field_ptr_t, addr);
6875 u = build2 (MODIFY_EXPR, field_t,
6876 build2 (MEM_REF, field_t, tmp_ha,
6877 build_int_cst (field_ptr_t,
6878 (i *
6879 int_size_in_bytes (field_t)))),
6880 build1 (INDIRECT_REF, field_t, u));
6881 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6884 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6885 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6888 COND_EXPR_ELSE (cond2) = t;
6889 addr = fold_convert (build_pointer_type (type), cond1);
6890 addr = build_va_arg_indirect_ref (addr);
6892 if (indirect_p)
6893 addr = build_va_arg_indirect_ref (addr);
6895 return addr;
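/* Informative sketch (editorial): for `d = va_arg (ap, double);' the trees
   built above correspond roughly to the following C:

     off = ap.__vr_offs;
     if (off >= 0)
       goto on_stack;            /. FP register save area already used up ./
     ap.__vr_offs = off + 16;
     if (ap.__vr_offs > 0)
       goto on_stack;            /. this argument itself overflowed ./
     addr = ap.__vr_top + off;
     goto load;
   on_stack:
     addr = ap.__stack;
     ap.__stack = addr + 8;      /. advance past one 8-byte slot ./
   load:
     d = *(double *) addr;

   The extra rounding, big-endian adjustments and the homogeneous aggregate
   copy loop are omitted from this sketch.  */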
6898 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
6900 static void
6901 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6902 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6903 int no_rtl)
6905 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6906 CUMULATIVE_ARGS local_cum;
6907 int gr_saved, vr_saved;
6909 /* The caller has advanced CUM up to, but not beyond, the last named
6910 argument. Advance a local copy of CUM past the last "real" named
6911 argument, to find out how many registers are left over. */
6912 local_cum = *cum;
6913 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
6915 /* Find out how many registers we need to save. */
6916 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6917 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6919 if (TARGET_GENERAL_REGS_ONLY)
6921 if (local_cum.aapcs_nvrn > 0)
6922 sorry ("%qs and floating point or vector arguments",
6923 "-mgeneral-regs-only");
6924 vr_saved = 0;
6927 if (!no_rtl)
6929 if (gr_saved > 0)
6931 rtx ptr, mem;
6933 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
6934 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6935 - gr_saved * UNITS_PER_WORD);
6936 mem = gen_frame_mem (BLKmode, ptr);
6937 set_mem_alias_set (mem, get_varargs_alias_set ());
6939 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6940 mem, gr_saved);
6942 if (vr_saved > 0)
6944 /* We can't use move_block_from_reg, because it will use
6945 the wrong mode, storing D regs only. */
6946 enum machine_mode mode = TImode;
6947 int off, i;
6949 /* Set OFF to the offset from virtual_incoming_args_rtx of
6950 the first vector register. The VR save area lies below
6951 the GR one, and is aligned to 16 bytes. */
6952 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6953 STACK_BOUNDARY / BITS_PER_UNIT);
6954 off -= vr_saved * UNITS_PER_VREG;
6956 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6958 rtx ptr, mem;
6960 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6961 mem = gen_frame_mem (mode, ptr);
6962 set_mem_alias_set (mem, get_varargs_alias_set ());
6963 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6964 off += UNITS_PER_VREG;
6969 /* We don't save the size into *PRETEND_SIZE because we want to avoid
6970 any complication of having crtl->args.pretend_args_size changed. */
6971 cfun->machine->saved_varargs_size
6972 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6973 STACK_BOUNDARY / BITS_PER_UNIT)
6974 + vr_saved * UNITS_PER_VREG);
6977 static void
6978 aarch64_conditional_register_usage (void)
6980 int i;
6981 if (!TARGET_FLOAT)
6983 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6985 fixed_regs[i] = 1;
6986 call_used_regs[i] = 1;
6991 /* Walk down the type tree of TYPE counting consecutive base elements.
6992 If *MODEP is VOIDmode, then set it to the first valid floating point
6993 type. If a non-floating point type is found, or if a floating point
6994 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6995 otherwise return the count in the sub-tree. */
6996 static int
6997 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6999 enum machine_mode mode;
7000 HOST_WIDE_INT size;
7002 switch (TREE_CODE (type))
7004 case REAL_TYPE:
7005 mode = TYPE_MODE (type);
7006 if (mode != DFmode && mode != SFmode && mode != TFmode)
7007 return -1;
7009 if (*modep == VOIDmode)
7010 *modep = mode;
7012 if (*modep == mode)
7013 return 1;
7015 break;
7017 case COMPLEX_TYPE:
7018 mode = TYPE_MODE (TREE_TYPE (type));
7019 if (mode != DFmode && mode != SFmode && mode != TFmode)
7020 return -1;
7022 if (*modep == VOIDmode)
7023 *modep = mode;
7025 if (*modep == mode)
7026 return 2;
7028 break;
7030 case VECTOR_TYPE:
7031 /* Use V2SImode and V4SImode as representatives of all 64-bit
7032 and 128-bit vector types. */
7033 size = int_size_in_bytes (type);
7034 switch (size)
7036 case 8:
7037 mode = V2SImode;
7038 break;
7039 case 16:
7040 mode = V4SImode;
7041 break;
7042 default:
7043 return -1;
7046 if (*modep == VOIDmode)
7047 *modep = mode;
7049 /* Vector modes are considered to be opaque: two vectors are
7050 equivalent for the purposes of being homogeneous aggregates
7051 if they are the same size. */
7052 if (*modep == mode)
7053 return 1;
7055 break;
7057 case ARRAY_TYPE:
7059 int count;
7060 tree index = TYPE_DOMAIN (type);
7062 /* Can't handle incomplete types. */
7063 if (!COMPLETE_TYPE_P (type))
7064 return -1;
7066 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7067 if (count == -1
7068 || !index
7069 || !TYPE_MAX_VALUE (index)
7070 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
7071 || !TYPE_MIN_VALUE (index)
7072 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7073 || count < 0)
7074 return -1;
7076 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7077 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7079 /* There must be no padding. */
7080 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7081 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
7082 != count * GET_MODE_BITSIZE (*modep)))
7083 return -1;
7085 return count;
7088 case RECORD_TYPE:
7090 int count = 0;
7091 int sub_count;
7092 tree field;
7094 /* Can't handle incomplete types. */
7095 if (!COMPLETE_TYPE_P (type))
7096 return -1;
7098 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7100 if (TREE_CODE (field) != FIELD_DECL)
7101 continue;
7103 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7104 if (sub_count < 0)
7105 return -1;
7106 count += sub_count;
7109 /* There must be no padding. */
7110 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7111 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
7112 != count * GET_MODE_BITSIZE (*modep)))
7113 return -1;
7115 return count;
7118 case UNION_TYPE:
7119 case QUAL_UNION_TYPE:
7121 /* These aren't very interesting except in a degenerate case. */
7122 int count = 0;
7123 int sub_count;
7124 tree field;
7126 /* Can't handle incomplete types. */
7127 if (!COMPLETE_TYPE_P (type))
7128 return -1;
7130 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7132 if (TREE_CODE (field) != FIELD_DECL)
7133 continue;
7135 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7136 if (sub_count < 0)
7137 return -1;
7138 count = count > sub_count ? count : sub_count;
7141 /* There must be no padding. */
7142 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7143 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
7144 != count * GET_MODE_BITSIZE (*modep)))
7145 return -1;
7147 return count;
7150 default:
7151 break;
7154 return -1;
7157 /* Return true if we use LRA instead of reload pass. */
7158 static bool
7159 aarch64_lra_p (void)
7161 return aarch64_lra_flag;
7164 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7165 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7166 array types. The C99 floating-point complex types are also considered
7167 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7168 types, which are GCC extensions and out of the scope of AAPCS64, are
7169 treated as composite types here as well.
7171 Note that MODE itself is not sufficient in determining whether a type
7172 is such a composite type or not. This is because
7173 stor-layout.c:compute_record_mode may have already changed the MODE
7174 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7175 structure with only one field may have its MODE set to the mode of the
7176 field. Also an integer mode whose size matches the size of the
7177 RECORD_TYPE type may be used to substitute the original mode
7178 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7179 solely relied on. */
7181 static bool
7182 aarch64_composite_type_p (const_tree type,
7183 enum machine_mode mode)
7185 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7186 return true;
7188 if (mode == BLKmode
7189 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7190 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7191 return true;
7193 return false;
7196 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7197 type as described in AAPCS64 \S 4.1.2.
7199 See the comment above aarch64_composite_type_p for the notes on MODE. */
7201 static bool
7202 aarch64_short_vector_p (const_tree type,
7203 enum machine_mode mode)
7205 HOST_WIDE_INT size = -1;
7207 if (type && TREE_CODE (type) == VECTOR_TYPE)
7208 size = int_size_in_bytes (type);
7209 else if (!aarch64_composite_type_p (type, mode)
7210 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7211 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7212 size = GET_MODE_SIZE (mode);
7214 return (size == 8 || size == 16) ? true : false;
7217 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7218 shall be passed or returned in simd/fp register(s) (providing these
7219 parameter passing registers are available).
7221 Upon successful return, *COUNT returns the number of needed registers,
7222 *BASE_MODE returns the mode of the individual register and, when IS_HA
7223 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7224 floating-point aggregate or a homogeneous short-vector aggregate. */
7226 static bool
7227 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
7228 const_tree type,
7229 enum machine_mode *base_mode,
7230 int *count,
7231 bool *is_ha)
7233 enum machine_mode new_mode = VOIDmode;
7234 bool composite_p = aarch64_composite_type_p (type, mode);
7236 if (is_ha != NULL) *is_ha = false;
7238 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7239 || aarch64_short_vector_p (type, mode))
7241 *count = 1;
7242 new_mode = mode;
7244 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7246 if (is_ha != NULL) *is_ha = true;
7247 *count = 2;
7248 new_mode = GET_MODE_INNER (mode);
7250 else if (type && composite_p)
7252 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7254 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7256 if (is_ha != NULL) *is_ha = true;
7257 *count = ag_count;
7259 else
7260 return false;
7262 else
7263 return false;
7265 *base_mode = new_mode;
7266 return true;
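/* Informative examples (editorial) of how the classification above behaves:

     struct hfa { double x, y, z; };     /. HFA: 3 x DFmode, *is_ha set ./
     struct hva { int32x4_t a, b; };     /. (arm_neon.h) HVA: 2 x V4SImode, *is_ha set ./
     _Complex double cd;                 /. 2 x DFmode, *is_ha set ./
     float f;                            /. 1 x SFmode, not marked is_ha ./
     struct mix { double d; int i; };    /. rejected: not homogeneous ./

   An aggregate with more than HA_MAX_NUM_FLDS (four) elements is also
   rejected and is passed by the integer/stack rules instead.  */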
7269 /* Implement TARGET_STRUCT_VALUE_RTX. */
7271 static rtx
7272 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7273 int incoming ATTRIBUTE_UNUSED)
7275 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7278 /* Implements target hook vector_mode_supported_p. */
7279 static bool
7280 aarch64_vector_mode_supported_p (enum machine_mode mode)
7282 if (TARGET_SIMD
7283 && (mode == V4SImode || mode == V8HImode
7284 || mode == V16QImode || mode == V2DImode
7285 || mode == V2SImode || mode == V4HImode
7286 || mode == V8QImode || mode == V2SFmode
7287 || mode == V4SFmode || mode == V2DFmode
7288 || mode == V1DFmode))
7289 return true;
7291 return false;
7294 /* Return appropriate SIMD container
7295 for MODE within a vector of WIDTH bits. */
7296 static enum machine_mode
7297 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
7299 gcc_assert (width == 64 || width == 128);
7300 if (TARGET_SIMD)
7302 if (width == 128)
7303 switch (mode)
7305 case DFmode:
7306 return V2DFmode;
7307 case SFmode:
7308 return V4SFmode;
7309 case SImode:
7310 return V4SImode;
7311 case HImode:
7312 return V8HImode;
7313 case QImode:
7314 return V16QImode;
7315 case DImode:
7316 return V2DImode;
7317 default:
7318 break;
7320 else
7321 switch (mode)
7323 case SFmode:
7324 return V2SFmode;
7325 case SImode:
7326 return V2SImode;
7327 case HImode:
7328 return V4HImode;
7329 case QImode:
7330 return V8QImode;
7331 default:
7332 break;
7335 return word_mode;
7338 /* Return 128-bit container as the preferred SIMD mode for MODE. */
7339 static enum machine_mode
7340 aarch64_preferred_simd_mode (enum machine_mode mode)
7342 return aarch64_simd_container_mode (mode, 128);
7345 /* Return the bitmask of possible vector sizes for the vectorizer
7346 to iterate over. */
7347 static unsigned int
7348 aarch64_autovectorize_vector_sizes (void)
7350 return (16 | 8);
7353 /* A table to help perform AArch64-specific name mangling for AdvSIMD
7354 vector types in order to conform to the AAPCS64 (see "Procedure
7355 Call Standard for the ARM 64-bit Architecture", Appendix A). To
7356 qualify for emission with the mangled names defined in that document,
7357 a vector type must not only be of the correct mode but also be
7358 composed of AdvSIMD vector element types (e.g.
7359 __builtin_aarch64_simd_qi); these types are registered by
7360 aarch64_init_simd_builtins (). In other words, vector types defined
7361 in other ways e.g. via vector_size attribute will get default
7362 mangled names. */
7363 typedef struct
7365 enum machine_mode mode;
7366 const char *element_type_name;
7367 const char *mangled_name;
7368 } aarch64_simd_mangle_map_entry;
7370 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7371 /* 64-bit containerized types. */
7372 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
7373 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
7374 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
7375 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
7376 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
7377 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
7378 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
7379 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
7380 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7381 /* 128-bit containerized types. */
7382 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
7383 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
7384 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
7385 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
7386 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
7387 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
7388 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
7389 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
7390 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
7391 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
7392 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7393 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7394 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
7395 { VOIDmode, NULL, NULL }
7398 /* Implement TARGET_MANGLE_TYPE. */
7400 static const char *
7401 aarch64_mangle_type (const_tree type)
7403 /* The AArch64 ABI documents say that "__va_list" has to be
7404 mangled as if it were in the "std" namespace. */
7405 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7406 return "St9__va_list";
7408 /* Check the mode of the vector type, and the name of the vector
7409 element type, against the table. */
7410 if (TREE_CODE (type) == VECTOR_TYPE)
7412 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7414 while (pos->mode != VOIDmode)
7416 tree elt_type = TREE_TYPE (type);
7418 if (pos->mode == TYPE_MODE (type)
7419 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7420 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7421 pos->element_type_name))
7422 return pos->mangled_name;
7424 pos++;
7428 /* Use the default mangling. */
7429 return NULL;
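/* Informative example (editorial): with the table above, a C++ declaration
   using the arm_neon.h type int32x4_t, e.g.

     void f (int32x4_t v);

   is expected to mangle as _Z1f11__Int32x4_t, because int32x4_t is a
   V4SImode vector of __builtin_aarch64_simd_si elements; a user type
   declared with __attribute__ ((vector_size (16))) instead falls through to
   the default mangling.  */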
7432 /* Return the equivalent letter for size. */
7433 static char
7434 sizetochar (int size)
7436 switch (size)
7438 case 64: return 'd';
7439 case 32: return 's';
7440 case 16: return 'h';
7441 case 8 : return 'b';
7442 default: gcc_unreachable ();
7446 /* Return true iff X is a uniform vector of floating-point
7447 constants, and the constant can be represented in
7448 quarter-precision form. Note that, as aarch64_float_const_representable_p
7449 rejects both +0.0 and -0.0, we will also reject them here. */
7450 static bool
7451 aarch64_vect_float_const_representable_p (rtx x)
7453 int i = 0;
7454 REAL_VALUE_TYPE r0, ri;
7455 rtx x0, xi;
7457 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7458 return false;
7460 x0 = CONST_VECTOR_ELT (x, 0);
7461 if (!CONST_DOUBLE_P (x0))
7462 return false;
7464 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7466 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7468 xi = CONST_VECTOR_ELT (x, i);
7469 if (!CONST_DOUBLE_P (xi))
7470 return false;
7472 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7473 if (!REAL_VALUES_EQUAL (r0, ri))
7474 return false;
7477 return aarch64_float_const_representable_p (x0);
7480 /* Return true for valid and false for invalid. */
7481 bool
7482 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7483 struct simd_immediate_info *info)
7485 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7486 matches = 1; \
7487 for (i = 0; i < idx; i += (STRIDE)) \
7488 if (!(TEST)) \
7489 matches = 0; \
7490 if (matches) \
7492 immtype = (CLASS); \
7493 elsize = (ELSIZE); \
7494 eshift = (SHIFT); \
7495 emvn = (NEG); \
7496 break; \
7499 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7500 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7501 unsigned char bytes[16];
7502 int immtype = -1, matches;
7503 unsigned int invmask = inverse ? 0xff : 0;
7504 int eshift, emvn;
7506 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7508 if (! (aarch64_simd_imm_zero_p (op, mode)
7509 || aarch64_vect_float_const_representable_p (op)))
7510 return false;
7512 if (info)
7514 info->value = CONST_VECTOR_ELT (op, 0);
7515 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7516 info->mvn = false;
7517 info->shift = 0;
7520 return true;
7523 /* Splat vector constant out into a byte vector. */
7524 for (i = 0; i < n_elts; i++)
7526 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7527 it must be laid out in the vector register in reverse order. */
7528 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7529 unsigned HOST_WIDE_INT elpart;
7530 unsigned int part, parts;
7532 if (GET_CODE (el) == CONST_INT)
7534 elpart = INTVAL (el);
7535 parts = 1;
7537 else if (GET_CODE (el) == CONST_DOUBLE)
7539 elpart = CONST_DOUBLE_LOW (el);
7540 parts = 2;
7542 else
7543 gcc_unreachable ();
7545 for (part = 0; part < parts; part++)
7547 unsigned int byte;
7548 for (byte = 0; byte < innersize; byte++)
7550 bytes[idx++] = (elpart & 0xff) ^ invmask;
7551 elpart >>= BITS_PER_UNIT;
7553 if (GET_CODE (el) == CONST_DOUBLE)
7554 elpart = CONST_DOUBLE_HIGH (el);
7558 /* Sanity check. */
7559 gcc_assert (idx == GET_MODE_SIZE (mode));
7563 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7564 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7566 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7567 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7569 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7570 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7572 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7573 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7575 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7577 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7579 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7580 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7582 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7583 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7585 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7586 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7588 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7589 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7591 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7593 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7595 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7596 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7598 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7599 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7601 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7602 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7604 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7605 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7607 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7609 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7610 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7612 while (0);
7614 if (immtype == -1)
7615 return false;
7617 if (info)
7619 info->element_width = elsize;
7620 info->mvn = emvn != 0;
7621 info->shift = eshift;
7623 unsigned HOST_WIDE_INT imm = 0;
7625 if (immtype >= 12 && immtype <= 15)
7626 info->msl = true;
7628 /* Un-invert bytes of recognized vector, if necessary. */
7629 if (invmask != 0)
7630 for (i = 0; i < idx; i++)
7631 bytes[i] ^= invmask;
7633 if (immtype == 17)
7635 /* FIXME: Broken on 32-bit H_W_I hosts. */
7636 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7638 for (i = 0; i < 8; i++)
7639 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7640 << (i * BITS_PER_UNIT);
7643 info->value = GEN_INT (imm);
7645 else
7647 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7648 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7650 /* Construct 'abcdefgh' because the assembler cannot handle
7651 generic constants. */
7652 if (info->mvn)
7653 imm = ~imm;
7654 imm = (imm >> info->shift) & 0xff;
7655 info->value = GEN_INT (imm);
7659 return true;
7660 #undef CHECK
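/* Informative example (editorial): a V16QImode CONST_VECTOR whose elements
   are all 0x2a passes the (1, 8, 16, ...) test above, giving element_width 8,
   shift 0 and mvn false -- i.e. a constant that a single MOVI
   (e.g. "movi v0.16b, 42") can materialise; a vector mixing unrelated byte
   values fails every test and is rejected.  */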
7663 static bool
7664 aarch64_const_vec_all_same_int_p (rtx x,
7665 HOST_WIDE_INT minval,
7666 HOST_WIDE_INT maxval)
7668 HOST_WIDE_INT firstval;
7669 int count, i;
7671 if (GET_CODE (x) != CONST_VECTOR
7672 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7673 return false;
7675 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7676 if (firstval < minval || firstval > maxval)
7677 return false;
7679 count = CONST_VECTOR_NUNITS (x);
7680 for (i = 1; i < count; i++)
7681 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7682 return false;
7684 return true;
7687 /* Check if immediate shift constants are within range. */
7688 bool
7689 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7691 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7692 if (left)
7693 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7694 else
7695 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7698 /* Return true if X is a uniform vector where all elements
7699 are either the floating-point constant 0.0 or the
7700 integer constant 0. */
7701 bool
7702 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7704 return x == CONST0_RTX (mode);
7707 bool
7708 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7710 HOST_WIDE_INT imm = INTVAL (x);
7711 int i;
7713 for (i = 0; i < 8; i++)
7715 unsigned int byte = imm & 0xff;
7716 if (byte != 0xff && byte != 0)
7717 return false;
7718 imm >>= 8;
7721 return true;
7724 bool
7725 aarch64_mov_operand_p (rtx x,
7726 enum aarch64_symbol_context context,
7727 enum machine_mode mode)
7729 if (GET_CODE (x) == HIGH
7730 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7731 return true;
7733 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7734 return true;
7736 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7737 return true;
7739 return aarch64_classify_symbolic_expression (x, context)
7740 == SYMBOL_TINY_ABSOLUTE;
7743 /* Return a const_int vector of VAL. */
7744 rtx
7745 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7747 int nunits = GET_MODE_NUNITS (mode);
7748 rtvec v = rtvec_alloc (nunits);
7749 int i;
7751 for (i=0; i < nunits; i++)
7752 RTVEC_ELT (v, i) = GEN_INT (val);
7754 return gen_rtx_CONST_VECTOR (mode, v);
7757 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7759 bool
7760 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7762 enum machine_mode vmode;
7764 gcc_assert (!VECTOR_MODE_P (mode));
7765 vmode = aarch64_preferred_simd_mode (mode);
7766 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7767 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7770 /* Construct and return a PARALLEL RTX vector. */
7771 rtx
7772 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7774 int nunits = GET_MODE_NUNITS (mode);
7775 rtvec v = rtvec_alloc (nunits / 2);
7776 int base = high ? nunits / 2 : 0;
7777 rtx t1;
7778 int i;
7780 for (i=0; i < nunits / 2; i++)
7781 RTVEC_ELT (v, i) = GEN_INT (base + i);
7783 t1 = gen_rtx_PARALLEL (mode, v);
7784 return t1;
7787 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7788 HIGH (exclusive). */
7789 void
7790 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7792 HOST_WIDE_INT lane;
7793 gcc_assert (GET_CODE (operand) == CONST_INT);
7794 lane = INTVAL (operand);
7796 if (lane < low || lane >= high)
7797 error ("lane out of range");
7800 void
7801 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7803 gcc_assert (GET_CODE (operand) == CONST_INT);
7804 HOST_WIDE_INT lane = INTVAL (operand);
7806 if (lane < low || lane >= high)
7807 error ("constant out of range");
7810 /* Emit code to reinterpret one AdvSIMD type as another,
7811 without altering bits. */
7812 void
7813 aarch64_simd_reinterpret (rtx dest, rtx src)
7815 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7818 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
7819 registers). */
7820 void
7821 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7822 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7823 rtx op1)
7825 rtx mem = gen_rtx_MEM (mode, destaddr);
7826 rtx tmp1 = gen_reg_rtx (mode);
7827 rtx tmp2 = gen_reg_rtx (mode);
7829 emit_insn (intfn (tmp1, op1, tmp2));
7831 emit_move_insn (mem, tmp1);
7832 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7833 emit_move_insn (mem, tmp2);
7836 /* Return TRUE if OP is a valid vector addressing mode. */
7837 bool
7838 aarch64_simd_mem_operand_p (rtx op)
7840 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7841 || GET_CODE (XEXP (op, 0)) == REG);
7844 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7845 not to early-clobber SRC registers in the process.
7847 We assume that the operands described by SRC and DEST represent a
7848 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7849 number of components into which the copy has been decomposed. */
7850 void
7851 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7852 rtx *src, unsigned int count)
7854 unsigned int i;
7856 if (!reg_overlap_mentioned_p (operands[0], operands[1])
7857 || REGNO (operands[0]) < REGNO (operands[1]))
7859 for (i = 0; i < count; i++)
7861 operands[2 * i] = dest[i];
7862 operands[2 * i + 1] = src[i];
7865 else
7867 for (i = 0; i < count; i++)
7869 operands[2 * i] = dest[count - i - 1];
7870 operands[2 * i + 1] = src[count - i - 1];
7875 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7876 one of VSTRUCT modes: OI, CI or XI. */
7877 int
7878 aarch64_simd_attr_length_move (rtx insn)
7880 enum machine_mode mode;
7882 extract_insn_cached (insn);
7884 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7886 mode = GET_MODE (recog_data.operand[0]);
7887 switch (mode)
7889 case OImode:
7890 return 8;
7891 case CImode:
7892 return 12;
7893 case XImode:
7894 return 16;
7895 default:
7896 gcc_unreachable ();
7899 return 4;
7902 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
7903 alignment of a vector to 128 bits. */
7904 static HOST_WIDE_INT
7905 aarch64_simd_vector_alignment (const_tree type)
7907 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7908 return MIN (align, 128);
7911 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
7912 static bool
7913 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7915 if (is_packed)
7916 return false;
7918 /* We guarantee alignment for vectors up to 128 bits. */
7919 if (tree_int_cst_compare (TYPE_SIZE (type),
7920 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7921 return false;
7923 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
7924 return true;
7927 /* If VALS is a vector constant that can be loaded into a register
7928 using DUP, generate instructions to do so and return an RTX to
7929 assign to the register. Otherwise return NULL_RTX. */
7930 static rtx
7931 aarch64_simd_dup_constant (rtx vals)
7933 enum machine_mode mode = GET_MODE (vals);
7934 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7935 int n_elts = GET_MODE_NUNITS (mode);
7936 bool all_same = true;
7937 rtx x;
7938 int i;
7940 if (GET_CODE (vals) != CONST_VECTOR)
7941 return NULL_RTX;
7943 for (i = 1; i < n_elts; ++i)
7945 x = CONST_VECTOR_ELT (vals, i);
7946 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7947 all_same = false;
7950 if (!all_same)
7951 return NULL_RTX;
7953 /* We can load this constant by using DUP and a constant in a
7954 single ARM register. This will be cheaper than a vector
7955 load. */
7956 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7957 return gen_rtx_VEC_DUPLICATE (mode, x);
7961 /* Generate code to load VALS, which is a PARALLEL containing only
7962 constants (for vec_init) or CONST_VECTOR, efficiently into a
7963 register. Returns an RTX to copy into the register, or NULL_RTX
7964 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
7965 static rtx
7966 aarch64_simd_make_constant (rtx vals)
7968 enum machine_mode mode = GET_MODE (vals);
7969 rtx const_dup;
7970 rtx const_vec = NULL_RTX;
7971 int n_elts = GET_MODE_NUNITS (mode);
7972 int n_const = 0;
7973 int i;
7975 if (GET_CODE (vals) == CONST_VECTOR)
7976 const_vec = vals;
7977 else if (GET_CODE (vals) == PARALLEL)
7979 /* A CONST_VECTOR must contain only CONST_INTs and
7980 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7981 Only store valid constants in a CONST_VECTOR. */
7982 for (i = 0; i < n_elts; ++i)
7984 rtx x = XVECEXP (vals, 0, i);
7985 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7986 n_const++;
7988 if (n_const == n_elts)
7989 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7991 else
7992 gcc_unreachable ();
7994 if (const_vec != NULL_RTX
7995 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7996 /* Load using MOVI/MVNI. */
7997 return const_vec;
7998 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7999 /* Loaded using DUP. */
8000 return const_dup;
8001 else if (const_vec != NULL_RTX)
8002 /* Load from constant pool. We cannot take advantage of single-cycle
8003 LD1 because we need a PC-relative addressing mode. */
8004 return const_vec;
8005 else
8006 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8007 We cannot construct an initializer. */
8008 return NULL_RTX;
8011 void
8012 aarch64_expand_vector_init (rtx target, rtx vals)
8014 enum machine_mode mode = GET_MODE (target);
8015 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8016 int n_elts = GET_MODE_NUNITS (mode);
8017 int n_var = 0, one_var = -1;
8018 bool all_same = true;
8019 rtx x, mem;
8020 int i;
8022 x = XVECEXP (vals, 0, 0);
8023 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8024 n_var = 1, one_var = 0;
8026 for (i = 1; i < n_elts; ++i)
8028 x = XVECEXP (vals, 0, i);
8029 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8030 ++n_var, one_var = i;
8032 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8033 all_same = false;
8036 if (n_var == 0)
8038 rtx constant = aarch64_simd_make_constant (vals);
8039 if (constant != NULL_RTX)
8041 emit_move_insn (target, constant);
8042 return;
8046 /* Splat a single non-constant element if we can. */
8047 if (all_same)
8049 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8050 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8051 return;
8054 /* One field is non-constant. Load constant then overwrite varying
8055 field. This is more efficient than using the stack. */
8056 if (n_var == 1)
8058 rtx copy = copy_rtx (vals);
8059 rtx index = GEN_INT (one_var);
8060 enum insn_code icode;
8062 /* Load constant part of vector, substitute neighboring value for
8063 varying element. */
8064 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8065 aarch64_expand_vector_init (target, copy);
8067 /* Insert variable. */
8068 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8069 icode = optab_handler (vec_set_optab, mode);
8070 gcc_assert (icode != CODE_FOR_nothing);
8071 emit_insn (GEN_FCN (icode) (target, x, index));
8072 return;
8075 /* Construct the vector in memory one field at a time
8076 and load the whole vector. */
8077 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8078 for (i = 0; i < n_elts; i++)
8079 emit_move_insn (adjust_address_nv (mem, inner_mode,
8080 i * GET_MODE_SIZE (inner_mode)),
8081 XVECEXP (vals, 0, i));
8082 emit_move_insn (target, mem);
8086 static unsigned HOST_WIDE_INT
8087 aarch64_shift_truncation_mask (enum machine_mode mode)
8089 return
8090 (aarch64_vector_mode_supported_p (mode)
8091 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8094 #ifndef TLS_SECTION_ASM_FLAG
8095 #define TLS_SECTION_ASM_FLAG 'T'
8096 #endif
8098 void
8099 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8100 tree decl ATTRIBUTE_UNUSED)
8102 char flagchars[10], *f = flagchars;
8104 /* If we have already declared this section, we can use an
8105 abbreviated form to switch back to it -- unless this section is
8106 part of a COMDAT group, in which case GAS requires the full
8107 declaration every time. */
8108 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8109 && (flags & SECTION_DECLARED))
8111 fprintf (asm_out_file, "\t.section\t%s\n", name);
8112 return;
8115 if (!(flags & SECTION_DEBUG))
8116 *f++ = 'a';
8117 if (flags & SECTION_WRITE)
8118 *f++ = 'w';
8119 if (flags & SECTION_CODE)
8120 *f++ = 'x';
8121 if (flags & SECTION_SMALL)
8122 *f++ = 's';
8123 if (flags & SECTION_MERGE)
8124 *f++ = 'M';
8125 if (flags & SECTION_STRINGS)
8126 *f++ = 'S';
8127 if (flags & SECTION_TLS)
8128 *f++ = TLS_SECTION_ASM_FLAG;
8129 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8130 *f++ = 'G';
8131 *f = '\0';
8133 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8135 if (!(flags & SECTION_NOTYPE))
8137 const char *type;
8138 const char *format;
8140 if (flags & SECTION_BSS)
8141 type = "nobits";
8142 else
8143 type = "progbits";
8145 #ifdef TYPE_OPERAND_FMT
8146 format = "," TYPE_OPERAND_FMT;
8147 #else
8148 format = ",@%s";
8149 #endif
8151 fprintf (asm_out_file, format, type);
8153 if (flags & SECTION_ENTSIZE)
8154 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8155 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8157 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8158 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8159 else
8160 fprintf (asm_out_file, ",%s,comdat",
8161 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8165 putc ('\n', asm_out_file);
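/* Informative example (editorial) of the directive this emits: for a
   writable TLS BSS section the output is expected to look like

     .section .tbss,"awT",@nobits

   'a' because the section is not a debug section, 'w' for SECTION_WRITE,
   'T' (TLS_SECTION_ASM_FLAG) for SECTION_TLS, and "nobits" because
   SECTION_BSS is set; the "@" comes from the ",@%s" fallback format
   above.  */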
8168 /* Select a format to encode pointers in exception handling data. */
8169 int
8170 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8172 int type;
8173 switch (aarch64_cmodel)
8175 case AARCH64_CMODEL_TINY:
8176 case AARCH64_CMODEL_TINY_PIC:
8177 case AARCH64_CMODEL_SMALL:
8178 case AARCH64_CMODEL_SMALL_PIC:
8179 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8180 for everything. */
8181 type = DW_EH_PE_sdata4;
8182 break;
8183 default:
8184 /* No assumptions here. 8-byte relocs required. */
8185 type = DW_EH_PE_sdata8;
8186 break;
8188 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8191 /* Emit load exclusive. */
8193 static void
8194 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
8195 rtx mem, rtx model_rtx)
8197 rtx (*gen) (rtx, rtx, rtx);
8199 switch (mode)
8201 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8202 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8203 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8204 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8205 default:
8206 gcc_unreachable ();
8209 emit_insn (gen (rval, mem, model_rtx));
8212 /* Emit store exclusive. */
8214 static void
8215 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
8216 rtx rval, rtx mem, rtx model_rtx)
8218 rtx (*gen) (rtx, rtx, rtx, rtx);
8220 switch (mode)
8222 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8223 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8224 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8225 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8226 default:
8227 gcc_unreachable ();
8230 emit_insn (gen (bval, rval, mem, model_rtx));
8233 /* Mark the previous jump instruction as unlikely. */
8235 static void
8236 aarch64_emit_unlikely_jump (rtx insn)
8238 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8240 insn = emit_jump_insn (insn);
8241 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8244 /* Expand a compare and swap pattern. */
8246 void
8247 aarch64_expand_compare_and_swap (rtx operands[])
8249 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8250 enum machine_mode mode, cmp_mode;
8251 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8253 bval = operands[0];
8254 rval = operands[1];
8255 mem = operands[2];
8256 oldval = operands[3];
8257 newval = operands[4];
8258 is_weak = operands[5];
8259 mod_s = operands[6];
8260 mod_f = operands[7];
8261 mode = GET_MODE (mem);
8262 cmp_mode = mode;
8264 /* Normally the succ memory model must be stronger than fail, but in the
8265 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8266 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8268 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8269 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8270 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8272 switch (mode)
8274 case QImode:
8275 case HImode:
8276 /* For short modes, we're going to perform the comparison in SImode,
8277 so do the zero-extension now. */
8278 cmp_mode = SImode;
8279 rval = gen_reg_rtx (SImode);
8280 oldval = convert_modes (SImode, mode, oldval, true);
8281 /* Fall through. */
8283 case SImode:
8284 case DImode:
8285 /* Force the value into a register if needed. */
8286 if (!aarch64_plus_operand (oldval, mode))
8287 oldval = force_reg (cmp_mode, oldval);
8288 break;
8290 default:
8291 gcc_unreachable ();
8294 switch (mode)
8296 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8297 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8298 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8299 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8300 default:
8301 gcc_unreachable ();
8304 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8306 if (mode == QImode || mode == HImode)
8307 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8309 x = gen_rtx_REG (CCmode, CC_REGNUM);
8310 x = gen_rtx_EQ (SImode, x, const0_rtx);
8311 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8314 /* Split a compare and swap pattern. */
8316 void
8317 aarch64_split_compare_and_swap (rtx operands[])
8319 rtx rval, mem, oldval, newval, scratch;
8320 enum machine_mode mode;
8321 bool is_weak;
8322 rtx label1, label2, x, cond;
8324 rval = operands[0];
8325 mem = operands[1];
8326 oldval = operands[2];
8327 newval = operands[3];
8328 is_weak = (operands[4] != const0_rtx);
8329 scratch = operands[7];
8330 mode = GET_MODE (mem);
8332 label1 = NULL_RTX;
8333 if (!is_weak)
8335 label1 = gen_label_rtx ();
8336 emit_label (label1);
8338 label2 = gen_label_rtx ();
8340 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8342 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8343 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8344 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8345 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8346 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8348 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8350 if (!is_weak)
8352 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8353 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8354 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8355 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8357 else
8359 cond = gen_rtx_REG (CCmode, CC_REGNUM);
8360 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8361 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8364 emit_label (label2);
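/* Informative sketch (editorial): for a strong sequentially-consistent
   DImode compare-and-swap the split above is expected to yield a loop along
   these lines (register numbers arbitrary):

     .L1: ldaxr x0, [x1]         // load-acquire exclusive
          cmp   x0, x2           // compare with expected value
          b.ne  .L2              // mismatch: give up, CC holds NE
          stlxr w3, x4, [x1]     // store-release exclusive of new value
          cbnz  w3, .L1          // reservation lost: retry
     .L2:

   A weak compare-and-swap omits the backward branch and instead copies the
   exclusive-store status into the condition flags.  */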
8367 /* Split an atomic operation. */
8369 void
8370 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8371 rtx value, rtx model_rtx, rtx cond)
8373 enum machine_mode mode = GET_MODE (mem);
8374 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8375 rtx label, x;
8377 label = gen_label_rtx ();
8378 emit_label (label);
8380 if (new_out)
8381 new_out = gen_lowpart (wmode, new_out);
8382 if (old_out)
8383 old_out = gen_lowpart (wmode, old_out);
8384 else
8385 old_out = new_out;
8386 value = simplify_gen_subreg (wmode, value, mode, 0);
8388 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8390 switch (code)
8392 case SET:
8393 new_out = value;
8394 break;
8396 case NOT:
8397 x = gen_rtx_AND (wmode, old_out, value);
8398 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8399 x = gen_rtx_NOT (wmode, new_out);
8400 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8401 break;
8403 case MINUS:
8404 if (CONST_INT_P (value))
8406 value = GEN_INT (-INTVAL (value));
8407 code = PLUS;
8409 /* Fall through. */
8411 default:
8412 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8413 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8414 break;
8417 aarch64_emit_store_exclusive (mode, cond, mem,
8418 gen_lowpart (mode, new_out), model_rtx);
8420 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8421 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8422 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8423 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
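/* An illustrative sketch (assuming a relaxed SImode fetch-and-add,
   i.e. __atomic_fetch_add (&x, 1, __ATOMIC_RELAXED)): the split emits
   an exclusive loop along the lines of

     .Lretry:
       ldxr    w1, [x0]        ; OLD_OUT
       add     w2, w1, #1      ; NEW_OUT = OLD_OUT <op> VALUE
       stxr    w3, w2, [x0]    ; COND
       cbnz    w3, .Lretry

   Acquire/release memory models select the ldaxr/stlxr forms of the
   exclusive pair via MODEL_RTX.  */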
8426 static void
8427 aarch64_print_extension (void)
8429 const struct aarch64_option_extension *opt = NULL;
8431 for (opt = all_extensions; opt->name != NULL; opt++)
8432 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8433 asm_fprintf (asm_out_file, "+%s", opt->name);
8435 asm_fprintf (asm_out_file, "\n");
8438 static void
8439 aarch64_start_file (void)
8441 if (selected_arch)
8443 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8444 aarch64_print_extension ();
8446 else if (selected_cpu)
8448 const char *truncated_name
8449 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8450 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8451 aarch64_print_extension ();
8453 default_file_start ();
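/* For example (assumed command lines, output following the code above):
   with -march=armv8-a+crc the assembly file would begin with a line of
   the form

     .arch armv8-a+fp+simd+crc

   while with -mcpu= the ".cpu" directive is used instead, after
   aarch64_rewrite_selected_cpu has reduced a big.LITTLE pairing such
   as cortex-a57.cortex-a53 to a single core name.  */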
8456 /* Target hook for c_mode_for_suffix. */
8457 static enum machine_mode
8458 aarch64_c_mode_for_suffix (char suffix)
8460 if (suffix == 'q')
8461 return TFmode;
8463 return VOIDmode;
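/* For instance, with this hook in place a literal such as 1.0q is given
   TFmode, the 128-bit IEEE quad format that AArch64 also uses for long
   double; any other suffix falls back to the default handling.  */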
8466 /* We can only represent floating point constants which will fit in
8467 "quarter-precision" values. These values are characterised by
8468 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
8471 (-1)^s * (n/16) * 2^r
8473 Where:
8474 's' is the sign bit.
8475 'n' is an integer in the range 16 <= n <= 31.
8476 'r' is an integer in the range -3 <= r <= 4. */
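/* A few worked values, derived from the formula above: 1.0 = (16/16) * 2^0
   and 31.0 = (31/16) * 2^4 (the largest representable magnitude) are
   accepted, as is 0.125 = (16/16) * 2^-3 (the smallest positive
   magnitude).  A value such as 0.2 is rejected, since 1/5 cannot be
   written as n/16 times a power of two.  */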
8478 /* Return true iff X can be represented by a quarter-precision
8479 floating point immediate operand. Note, we cannot represent 0.0. */
8480 bool
8481 aarch64_float_const_representable_p (rtx x)
8483 /* This represents our current view of how many bits
8484 make up the mantissa. */
8485 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8486 int exponent;
8487 unsigned HOST_WIDE_INT mantissa, mask;
8488 HOST_WIDE_INT m1, m2;
8489 REAL_VALUE_TYPE r, m;
8491 if (!CONST_DOUBLE_P (x))
8492 return false;
8494 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8496 /* We cannot represent infinities, NaNs or +/-zero. We won't
8497 know if we have +zero until we analyse the mantissa, but we
8498 can reject the other invalid values. */
8499 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8500 || REAL_VALUE_MINUS_ZERO (r))
8501 return false;
8503 /* Extract exponent. */
8504 r = real_value_abs (&r);
8505 exponent = REAL_EXP (&r);
8507 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8508 highest (sign) bit, with a fixed binary point at bit point_pos.
8509 m1 holds the low part of the mantissa, m2 the high part.
8510 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8511 bits for the mantissa, this can fail (low bits will be lost). */
8512 real_ldexp (&m, &r, point_pos - exponent);
8513 REAL_VALUE_TO_INT (&m1, &m2, m);
8515 /* If the low part of the mantissa has bits set we cannot represent
8516 the value. */
8517 if (m1 != 0)
8518 return false;
8519 /* We have rejected the lower HOST_WIDE_INT, so update our
8520 understanding of how many bits lie in the mantissa and
8521 look only at the high HOST_WIDE_INT. */
8522 mantissa = m2;
8523 point_pos -= HOST_BITS_PER_WIDE_INT;
8525 /* We can only represent values with a mantissa of the form 1.xxxx. */
8526 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8527 if ((mantissa & mask) != 0)
8528 return false;
8530 /* Having filtered unrepresentable values, we may now remove all
8531 but the highest 5 bits. */
8532 mantissa >>= point_pos - 5;
8534 /* We cannot represent the value 0.0, so reject it. This is handled
8535 elsewhere. */
8536 if (mantissa == 0)
8537 return false;
8539 /* Then, as bit 4 is always set, we can mask it off, leaving
8540 the mantissa in the range [0, 15]. */
8541 mantissa &= ~(1 << 4);
8542 gcc_assert (mantissa <= 15);
8544 /* GCC internally does not use IEEE754-like encoding (where normalized
8545 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8546 Our mantissa values are shifted 4 places to the left relative to
8547 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8548 by 5 places to correct for GCC's representation. */
8549 exponent = 5 - exponent;
8551 return (exponent >= 0 && exponent <= 7);
8554 char*
8555 aarch64_output_simd_mov_immediate (rtx const_vector,
8556 enum machine_mode mode,
8557 unsigned width)
8559 bool is_valid;
8560 static char templ[40];
8561 const char *mnemonic;
8562 const char *shift_op;
8563 unsigned int lane_count = 0;
8564 char element_char;
8566 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8568 /* This will return true to show const_vector is legal for use as
8569 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8570 also update INFO to show how the immediate should be generated. */
8571 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8572 gcc_assert (is_valid);
8574 element_char = sizetochar (info.element_width);
8575 lane_count = width / info.element_width;
8577 mode = GET_MODE_INNER (mode);
8578 if (mode == SFmode || mode == DFmode)
8580 gcc_assert (info.shift == 0 && ! info.mvn);
8581 if (aarch64_float_const_zero_rtx_p (info.value))
8582 info.value = GEN_INT (0);
8583 else
8585 #define buf_size 20
8586 REAL_VALUE_TYPE r;
8587 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8588 char float_buf[buf_size] = {'\0'};
8589 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8590 #undef buf_size
8592 if (lane_count == 1)
8593 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8594 else
8595 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8596 lane_count, element_char, float_buf);
8597 return templ;
8601 mnemonic = info.mvn ? "mvni" : "movi";
8602 shift_op = info.msl ? "msl" : "lsl";
8604 if (lane_count == 1)
8605 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8606 mnemonic, UINTVAL (info.value));
8607 else if (info.shift)
8608 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8609 ", %s %d", mnemonic, lane_count, element_char,
8610 UINTVAL (info.value), shift_op, info.shift);
8611 else
8612 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8613 mnemonic, lane_count, element_char, UINTVAL (info.value));
8614 return templ;
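/* Illustrative outputs (assumed operand values, following the template
   strings above): a V4SImode vector with every element equal to 1 gives
   "movi\t%0.4s, 0x1"; elements of 0xab00 give "movi\t%0.4s, 0xab, lsl 8";
   and a representable non-zero floating-point constant is routed through
   the "fmov" branch with the value printed in decimal.  */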
8617 char*
8618 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8619 enum machine_mode mode)
8621 enum machine_mode vmode;
8623 gcc_assert (!VECTOR_MODE_P (mode));
8624 vmode = aarch64_simd_container_mode (mode, 64);
8625 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8626 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8629 /* Split operands into moves from op[1] + op[2] into op[0]. */
8631 void
8632 aarch64_split_combinev16qi (rtx operands[3])
8634 unsigned int dest = REGNO (operands[0]);
8635 unsigned int src1 = REGNO (operands[1]);
8636 unsigned int src2 = REGNO (operands[2]);
8637 enum machine_mode halfmode = GET_MODE (operands[1]);
8638 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8639 rtx destlo, desthi;
8641 gcc_assert (halfmode == V16QImode);
8643 if (src1 == dest && src2 == dest + halfregs)
8645 /* No-op move. Can't split to nothing; emit something. */
8646 emit_note (NOTE_INSN_DELETED);
8647 return;
8650 /* Preserve register attributes for variable tracking. */
8651 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8652 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8653 GET_MODE_SIZE (halfmode));
8655 /* Special case of reversed high/low parts. */
8656 if (reg_overlap_mentioned_p (operands[2], destlo)
8657 && reg_overlap_mentioned_p (operands[1], desthi))
8659 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8660 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8661 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8663 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8665 /* Try to avoid unnecessary moves if part of the result
8666 is in the right place already. */
8667 if (src1 != dest)
8668 emit_move_insn (destlo, operands[1]);
8669 if (src2 != dest + halfregs)
8670 emit_move_insn (desthi, operands[2]);
8672 else
8674 if (src2 != dest + halfregs)
8675 emit_move_insn (desthi, operands[2]);
8676 if (src1 != dest)
8677 emit_move_insn (destlo, operands[1]);
8681 /* vec_perm support. */
8683 #define MAX_VECT_LEN 16
8685 struct expand_vec_perm_d
8687 rtx target, op0, op1;
8688 unsigned char perm[MAX_VECT_LEN];
8689 enum machine_mode vmode;
8690 unsigned char nelt;
8691 bool one_vector_p;
8692 bool testing_p;
8695 /* Generate a variable permutation. */
8697 static void
8698 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8700 enum machine_mode vmode = GET_MODE (target);
8701 bool one_vector_p = rtx_equal_p (op0, op1);
8703 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8704 gcc_checking_assert (GET_MODE (op0) == vmode);
8705 gcc_checking_assert (GET_MODE (op1) == vmode);
8706 gcc_checking_assert (GET_MODE (sel) == vmode);
8707 gcc_checking_assert (TARGET_SIMD);
8709 if (one_vector_p)
8711 if (vmode == V8QImode)
8713 /* Expand the argument to a V16QI mode by duplicating it. */
8714 rtx pair = gen_reg_rtx (V16QImode);
8715 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8716 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8718 else
8720 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8723 else
8725 rtx pair;
8727 if (vmode == V8QImode)
8729 pair = gen_reg_rtx (V16QImode);
8730 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8731 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8733 else
8735 pair = gen_reg_rtx (OImode);
8736 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8737 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8742 void
8743 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8745 enum machine_mode vmode = GET_MODE (target);
8746 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
8747 bool one_vector_p = rtx_equal_p (op0, op1);
8748 rtx rmask[MAX_VECT_LEN], mask;
8750 gcc_checking_assert (!BYTES_BIG_ENDIAN);
8752 /* The TBL instruction does not use a modulo index, so we must take care
8753 of that ourselves. */
8754 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
8755 for (i = 0; i < nelt; ++i)
8756 rmask[i] = mask;
8757 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
8758 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8760 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
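/* Note on the masking in aarch64_expand_vec_perm above: an out-of-range
   TBL index yields a zero result byte rather than wrapping, so SEL is
   first masked with nelt - 1 (one input vector) or 2 * nelt - 1 (two
   input vectors) to obtain the modulo behaviour that vec_perm
   requires.  */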
8763 /* Recognize patterns suitable for the TRN instructions. */
8764 static bool
8765 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8767 unsigned int i, odd, mask, nelt = d->nelt;
8768 rtx out, in0, in1, x;
8769 rtx (*gen) (rtx, rtx, rtx);
8770 enum machine_mode vmode = d->vmode;
8772 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8773 return false;
8775 /* Note that these are little-endian tests.
8776 We correct for big-endian later. */
8777 if (d->perm[0] == 0)
8778 odd = 0;
8779 else if (d->perm[0] == 1)
8780 odd = 1;
8781 else
8782 return false;
8783 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8785 for (i = 0; i < nelt; i += 2)
8787 if (d->perm[i] != i + odd)
8788 return false;
8789 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8790 return false;
8793 /* Success! */
8794 if (d->testing_p)
8795 return true;
8797 in0 = d->op0;
8798 in1 = d->op1;
8799 if (BYTES_BIG_ENDIAN)
8801 x = in0, in0 = in1, in1 = x;
8802 odd = !odd;
8804 out = d->target;
8806 if (odd)
8808 switch (vmode)
8810 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8811 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8812 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8813 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8814 case V4SImode: gen = gen_aarch64_trn2v4si; break;
8815 case V2SImode: gen = gen_aarch64_trn2v2si; break;
8816 case V2DImode: gen = gen_aarch64_trn2v2di; break;
8817 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8818 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8819 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8820 default:
8821 return false;
8824 else
8826 switch (vmode)
8828 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8829 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8830 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8831 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8832 case V4SImode: gen = gen_aarch64_trn1v4si; break;
8833 case V2SImode: gen = gen_aarch64_trn1v2si; break;
8834 case V2DImode: gen = gen_aarch64_trn1v2di; break;
8835 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8836 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8837 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8838 default:
8839 return false;
8843 emit_insn (gen (out, in0, in1));
8844 return true;
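/* Assumed example for the TRN recogniser: on V4SImode with two distinct
   inputs, the selector {0, 4, 2, 6} is matched as TRN1 and {1, 5, 3, 7}
   as TRN2, interleaving the even, respectively odd, lanes of the two
   operands.  */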
8847 /* Recognize patterns suitable for the UZP instructions. */
8848 static bool
8849 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8851 unsigned int i, odd, mask, nelt = d->nelt;
8852 rtx out, in0, in1, x;
8853 rtx (*gen) (rtx, rtx, rtx);
8854 enum machine_mode vmode = d->vmode;
8856 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8857 return false;
8859 /* Note that these are little-endian tests.
8860 We correct for big-endian later. */
8861 if (d->perm[0] == 0)
8862 odd = 0;
8863 else if (d->perm[0] == 1)
8864 odd = 1;
8865 else
8866 return false;
8867 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8869 for (i = 0; i < nelt; i++)
8871 unsigned elt = (i * 2 + odd) & mask;
8872 if (d->perm[i] != elt)
8873 return false;
8876 /* Success! */
8877 if (d->testing_p)
8878 return true;
8880 in0 = d->op0;
8881 in1 = d->op1;
8882 if (BYTES_BIG_ENDIAN)
8884 x = in0, in0 = in1, in1 = x;
8885 odd = !odd;
8887 out = d->target;
8889 if (odd)
8891 switch (vmode)
8893 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8894 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8895 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8896 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8897 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8898 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8899 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8900 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8901 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8902 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8903 default:
8904 return false;
8907 else
8909 switch (vmode)
8911 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8912 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8913 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8914 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8915 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8916 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8917 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8918 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8919 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8920 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8921 default:
8922 return false;
8926 emit_insn (gen (out, in0, in1));
8927 return true;
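/* Assumed example for the UZP recogniser: on V4SImode with two distinct
   inputs, the selector {0, 2, 4, 6} maps to UZP1 (the even-indexed
   elements of the concatenated operands) and {1, 3, 5, 7} to UZP2 (the
   odd-indexed ones).  */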
8930 /* Recognize patterns suitable for the ZIP instructions. */
8931 static bool
8932 aarch64_evpc_zip (struct expand_vec_perm_d *d)
8934 unsigned int i, high, mask, nelt = d->nelt;
8935 rtx out, in0, in1, x;
8936 rtx (*gen) (rtx, rtx, rtx);
8937 enum machine_mode vmode = d->vmode;
8939 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8940 return false;
8942 /* Note that these are little-endian tests.
8943 We correct for big-endian later. */
8944 high = nelt / 2;
8945 if (d->perm[0] == high)
8946 /* Do Nothing. */
8948 else if (d->perm[0] == 0)
8949 high = 0;
8950 else
8951 return false;
8952 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8954 for (i = 0; i < nelt / 2; i++)
8956 unsigned elt = (i + high) & mask;
8957 if (d->perm[i * 2] != elt)
8958 return false;
8959 elt = (elt + nelt) & mask;
8960 if (d->perm[i * 2 + 1] != elt)
8961 return false;
8964 /* Success! */
8965 if (d->testing_p)
8966 return true;
8968 in0 = d->op0;
8969 in1 = d->op1;
8970 if (BYTES_BIG_ENDIAN)
8972 x = in0, in0 = in1, in1 = x;
8973 high = !high;
8975 out = d->target;
8977 if (high)
8979 switch (vmode)
8981 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8982 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8983 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8984 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8985 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8986 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8987 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8988 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8989 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8990 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8991 default:
8992 return false;
8995 else
8997 switch (vmode)
8999 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9000 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9001 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9002 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9003 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9004 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9005 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9006 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9007 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9008 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9009 default:
9010 return false;
9014 emit_insn (gen (out, in0, in1));
9015 return true;
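/* Assumed example for the ZIP recogniser: on V4SImode with two distinct
   inputs, the selector {0, 4, 1, 5} maps to ZIP1 (interleave the low
   halves of the operands) and {2, 6, 3, 7} to ZIP2 (interleave the high
   halves).  */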
9018 /* Recognize patterns for the EXT insn. */
9020 static bool
9021 aarch64_evpc_ext (struct expand_vec_perm_d *d)
9023 unsigned int i, nelt = d->nelt;
9024 rtx (*gen) (rtx, rtx, rtx, rtx);
9025 rtx offset;
9027 unsigned int location = d->perm[0]; /* Always < nelt. */
9029 /* Check if the extracted indices are increasing by one. */
9030 for (i = 1; i < nelt; i++)
9032 unsigned int required = location + i;
9033 if (d->one_vector_p)
9035 /* We'll pass the same vector in twice, so allow indices to wrap. */
9036 required &= (nelt - 1);
9038 if (d->perm[i] != required)
9039 return false;
9042 switch (d->vmode)
9044 case V16QImode: gen = gen_aarch64_extv16qi; break;
9045 case V8QImode: gen = gen_aarch64_extv8qi; break;
9046 case V4HImode: gen = gen_aarch64_extv4hi; break;
9047 case V8HImode: gen = gen_aarch64_extv8hi; break;
9048 case V2SImode: gen = gen_aarch64_extv2si; break;
9049 case V4SImode: gen = gen_aarch64_extv4si; break;
9050 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9051 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9052 case V2DImode: gen = gen_aarch64_extv2di; break;
9053 case V2DFmode: gen = gen_aarch64_extv2df; break;
9054 default:
9055 return false;
9058 /* Success! */
9059 if (d->testing_p)
9060 return true;
9062 /* The case where (location == 0) is a no-op for both big- and little-endian,
9063 and is removed by the mid-end at optimization levels -O1 and higher. */
9065 if (BYTES_BIG_ENDIAN && (location != 0))
9067 /* After setup, we want the high elements of the first vector (stored
9068 at the LSB end of the register), and the low elements of the second
9069 vector (stored at the MSB end of the register). So swap. */
9070 rtx temp = d->op0;
9071 d->op0 = d->op1;
9072 d->op1 = temp;
9073 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9074 location = nelt - location;
9077 offset = GEN_INT (location);
9078 emit_insn (gen (d->target, d->op0, d->op1, offset));
9079 return true;
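/* Assumed example for the EXT recogniser: on V4SImode with two distinct
   inputs, the selector {1, 2, 3, 4} describes a window of four
   consecutive elements starting at index 1, and so is emitted as an EXT
   of the two operands with an offset of one element.  */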
9082 static bool
9083 aarch64_evpc_dup (struct expand_vec_perm_d *d)
9085 rtx (*gen) (rtx, rtx, rtx);
9086 rtx out = d->target;
9087 rtx in0;
9088 enum machine_mode vmode = d->vmode;
9089 unsigned int i, elt, nelt = d->nelt;
9090 rtx lane;
9092 /* TODO: This may not be big-endian safe. */
9093 if (BYTES_BIG_ENDIAN)
9094 return false;
9096 elt = d->perm[0];
9097 for (i = 1; i < nelt; i++)
9099 if (elt != d->perm[i])
9100 return false;
9103 /* The generic preparation in aarch64_expand_vec_perm_const_1
9104 swaps the operand order and the permute indices if it finds
9105 d->perm[0] to be in the second operand. Thus, we can always
9106 use d->op0 and need not do any extra arithmetic to get the
9107 correct lane number. */
9108 in0 = d->op0;
9109 lane = GEN_INT (elt);
9111 switch (vmode)
9113 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9114 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9115 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9116 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9117 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9118 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9119 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9120 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9121 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9122 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9123 default:
9124 return false;
9127 emit_insn (gen (out, in0, lane));
9128 return true;
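/* Assumed example for the DUP recogniser: a selector whose indices are
   all equal, say {2, 2, 2, 2} on V4SImode, is emitted as a duplication
   of lane 2 of the first operand (after the operand normalisation done
   in aarch64_expand_vec_perm_const_1).  */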
9131 static bool
9132 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9134 rtx rperm[MAX_VECT_LEN], sel;
9135 enum machine_mode vmode = d->vmode;
9136 unsigned int i, nelt = d->nelt;
9138 if (d->testing_p)
9139 return true;
9141 /* Generic code will try constant permutation twice. Once with the
9142 original mode and again with the elements lowered to QImode.
9143 So wait and don't do the selector expansion ourselves. */
9144 if (vmode != V8QImode && vmode != V16QImode)
9145 return false;
9147 for (i = 0; i < nelt; ++i)
9149 int nunits = GET_MODE_NUNITS (vmode);
9151 /* If big-endian and two vectors, we end up with a weird mixed-endian
9152 mode on NEON. Reverse the index within each word but not the word
9153 itself. */
9154 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9155 : d->perm[i]);
9157 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9158 sel = force_reg (vmode, sel);
9160 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9161 return true;
9164 static bool
9165 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9167 /* The pattern matching functions above are written to look for a small
9168 number to begin the sequence (0, 1, N/2). If we begin with an index
9169 from the second operand, we can swap the operands. */
9170 if (d->perm[0] >= d->nelt)
9172 unsigned i, nelt = d->nelt;
9173 rtx x;
9175 for (i = 0; i < nelt; ++i)
9176 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
9178 x = d->op0;
9179 d->op0 = d->op1;
9180 d->op1 = x;
9183 if (TARGET_SIMD)
9185 if (aarch64_evpc_ext (d))
9186 return true;
9187 else if (aarch64_evpc_zip (d))
9188 return true;
9189 else if (aarch64_evpc_uzp (d))
9190 return true;
9191 else if (aarch64_evpc_trn (d))
9192 return true;
9193 else if (aarch64_evpc_dup (d))
9194 return true;
9195 return aarch64_evpc_tbl (d);
9197 return false;
9200 /* Expand a vec_perm_const pattern. */
9202 bool
9203 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9205 struct expand_vec_perm_d d;
9206 int i, nelt, which;
9208 d.target = target;
9209 d.op0 = op0;
9210 d.op1 = op1;
9212 d.vmode = GET_MODE (target);
9213 gcc_assert (VECTOR_MODE_P (d.vmode));
9214 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9215 d.testing_p = false;
9217 for (i = which = 0; i < nelt; ++i)
9219 rtx e = XVECEXP (sel, 0, i);
9220 int ei = INTVAL (e) & (2 * nelt - 1);
9221 which |= (ei < nelt ? 1 : 2);
9222 d.perm[i] = ei;
9225 switch (which)
9227 default:
9228 gcc_unreachable ();
9230 case 3:
9231 d.one_vector_p = false;
9232 if (!rtx_equal_p (op0, op1))
9233 break;
9235 /* The elements of PERM do not suggest that only the first operand
9236 is used, but both operands are identical. Allow easier matching
9237 of the permutation by folding the permutation into the single
9238 input vector. */
9239 /* Fall Through. */
9240 case 2:
9241 for (i = 0; i < nelt; ++i)
9242 d.perm[i] &= nelt - 1;
9243 d.op0 = op1;
9244 d.one_vector_p = true;
9245 break;
9247 case 1:
9248 d.op1 = op0;
9249 d.one_vector_p = true;
9250 break;
9253 return aarch64_expand_vec_perm_const_1 (&d);
9256 static bool
9257 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
9258 const unsigned char *sel)
9260 struct expand_vec_perm_d d;
9261 unsigned int i, nelt, which;
9262 bool ret;
9264 d.vmode = vmode;
9265 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9266 d.testing_p = true;
9267 memcpy (d.perm, sel, nelt);
9269 /* Calculate whether all elements are in one vector. */
9270 for (i = which = 0; i < nelt; ++i)
9272 unsigned char e = d.perm[i];
9273 gcc_assert (e < 2 * nelt);
9274 which |= (e < nelt ? 1 : 2);
9277 /* If all elements are from the second vector, reindex as if from the
9278 first vector. */
9279 if (which == 2)
9280 for (i = 0; i < nelt; ++i)
9281 d.perm[i] -= nelt;
9283 /* Check whether the mask can be applied to a single vector. */
9284 d.one_vector_p = (which != 3);
9286 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9287 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9288 if (!d.one_vector_p)
9289 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9291 start_sequence ();
9292 ret = aarch64_expand_vec_perm_const_1 (&d);
9293 end_sequence ();
9295 return ret;
9298 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9299 bool
9300 aarch64_cannot_change_mode_class (enum machine_mode from,
9301 enum machine_mode to,
9302 enum reg_class rclass)
9304 /* Full-reg subregs are allowed on general regs or any class if they are
9305 the same size. */
9306 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9307 || !reg_classes_intersect_p (FP_REGS, rclass))
9308 return false;
9310 /* Limited combinations of subregs are safe on FPREGs. Particularly,
9311 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9312 2. Scalar to Scalar for integer modes or same size float modes.
9313 3. Vector to Vector modes.
9314 4. On little-endian only, Vector-Structure to Vector modes. */
9315 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9317 if (aarch64_vector_mode_supported_p (from)
9318 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9319 return false;
9321 if (GET_MODE_NUNITS (from) == 1
9322 && GET_MODE_NUNITS (to) == 1
9323 && (GET_MODE_CLASS (from) == MODE_INT
9324 || from == to))
9325 return false;
9327 if (aarch64_vector_mode_supported_p (from)
9328 && aarch64_vector_mode_supported_p (to))
9329 return false;
9331 /* Within a vector structure straddling multiple vector registers
9332 we are in a mixed-endian representation. As such, we can't
9333 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9334 switch between vectors and vector structures cheaply. */
9335 if (!BYTES_BIG_ENDIAN)
9336 if ((aarch64_vector_mode_supported_p (from)
9337 && aarch64_vect_struct_mode_p (to))
9338 || (aarch64_vector_mode_supported_p (to)
9339 && aarch64_vect_struct_mode_p (from)))
9340 return false;
9343 return true;
9346 /* Implement MODES_TIEABLE_P. */
9348 bool
9349 aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9351 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9352 return true;
9354 /* We specifically want to allow elements of "structure" modes to
9355 be tieable to the structure. This more general condition allows
9356 other rarer situations too. */
9357 if (TARGET_SIMD
9358 && aarch64_vector_mode_p (mode1)
9359 && aarch64_vector_mode_p (mode2))
9360 return true;
9362 return false;
9365 #undef TARGET_ADDRESS_COST
9366 #define TARGET_ADDRESS_COST aarch64_address_cost
9368 /* This hook determines whether unnamed bitfields affect the alignment
9369 of the containing structure. The hook returns true if the structure
9370 should inherit the alignment requirements of an unnamed bitfield's
9371 type. */
9372 #undef TARGET_ALIGN_ANON_BITFIELD
9373 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9375 #undef TARGET_ASM_ALIGNED_DI_OP
9376 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9378 #undef TARGET_ASM_ALIGNED_HI_OP
9379 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9381 #undef TARGET_ASM_ALIGNED_SI_OP
9382 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9384 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9385 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9386 hook_bool_const_tree_hwi_hwi_const_tree_true
9388 #undef TARGET_ASM_FILE_START
9389 #define TARGET_ASM_FILE_START aarch64_start_file
9391 #undef TARGET_ASM_OUTPUT_MI_THUNK
9392 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9394 #undef TARGET_ASM_SELECT_RTX_SECTION
9395 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9397 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9398 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9400 #undef TARGET_BUILD_BUILTIN_VA_LIST
9401 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9403 #undef TARGET_CALLEE_COPIES
9404 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
9406 #undef TARGET_CAN_ELIMINATE
9407 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
9409 #undef TARGET_CANNOT_FORCE_CONST_MEM
9410 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9412 #undef TARGET_CONDITIONAL_REGISTER_USAGE
9413 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9415 /* Only the least significant bit is used for initialization guard
9416 variables. */
9417 #undef TARGET_CXX_GUARD_MASK_BIT
9418 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9420 #undef TARGET_C_MODE_FOR_SUFFIX
9421 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9423 #ifdef TARGET_BIG_ENDIAN_DEFAULT
9424 #undef TARGET_DEFAULT_TARGET_FLAGS
9425 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9426 #endif
9428 #undef TARGET_CLASS_MAX_NREGS
9429 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9431 #undef TARGET_BUILTIN_DECL
9432 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
9434 #undef TARGET_EXPAND_BUILTIN
9435 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9437 #undef TARGET_EXPAND_BUILTIN_VA_START
9438 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9440 #undef TARGET_FOLD_BUILTIN
9441 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9443 #undef TARGET_FUNCTION_ARG
9444 #define TARGET_FUNCTION_ARG aarch64_function_arg
9446 #undef TARGET_FUNCTION_ARG_ADVANCE
9447 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9449 #undef TARGET_FUNCTION_ARG_BOUNDARY
9450 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9452 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9453 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9455 #undef TARGET_FUNCTION_VALUE
9456 #define TARGET_FUNCTION_VALUE aarch64_function_value
9458 #undef TARGET_FUNCTION_VALUE_REGNO_P
9459 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9461 #undef TARGET_FRAME_POINTER_REQUIRED
9462 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9464 #undef TARGET_GIMPLE_FOLD_BUILTIN
9465 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9467 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9468 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9470 #undef TARGET_INIT_BUILTINS
9471 #define TARGET_INIT_BUILTINS aarch64_init_builtins
9473 #undef TARGET_LEGITIMATE_ADDRESS_P
9474 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9476 #undef TARGET_LEGITIMATE_CONSTANT_P
9477 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9479 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9480 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9482 #undef TARGET_LRA_P
9483 #define TARGET_LRA_P aarch64_lra_p
9485 #undef TARGET_MANGLE_TYPE
9486 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9488 #undef TARGET_MEMORY_MOVE_COST
9489 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9491 #undef TARGET_MUST_PASS_IN_STACK
9492 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9494 /* This target hook should return true if accesses to volatile bitfields
9495 should use the narrowest mode possible. It should return false if these
9496 accesses should use the bitfield container type. */
9497 #undef TARGET_NARROW_VOLATILE_BITFIELD
9498 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9500 #undef TARGET_OPTION_OVERRIDE
9501 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9503 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9504 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9505 aarch64_override_options_after_change
9507 #undef TARGET_PASS_BY_REFERENCE
9508 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9510 #undef TARGET_PREFERRED_RELOAD_CLASS
9511 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9513 #undef TARGET_SECONDARY_RELOAD
9514 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9516 #undef TARGET_SHIFT_TRUNCATION_MASK
9517 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9519 #undef TARGET_SETUP_INCOMING_VARARGS
9520 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9522 #undef TARGET_STRUCT_VALUE_RTX
9523 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9525 #undef TARGET_REGISTER_MOVE_COST
9526 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9528 #undef TARGET_RETURN_IN_MEMORY
9529 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9531 #undef TARGET_RETURN_IN_MSB
9532 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9534 #undef TARGET_RTX_COSTS
9535 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
9537 #undef TARGET_SCHED_ISSUE_RATE
9538 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9540 #undef TARGET_TRAMPOLINE_INIT
9541 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9543 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9544 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9546 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9547 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9549 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9550 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9552 #undef TARGET_VECTORIZE_ADD_STMT_COST
9553 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9555 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9556 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9557 aarch64_builtin_vectorization_cost
9559 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9560 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9562 #undef TARGET_VECTORIZE_BUILTINS
9563 #define TARGET_VECTORIZE_BUILTINS
9565 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9566 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9567 aarch64_builtin_vectorized_function
9569 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9570 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9571 aarch64_autovectorize_vector_sizes
9573 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
9574 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
9575 aarch64_atomic_assign_expand_fenv
9577 /* Section anchor support. */
9579 #undef TARGET_MIN_ANCHOR_OFFSET
9580 #define TARGET_MIN_ANCHOR_OFFSET -256
9582 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9583 byte offset; we can do much more for larger data types, but have no way
9584 to determine the size of the access. We assume accesses are aligned. */
9585 #undef TARGET_MAX_ANCHOR_OFFSET
9586 #define TARGET_MAX_ANCHOR_OFFSET 4095
9588 #undef TARGET_VECTOR_ALIGNMENT
9589 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9591 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9592 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9593 aarch64_simd_vector_alignment_reachable
9595 /* vec_perm support. */
9597 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9598 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9599 aarch64_vectorize_vec_perm_const_ok
9602 #undef TARGET_FIXED_CONDITION_CODE_REGS
9603 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9605 struct gcc_target targetm = TARGET_INITIALIZER;
9607 #include "gt-aarch64.h"