[AArch64] Add cost handling of CALLER_SAVE_REGS and POINTER_REGS
[official-gcc.git] / gcc / config / aarch64 / aarch64.c
blob6f21fd9239bbf6325729ce59bb2bf708ee8a9faa
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "hash-table.h"
50 #include "vec.h"
51 #include "basic-block.h"
52 #include "tree-ssa-alias.h"
53 #include "internal-fn.h"
54 #include "gimple-fold.h"
55 #include "tree-eh.h"
56 #include "gimple-expr.h"
57 #include "is-a.h"
58 #include "gimple.h"
59 #include "gimplify.h"
60 #include "optabs.h"
61 #include "dwarf2.h"
62 #include "cfgloop.h"
63 #include "tree-vectorizer.h"
64 #include "config/arm/aarch-cost-tables.h"
65 #include "dumpfile.h"
66 #include "builtins.h"
68 /* Defined for convenience. */
69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
71 /* Classifies an address.
73 ADDRESS_REG_IMM
74 A simple base register plus immediate offset.
76 ADDRESS_REG_WB
77 A base register indexed by immediate offset with writeback.
79 ADDRESS_REG_REG
80 A base register indexed by (optionally scaled) register.
82 ADDRESS_REG_UXTW
83 A base register indexed by (optionally scaled) zero-extended register.
85 ADDRESS_REG_SXTW
86 A base register indexed by (optionally scaled) sign-extended register.
88 ADDRESS_LO_SUM
89 A LO_SUM rtx with a base register and "LO12" symbol relocation.
91 ADDRESS_SYMBOLIC:
92 A constant symbolic address, in pc-relative literal pool. */
94 enum aarch64_address_type {
95 ADDRESS_REG_IMM,
96 ADDRESS_REG_WB,
97 ADDRESS_REG_REG,
98 ADDRESS_REG_UXTW,
99 ADDRESS_REG_SXTW,
100 ADDRESS_LO_SUM,
101 ADDRESS_SYMBOLIC
104 struct aarch64_address_info {
105 enum aarch64_address_type type;
106 rtx base;
107 rtx offset;
108 int shift;
109 enum aarch64_symbol_type symbol_type;
112 struct simd_immediate_info
114 rtx value;
115 int shift;
116 int element_width;
117 bool mvn;
118 bool msl;
121 /* The current code model. */
122 enum aarch64_code_model aarch64_cmodel;
124 #ifdef HAVE_AS_TLS
125 #undef TARGET_HAVE_TLS
126 #define TARGET_HAVE_TLS 1
127 #endif
129 static bool aarch64_lra_p (void);
130 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
132 const_tree,
133 enum machine_mode *, int *,
134 bool *);
135 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
137 static void aarch64_override_options_after_change (void);
138 static bool aarch64_vector_mode_supported_p (enum machine_mode);
139 static unsigned bit_count (unsigned HOST_WIDE_INT);
140 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
141 const unsigned char *sel);
142 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
144 /* The processor for which instructions should be scheduled. */
145 enum aarch64_processor aarch64_tune = cortexa53;
147 /* The current tuning set. */
148 const struct tune_params *aarch64_tune_params;
150 /* Mask to specify which instructions we are allowed to generate. */
151 unsigned long aarch64_isa_flags = 0;
153 /* Mask to specify which instruction scheduling options should be used. */
154 unsigned long aarch64_tune_flags = 0;
156 /* Tuning parameters. */
158 #if HAVE_DESIGNATED_INITIALIZERS
159 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
160 #else
161 #define NAMED_PARAM(NAME, VAL) (VAL)
162 #endif
164 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
165 __extension__
166 #endif
168 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
169 __extension__
170 #endif
171 static const struct cpu_addrcost_table generic_addrcost_table =
173 #if HAVE_DESIGNATED_INITIALIZERS
174 .addr_scale_costs =
175 #endif
177 NAMED_PARAM (hi, 0),
178 NAMED_PARAM (si, 0),
179 NAMED_PARAM (di, 0),
180 NAMED_PARAM (ti, 0),
182 NAMED_PARAM (pre_modify, 0),
183 NAMED_PARAM (post_modify, 0),
184 NAMED_PARAM (register_offset, 0),
185 NAMED_PARAM (register_extend, 0),
186 NAMED_PARAM (imm_offset, 0)
189 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
190 __extension__
191 #endif
192 static const struct cpu_addrcost_table cortexa57_addrcost_table =
194 #if HAVE_DESIGNATED_INITIALIZERS
195 .addr_scale_costs =
196 #endif
198 NAMED_PARAM (hi, 1),
199 NAMED_PARAM (si, 0),
200 NAMED_PARAM (di, 0),
201 NAMED_PARAM (ti, 1),
203 NAMED_PARAM (pre_modify, 0),
204 NAMED_PARAM (post_modify, 0),
205 NAMED_PARAM (register_offset, 0),
206 NAMED_PARAM (register_extend, 0),
207 NAMED_PARAM (imm_offset, 0),
210 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
211 __extension__
212 #endif
213 static const struct cpu_regmove_cost generic_regmove_cost =
215 NAMED_PARAM (GP2GP, 1),
216 NAMED_PARAM (GP2FP, 2),
217 NAMED_PARAM (FP2GP, 2),
218 /* We currently do not provide direct support for TFmode Q->Q move.
219 Therefore we need to raise the cost above 2 in order to have
220 reload handle the situation. */
221 NAMED_PARAM (FP2FP, 4)
224 /* Generic costs for vector insn classes. */
225 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
226 __extension__
227 #endif
228 static const struct cpu_vector_cost generic_vector_cost =
230 NAMED_PARAM (scalar_stmt_cost, 1),
231 NAMED_PARAM (scalar_load_cost, 1),
232 NAMED_PARAM (scalar_store_cost, 1),
233 NAMED_PARAM (vec_stmt_cost, 1),
234 NAMED_PARAM (vec_to_scalar_cost, 1),
235 NAMED_PARAM (scalar_to_vec_cost, 1),
236 NAMED_PARAM (vec_align_load_cost, 1),
237 NAMED_PARAM (vec_unalign_load_cost, 1),
238 NAMED_PARAM (vec_unalign_store_cost, 1),
239 NAMED_PARAM (vec_store_cost, 1),
240 NAMED_PARAM (cond_taken_branch_cost, 3),
241 NAMED_PARAM (cond_not_taken_branch_cost, 1)
244 /* Generic costs for vector insn classes. */
245 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
246 __extension__
247 #endif
248 static const struct cpu_vector_cost cortexa57_vector_cost =
250 NAMED_PARAM (scalar_stmt_cost, 1),
251 NAMED_PARAM (scalar_load_cost, 4),
252 NAMED_PARAM (scalar_store_cost, 1),
253 NAMED_PARAM (vec_stmt_cost, 3),
254 NAMED_PARAM (vec_to_scalar_cost, 8),
255 NAMED_PARAM (scalar_to_vec_cost, 8),
256 NAMED_PARAM (vec_align_load_cost, 5),
257 NAMED_PARAM (vec_unalign_load_cost, 5),
258 NAMED_PARAM (vec_unalign_store_cost, 1),
259 NAMED_PARAM (vec_store_cost, 1),
260 NAMED_PARAM (cond_taken_branch_cost, 1),
261 NAMED_PARAM (cond_not_taken_branch_cost, 1)
264 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
265 __extension__
266 #endif
267 static const struct tune_params generic_tunings =
269 &cortexa57_extra_costs,
270 &generic_addrcost_table,
271 &generic_regmove_cost,
272 &generic_vector_cost,
273 NAMED_PARAM (memmov_cost, 4),
274 NAMED_PARAM (issue_rate, 2)
277 static const struct tune_params cortexa53_tunings =
279 &cortexa53_extra_costs,
280 &generic_addrcost_table,
281 &generic_regmove_cost,
282 &generic_vector_cost,
283 NAMED_PARAM (memmov_cost, 4),
284 NAMED_PARAM (issue_rate, 2)
287 static const struct tune_params cortexa57_tunings =
289 &cortexa57_extra_costs,
290 &cortexa57_addrcost_table,
291 &generic_regmove_cost,
292 &cortexa57_vector_cost,
293 NAMED_PARAM (memmov_cost, 4),
294 NAMED_PARAM (issue_rate, 3)
297 /* A processor implementing AArch64. */
298 struct processor
300 const char *const name;
301 enum aarch64_processor core;
302 const char *arch;
303 const unsigned long flags;
304 const struct tune_params *const tune;
307 /* Processor cores implementing AArch64. */
308 static const struct processor all_cores[] =
310 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
311 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
312 #include "aarch64-cores.def"
313 #undef AARCH64_CORE
314 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
315 {NULL, aarch64_none, NULL, 0, NULL}
318 /* Architectures implementing AArch64. */
319 static const struct processor all_architectures[] =
321 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
322 {NAME, CORE, #ARCH, FLAGS, NULL},
323 #include "aarch64-arches.def"
324 #undef AARCH64_ARCH
325 {NULL, aarch64_none, NULL, 0, NULL}
328 /* Target specification. These are populated as commandline arguments
329 are processed, or NULL if not specified. */
330 static const struct processor *selected_arch;
331 static const struct processor *selected_cpu;
332 static const struct processor *selected_tune;
334 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
336 /* An ISA extension in the co-processor and main instruction set space. */
337 struct aarch64_option_extension
339 const char *const name;
340 const unsigned long flags_on;
341 const unsigned long flags_off;
344 /* ISA extensions in AArch64. */
345 static const struct aarch64_option_extension all_extensions[] =
347 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
348 {NAME, FLAGS_ON, FLAGS_OFF},
349 #include "aarch64-option-extensions.def"
350 #undef AARCH64_OPT_EXTENSION
351 {NULL, 0, 0}
354 /* Used to track the size of an address when generating a pre/post
355 increment address. */
356 static enum machine_mode aarch64_memory_reference_mode;
358 /* Used to force GTY into this file. */
359 static GTY(()) int gty_dummy;
361 /* A table of valid AArch64 "bitmask immediate" values for
362 logical instructions. */
364 #define AARCH64_NUM_BITMASKS 5334
365 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
367 typedef enum aarch64_cond_code
369 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
370 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
371 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
373 aarch64_cc;
375 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
377 /* The condition codes of the processor, and the inverse function. */
378 static const char * const aarch64_condition_codes[] =
380 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
381 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
384 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
385 unsigned
386 aarch64_dbx_register_number (unsigned regno)
388 if (GP_REGNUM_P (regno))
389 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
390 else if (regno == SP_REGNUM)
391 return AARCH64_DWARF_SP;
392 else if (FP_REGNUM_P (regno))
393 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
395 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
396 equivalent DWARF register. */
397 return DWARF_FRAME_REGISTERS;
400 /* Return TRUE if MODE is any of the large INT modes. */
401 static bool
402 aarch64_vect_struct_mode_p (enum machine_mode mode)
404 return mode == OImode || mode == CImode || mode == XImode;
407 /* Return TRUE if MODE is any of the vector modes. */
408 static bool
409 aarch64_vector_mode_p (enum machine_mode mode)
411 return aarch64_vector_mode_supported_p (mode)
412 || aarch64_vect_struct_mode_p (mode);
415 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
416 static bool
417 aarch64_array_mode_supported_p (enum machine_mode mode,
418 unsigned HOST_WIDE_INT nelems)
420 if (TARGET_SIMD
421 && AARCH64_VALID_SIMD_QREG_MODE (mode)
422 && (nelems >= 2 && nelems <= 4))
423 return true;
425 return false;
428 /* Implement HARD_REGNO_NREGS. */
431 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
433 switch (aarch64_regno_regclass (regno))
435 case FP_REGS:
436 case FP_LO_REGS:
437 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
438 default:
439 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
441 gcc_unreachable ();
444 /* Implement HARD_REGNO_MODE_OK. */
447 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
449 if (GET_MODE_CLASS (mode) == MODE_CC)
450 return regno == CC_REGNUM;
452 if (regno == SP_REGNUM)
453 /* The purpose of comparing with ptr_mode is to support the
454 global register variable associated with the stack pointer
455 register via the syntax of asm ("wsp") in ILP32. */
456 return mode == Pmode || mode == ptr_mode;
458 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
459 return mode == Pmode;
461 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
462 return 1;
464 if (FP_REGNUM_P (regno))
466 if (aarch64_vect_struct_mode_p (mode))
467 return
468 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
469 else
470 return 1;
473 return 0;
476 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
477 enum machine_mode
478 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
479 enum machine_mode mode)
481 /* Handle modes that fit within single registers. */
482 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
484 if (GET_MODE_SIZE (mode) >= 4)
485 return mode;
486 else
487 return SImode;
489 /* Fall back to generic for multi-reg and very large modes. */
490 else
491 return choose_hard_reg_mode (regno, nregs, false);
494 /* Return true if calls to DECL should be treated as
495 long-calls (ie called via a register). */
496 static bool
497 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
499 return false;
502 /* Return true if calls to symbol-ref SYM should be treated as
503 long-calls (ie called via a register). */
504 bool
505 aarch64_is_long_call_p (rtx sym)
507 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
510 /* Return true if the offsets to a zero/sign-extract operation
511 represent an expression that matches an extend operation. The
512 operands represent the paramters from
514 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
515 bool
516 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
517 rtx extract_imm)
519 HOST_WIDE_INT mult_val, extract_val;
521 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
522 return false;
524 mult_val = INTVAL (mult_imm);
525 extract_val = INTVAL (extract_imm);
527 if (extract_val > 8
528 && extract_val < GET_MODE_BITSIZE (mode)
529 && exact_log2 (extract_val & ~7) > 0
530 && (extract_val & 7) <= 4
531 && mult_val == (1 << (extract_val & 7)))
532 return true;
534 return false;
537 /* Emit an insn that's a simple single-set. Both the operands must be
538 known to be valid. */
539 inline static rtx
540 emit_set_insn (rtx x, rtx y)
542 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
545 /* X and Y are two things to compare using CODE. Emit the compare insn and
546 return the rtx for register 0 in the proper mode. */
548 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
550 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
551 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
553 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
554 return cc_reg;
557 /* Build the SYMBOL_REF for __tls_get_addr. */
559 static GTY(()) rtx tls_get_addr_libfunc;
562 aarch64_tls_get_addr (void)
564 if (!tls_get_addr_libfunc)
565 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
566 return tls_get_addr_libfunc;
569 /* Return the TLS model to use for ADDR. */
571 static enum tls_model
572 tls_symbolic_operand_type (rtx addr)
574 enum tls_model tls_kind = TLS_MODEL_NONE;
575 rtx sym, addend;
577 if (GET_CODE (addr) == CONST)
579 split_const (addr, &sym, &addend);
580 if (GET_CODE (sym) == SYMBOL_REF)
581 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
583 else if (GET_CODE (addr) == SYMBOL_REF)
584 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
586 return tls_kind;
589 /* We'll allow lo_sum's in addresses in our legitimate addresses
590 so that combine would take care of combining addresses where
591 necessary, but for generation purposes, we'll generate the address
592 as :
593 RTL Absolute
594 tmp = hi (symbol_ref); adrp x1, foo
595 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
598 PIC TLS
599 adrp x1, :got:foo adrp tmp, :tlsgd:foo
600 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
601 bl __tls_get_addr
604 Load TLS symbol, depending on TLS mechanism and TLS access model.
606 Global Dynamic - Traditional TLS:
607 adrp tmp, :tlsgd:imm
608 add dest, tmp, #:tlsgd_lo12:imm
609 bl __tls_get_addr
611 Global Dynamic - TLS Descriptors:
612 adrp dest, :tlsdesc:imm
613 ldr tmp, [dest, #:tlsdesc_lo12:imm]
614 add dest, dest, #:tlsdesc_lo12:imm
615 blr tmp
616 mrs tp, tpidr_el0
617 add dest, dest, tp
619 Initial Exec:
620 mrs tp, tpidr_el0
621 adrp tmp, :gottprel:imm
622 ldr dest, [tmp, #:gottprel_lo12:imm]
623 add dest, dest, tp
625 Local Exec:
626 mrs tp, tpidr_el0
627 add t0, tp, #:tprel_hi12:imm
628 add t0, #:tprel_lo12_nc:imm
631 static void
632 aarch64_load_symref_appropriately (rtx dest, rtx imm,
633 enum aarch64_symbol_type type)
635 switch (type)
637 case SYMBOL_SMALL_ABSOLUTE:
639 /* In ILP32, the mode of dest can be either SImode or DImode. */
640 rtx tmp_reg = dest;
641 enum machine_mode mode = GET_MODE (dest);
643 gcc_assert (mode == Pmode || mode == ptr_mode);
645 if (can_create_pseudo_p ())
646 tmp_reg = gen_reg_rtx (mode);
648 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
649 emit_insn (gen_add_losym (dest, tmp_reg, imm));
650 return;
653 case SYMBOL_TINY_ABSOLUTE:
654 emit_insn (gen_rtx_SET (Pmode, dest, imm));
655 return;
657 case SYMBOL_SMALL_GOT:
659 /* In ILP32, the mode of dest can be either SImode or DImode,
660 while the got entry is always of SImode size. The mode of
661 dest depends on how dest is used: if dest is assigned to a
662 pointer (e.g. in the memory), it has SImode; it may have
663 DImode if dest is dereferenced to access the memeory.
664 This is why we have to handle three different ldr_got_small
665 patterns here (two patterns for ILP32). */
666 rtx tmp_reg = dest;
667 enum machine_mode mode = GET_MODE (dest);
669 if (can_create_pseudo_p ())
670 tmp_reg = gen_reg_rtx (mode);
672 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
673 if (mode == ptr_mode)
675 if (mode == DImode)
676 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
677 else
678 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
680 else
682 gcc_assert (mode == Pmode);
683 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
686 return;
689 case SYMBOL_SMALL_TLSGD:
691 rtx_insn *insns;
692 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
694 start_sequence ();
695 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
696 insns = get_insns ();
697 end_sequence ();
699 RTL_CONST_CALL_P (insns) = 1;
700 emit_libcall_block (insns, dest, result, imm);
701 return;
704 case SYMBOL_SMALL_TLSDESC:
706 enum machine_mode mode = GET_MODE (dest);
707 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
708 rtx tp;
710 gcc_assert (mode == Pmode || mode == ptr_mode);
712 /* In ILP32, the got entry is always of SImode size. Unlike
713 small GOT, the dest is fixed at reg 0. */
714 if (TARGET_ILP32)
715 emit_insn (gen_tlsdesc_small_si (imm));
716 else
717 emit_insn (gen_tlsdesc_small_di (imm));
718 tp = aarch64_load_tp (NULL);
720 if (mode != Pmode)
721 tp = gen_lowpart (mode, tp);
723 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
724 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
725 return;
728 case SYMBOL_SMALL_GOTTPREL:
730 /* In ILP32, the mode of dest can be either SImode or DImode,
731 while the got entry is always of SImode size. The mode of
732 dest depends on how dest is used: if dest is assigned to a
733 pointer (e.g. in the memory), it has SImode; it may have
734 DImode if dest is dereferenced to access the memeory.
735 This is why we have to handle three different tlsie_small
736 patterns here (two patterns for ILP32). */
737 enum machine_mode mode = GET_MODE (dest);
738 rtx tmp_reg = gen_reg_rtx (mode);
739 rtx tp = aarch64_load_tp (NULL);
741 if (mode == ptr_mode)
743 if (mode == DImode)
744 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
745 else
747 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
748 tp = gen_lowpart (mode, tp);
751 else
753 gcc_assert (mode == Pmode);
754 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
757 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
758 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
759 return;
762 case SYMBOL_SMALL_TPREL:
764 rtx tp = aarch64_load_tp (NULL);
765 emit_insn (gen_tlsle_small (dest, tp, imm));
766 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
767 return;
770 case SYMBOL_TINY_GOT:
771 emit_insn (gen_ldr_got_tiny (dest, imm));
772 return;
774 default:
775 gcc_unreachable ();
779 /* Emit a move from SRC to DEST. Assume that the move expanders can
780 handle all moves if !can_create_pseudo_p (). The distinction is
781 important because, unlike emit_move_insn, the move expanders know
782 how to force Pmode objects into the constant pool even when the
783 constant pool address is not itself legitimate. */
784 static rtx
785 aarch64_emit_move (rtx dest, rtx src)
787 return (can_create_pseudo_p ()
788 ? emit_move_insn (dest, src)
789 : emit_move_insn_1 (dest, src));
792 /* Split a 128-bit move operation into two 64-bit move operations,
793 taking care to handle partial overlap of register to register
794 copies. Special cases are needed when moving between GP regs and
795 FP regs. SRC can be a register, constant or memory; DST a register
796 or memory. If either operand is memory it must not have any side
797 effects. */
798 void
799 aarch64_split_128bit_move (rtx dst, rtx src)
801 rtx dst_lo, dst_hi;
802 rtx src_lo, src_hi;
804 enum machine_mode mode = GET_MODE (dst);
806 gcc_assert (mode == TImode || mode == TFmode);
807 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
808 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
810 if (REG_P (dst) && REG_P (src))
812 int src_regno = REGNO (src);
813 int dst_regno = REGNO (dst);
815 /* Handle FP <-> GP regs. */
816 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
818 src_lo = gen_lowpart (word_mode, src);
819 src_hi = gen_highpart (word_mode, src);
821 if (mode == TImode)
823 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
824 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
826 else
828 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
829 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
831 return;
833 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
835 dst_lo = gen_lowpart (word_mode, dst);
836 dst_hi = gen_highpart (word_mode, dst);
838 if (mode == TImode)
840 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
841 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
843 else
845 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
846 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
848 return;
852 dst_lo = gen_lowpart (word_mode, dst);
853 dst_hi = gen_highpart (word_mode, dst);
854 src_lo = gen_lowpart (word_mode, src);
855 src_hi = gen_highpart_mode (word_mode, mode, src);
857 /* At most one pairing may overlap. */
858 if (reg_overlap_mentioned_p (dst_lo, src_hi))
860 aarch64_emit_move (dst_hi, src_hi);
861 aarch64_emit_move (dst_lo, src_lo);
863 else
865 aarch64_emit_move (dst_lo, src_lo);
866 aarch64_emit_move (dst_hi, src_hi);
870 bool
871 aarch64_split_128bit_move_p (rtx dst, rtx src)
873 return (! REG_P (src)
874 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
877 /* Split a complex SIMD combine. */
879 void
880 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
882 enum machine_mode src_mode = GET_MODE (src1);
883 enum machine_mode dst_mode = GET_MODE (dst);
885 gcc_assert (VECTOR_MODE_P (dst_mode));
887 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
889 rtx (*gen) (rtx, rtx, rtx);
891 switch (src_mode)
893 case V8QImode:
894 gen = gen_aarch64_simd_combinev8qi;
895 break;
896 case V4HImode:
897 gen = gen_aarch64_simd_combinev4hi;
898 break;
899 case V2SImode:
900 gen = gen_aarch64_simd_combinev2si;
901 break;
902 case V2SFmode:
903 gen = gen_aarch64_simd_combinev2sf;
904 break;
905 case DImode:
906 gen = gen_aarch64_simd_combinedi;
907 break;
908 case DFmode:
909 gen = gen_aarch64_simd_combinedf;
910 break;
911 default:
912 gcc_unreachable ();
915 emit_insn (gen (dst, src1, src2));
916 return;
920 /* Split a complex SIMD move. */
922 void
923 aarch64_split_simd_move (rtx dst, rtx src)
925 enum machine_mode src_mode = GET_MODE (src);
926 enum machine_mode dst_mode = GET_MODE (dst);
928 gcc_assert (VECTOR_MODE_P (dst_mode));
930 if (REG_P (dst) && REG_P (src))
932 rtx (*gen) (rtx, rtx);
934 gcc_assert (VECTOR_MODE_P (src_mode));
936 switch (src_mode)
938 case V16QImode:
939 gen = gen_aarch64_split_simd_movv16qi;
940 break;
941 case V8HImode:
942 gen = gen_aarch64_split_simd_movv8hi;
943 break;
944 case V4SImode:
945 gen = gen_aarch64_split_simd_movv4si;
946 break;
947 case V2DImode:
948 gen = gen_aarch64_split_simd_movv2di;
949 break;
950 case V4SFmode:
951 gen = gen_aarch64_split_simd_movv4sf;
952 break;
953 case V2DFmode:
954 gen = gen_aarch64_split_simd_movv2df;
955 break;
956 default:
957 gcc_unreachable ();
960 emit_insn (gen (dst, src));
961 return;
965 static rtx
966 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
968 if (can_create_pseudo_p ())
969 return force_reg (mode, value);
970 else
972 x = aarch64_emit_move (x, value);
973 return x;
978 static rtx
979 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
981 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
983 rtx high;
984 /* Load the full offset into a register. This
985 might be improvable in the future. */
986 high = GEN_INT (offset);
987 offset = 0;
988 high = aarch64_force_temporary (mode, temp, high);
989 reg = aarch64_force_temporary (mode, temp,
990 gen_rtx_PLUS (mode, high, reg));
992 return plus_constant (mode, reg, offset);
995 void
996 aarch64_expand_mov_immediate (rtx dest, rtx imm)
998 enum machine_mode mode = GET_MODE (dest);
999 unsigned HOST_WIDE_INT mask;
1000 int i;
1001 bool first;
1002 unsigned HOST_WIDE_INT val;
1003 bool subtargets;
1004 rtx subtarget;
1005 int one_match, zero_match, first_not_ffff_match;
1007 gcc_assert (mode == SImode || mode == DImode);
1009 /* Check on what type of symbol it is. */
1010 if (GET_CODE (imm) == SYMBOL_REF
1011 || GET_CODE (imm) == LABEL_REF
1012 || GET_CODE (imm) == CONST)
1014 rtx mem, base, offset;
1015 enum aarch64_symbol_type sty;
1017 /* If we have (const (plus symbol offset)), separate out the offset
1018 before we start classifying the symbol. */
1019 split_const (imm, &base, &offset);
1021 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1022 switch (sty)
1024 case SYMBOL_FORCE_TO_MEM:
1025 if (offset != const0_rtx
1026 && targetm.cannot_force_const_mem (mode, imm))
1028 gcc_assert (can_create_pseudo_p ());
1029 base = aarch64_force_temporary (mode, dest, base);
1030 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1031 aarch64_emit_move (dest, base);
1032 return;
1034 mem = force_const_mem (ptr_mode, imm);
1035 gcc_assert (mem);
1036 if (mode != ptr_mode)
1037 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1038 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1039 return;
1041 case SYMBOL_SMALL_TLSGD:
1042 case SYMBOL_SMALL_TLSDESC:
1043 case SYMBOL_SMALL_GOTTPREL:
1044 case SYMBOL_SMALL_GOT:
1045 case SYMBOL_TINY_GOT:
1046 if (offset != const0_rtx)
1048 gcc_assert(can_create_pseudo_p ());
1049 base = aarch64_force_temporary (mode, dest, base);
1050 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1051 aarch64_emit_move (dest, base);
1052 return;
1054 /* FALLTHRU */
1056 case SYMBOL_SMALL_TPREL:
1057 case SYMBOL_SMALL_ABSOLUTE:
1058 case SYMBOL_TINY_ABSOLUTE:
1059 aarch64_load_symref_appropriately (dest, imm, sty);
1060 return;
1062 default:
1063 gcc_unreachable ();
1067 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1069 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1070 return;
1073 if (!CONST_INT_P (imm))
1075 if (GET_CODE (imm) == HIGH)
1076 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1077 else
1079 rtx mem = force_const_mem (mode, imm);
1080 gcc_assert (mem);
1081 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1084 return;
1087 if (mode == SImode)
1089 /* We know we can't do this in 1 insn, and we must be able to do it
1090 in two; so don't mess around looking for sequences that don't buy
1091 us anything. */
1092 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1093 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1094 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1095 return;
1098 /* Remaining cases are all for DImode. */
1100 val = INTVAL (imm);
1101 subtargets = optimize && can_create_pseudo_p ();
1103 one_match = 0;
1104 zero_match = 0;
1105 mask = 0xffff;
1106 first_not_ffff_match = -1;
1108 for (i = 0; i < 64; i += 16, mask <<= 16)
1110 if ((val & mask) == mask)
1111 one_match++;
1112 else
1114 if (first_not_ffff_match < 0)
1115 first_not_ffff_match = i;
1116 if ((val & mask) == 0)
1117 zero_match++;
1121 if (one_match == 2)
1123 /* Set one of the quarters and then insert back into result. */
1124 mask = 0xffffll << first_not_ffff_match;
1125 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1126 emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
1127 GEN_INT ((val >> first_not_ffff_match)
1128 & 0xffff)));
1129 return;
1132 if (zero_match == 2)
1133 goto simple_sequence;
1135 mask = 0x0ffff0000UL;
1136 for (i = 16; i < 64; i += 16, mask <<= 16)
1138 HOST_WIDE_INT comp = mask & ~(mask - 1);
1140 if (aarch64_uimm12_shift (val - (val & mask)))
1142 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1144 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1145 emit_insn (gen_adddi3 (dest, subtarget,
1146 GEN_INT (val - (val & mask))));
1147 return;
1149 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1151 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1153 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1154 GEN_INT ((val + comp) & mask)));
1155 emit_insn (gen_adddi3 (dest, subtarget,
1156 GEN_INT (val - ((val + comp) & mask))));
1157 return;
1159 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1161 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1163 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1164 GEN_INT ((val - comp) | ~mask)));
1165 emit_insn (gen_adddi3 (dest, subtarget,
1166 GEN_INT (val - ((val - comp) | ~mask))));
1167 return;
1169 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1171 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1173 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1174 GEN_INT (val | ~mask)));
1175 emit_insn (gen_adddi3 (dest, subtarget,
1176 GEN_INT (val - (val | ~mask))));
1177 return;
1181 /* See if we can do it by arithmetically combining two
1182 immediates. */
1183 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1185 int j;
1186 mask = 0xffff;
1188 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1189 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1191 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1192 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1193 GEN_INT (aarch64_bitmasks[i])));
1194 emit_insn (gen_adddi3 (dest, subtarget,
1195 GEN_INT (val - aarch64_bitmasks[i])));
1196 return;
1199 for (j = 0; j < 64; j += 16, mask <<= 16)
1201 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1203 emit_insn (gen_rtx_SET (VOIDmode, dest,
1204 GEN_INT (aarch64_bitmasks[i])));
1205 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1206 GEN_INT ((val >> j) & 0xffff)));
1207 return;
1212 /* See if we can do it by logically combining two immediates. */
1213 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1215 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1217 int j;
1219 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1220 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1222 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1223 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1224 GEN_INT (aarch64_bitmasks[i])));
1225 emit_insn (gen_iordi3 (dest, subtarget,
1226 GEN_INT (aarch64_bitmasks[j])));
1227 return;
1230 else if ((val & aarch64_bitmasks[i]) == val)
1232 int j;
1234 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1235 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1238 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1239 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1240 GEN_INT (aarch64_bitmasks[j])));
1241 emit_insn (gen_anddi3 (dest, subtarget,
1242 GEN_INT (aarch64_bitmasks[i])));
1243 return;
1248 if (one_match > zero_match)
1250 /* Set either first three quarters or all but the third. */
1251 mask = 0xffffll << (16 - first_not_ffff_match);
1252 emit_insn (gen_rtx_SET (VOIDmode, dest,
1253 GEN_INT (val | mask | 0xffffffff00000000ull)));
1255 /* Now insert other two quarters. */
1256 for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
1257 i < 64; i += 16, mask <<= 16)
1259 if ((val & mask) != mask)
1260 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1261 GEN_INT ((val >> i) & 0xffff)));
1263 return;
1266 simple_sequence:
1267 first = true;
1268 mask = 0xffff;
1269 for (i = 0; i < 64; i += 16, mask <<= 16)
1271 if ((val & mask) != 0)
1273 if (first)
1275 emit_insn (gen_rtx_SET (VOIDmode, dest,
1276 GEN_INT (val & mask)));
1277 first = false;
1279 else
1280 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1281 GEN_INT ((val >> i) & 0xffff)));
1286 static bool
1287 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1288 tree exp ATTRIBUTE_UNUSED)
1290 /* Currently, always true. */
1291 return true;
1294 /* Implement TARGET_PASS_BY_REFERENCE. */
1296 static bool
1297 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1298 enum machine_mode mode,
1299 const_tree type,
1300 bool named ATTRIBUTE_UNUSED)
1302 HOST_WIDE_INT size;
1303 enum machine_mode dummymode;
1304 int nregs;
1306 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1307 size = (mode == BLKmode && type)
1308 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1310 /* Aggregates are passed by reference based on their size. */
1311 if (type && AGGREGATE_TYPE_P (type))
1313 size = int_size_in_bytes (type);
1316 /* Variable sized arguments are always returned by reference. */
1317 if (size < 0)
1318 return true;
1320 /* Can this be a candidate to be passed in fp/simd register(s)? */
1321 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1322 &dummymode, &nregs,
1323 NULL))
1324 return false;
1326 /* Arguments which are variable sized or larger than 2 registers are
1327 passed by reference unless they are a homogenous floating point
1328 aggregate. */
1329 return size > 2 * UNITS_PER_WORD;
1332 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1333 static bool
1334 aarch64_return_in_msb (const_tree valtype)
1336 enum machine_mode dummy_mode;
1337 int dummy_int;
1339 /* Never happens in little-endian mode. */
1340 if (!BYTES_BIG_ENDIAN)
1341 return false;
1343 /* Only composite types smaller than or equal to 16 bytes can
1344 be potentially returned in registers. */
1345 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1346 || int_size_in_bytes (valtype) <= 0
1347 || int_size_in_bytes (valtype) > 16)
1348 return false;
1350 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1351 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1352 is always passed/returned in the least significant bits of fp/simd
1353 register(s). */
1354 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1355 &dummy_mode, &dummy_int, NULL))
1356 return false;
1358 return true;
1361 /* Implement TARGET_FUNCTION_VALUE.
1362 Define how to find the value returned by a function. */
1364 static rtx
1365 aarch64_function_value (const_tree type, const_tree func,
1366 bool outgoing ATTRIBUTE_UNUSED)
1368 enum machine_mode mode;
1369 int unsignedp;
1370 int count;
1371 enum machine_mode ag_mode;
1373 mode = TYPE_MODE (type);
1374 if (INTEGRAL_TYPE_P (type))
1375 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1377 if (aarch64_return_in_msb (type))
1379 HOST_WIDE_INT size = int_size_in_bytes (type);
1381 if (size % UNITS_PER_WORD != 0)
1383 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1384 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1388 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1389 &ag_mode, &count, NULL))
1391 if (!aarch64_composite_type_p (type, mode))
1393 gcc_assert (count == 1 && mode == ag_mode);
1394 return gen_rtx_REG (mode, V0_REGNUM);
1396 else
1398 int i;
1399 rtx par;
1401 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1402 for (i = 0; i < count; i++)
1404 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1405 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1406 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1407 XVECEXP (par, 0, i) = tmp;
1409 return par;
1412 else
1413 return gen_rtx_REG (mode, R0_REGNUM);
1416 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1417 Return true if REGNO is the number of a hard register in which the values
1418 of called function may come back. */
1420 static bool
1421 aarch64_function_value_regno_p (const unsigned int regno)
1423 /* Maximum of 16 bytes can be returned in the general registers. Examples
1424 of 16-byte return values are: 128-bit integers and 16-byte small
1425 structures (excluding homogeneous floating-point aggregates). */
1426 if (regno == R0_REGNUM || regno == R1_REGNUM)
1427 return true;
1429 /* Up to four fp/simd registers can return a function value, e.g. a
1430 homogeneous floating-point aggregate having four members. */
1431 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1432 return !TARGET_GENERAL_REGS_ONLY;
1434 return false;
1437 /* Implement TARGET_RETURN_IN_MEMORY.
1439 If the type T of the result of a function is such that
1440 void func (T arg)
1441 would require that arg be passed as a value in a register (or set of
1442 registers) according to the parameter passing rules, then the result
1443 is returned in the same registers as would be used for such an
1444 argument. */
1446 static bool
1447 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1449 HOST_WIDE_INT size;
1450 enum machine_mode ag_mode;
1451 int count;
1453 if (!AGGREGATE_TYPE_P (type)
1454 && TREE_CODE (type) != COMPLEX_TYPE
1455 && TREE_CODE (type) != VECTOR_TYPE)
1456 /* Simple scalar types always returned in registers. */
1457 return false;
1459 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1460 type,
1461 &ag_mode,
1462 &count,
1463 NULL))
1464 return false;
1466 /* Types larger than 2 registers returned in memory. */
1467 size = int_size_in_bytes (type);
1468 return (size < 0 || size > 2 * UNITS_PER_WORD);
1471 static bool
1472 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1473 const_tree type, int *nregs)
1475 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1476 return aarch64_vfp_is_call_or_return_candidate (mode,
1477 type,
1478 &pcum->aapcs_vfp_rmode,
1479 nregs,
1480 NULL);
1483 /* Given MODE and TYPE of a function argument, return the alignment in
1484 bits. The idea is to suppress any stronger alignment requested by
1485 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1486 This is a helper function for local use only. */
1488 static unsigned int
1489 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1491 unsigned int alignment;
1493 if (type)
1495 if (!integer_zerop (TYPE_SIZE (type)))
1497 if (TYPE_MODE (type) == mode)
1498 alignment = TYPE_ALIGN (type);
1499 else
1500 alignment = GET_MODE_ALIGNMENT (mode);
1502 else
1503 alignment = 0;
1505 else
1506 alignment = GET_MODE_ALIGNMENT (mode);
1508 return alignment;
1511 /* Layout a function argument according to the AAPCS64 rules. The rule
1512 numbers refer to the rule numbers in the AAPCS64. */
1514 static void
1515 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1516 const_tree type,
1517 bool named ATTRIBUTE_UNUSED)
1519 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1520 int ncrn, nvrn, nregs;
1521 bool allocate_ncrn, allocate_nvrn;
1522 HOST_WIDE_INT size;
1524 /* We need to do this once per argument. */
1525 if (pcum->aapcs_arg_processed)
1526 return;
1528 pcum->aapcs_arg_processed = true;
1530 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1531 size
1532 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1533 UNITS_PER_WORD);
1535 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1536 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1537 mode,
1538 type,
1539 &nregs);
1541 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1542 The following code thus handles passing by SIMD/FP registers first. */
1544 nvrn = pcum->aapcs_nvrn;
1546 /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1547 and homogenous short-vector aggregates (HVA). */
1548 if (allocate_nvrn)
1550 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1552 pcum->aapcs_nextnvrn = nvrn + nregs;
1553 if (!aarch64_composite_type_p (type, mode))
1555 gcc_assert (nregs == 1);
1556 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1558 else
1560 rtx par;
1561 int i;
1562 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1563 for (i = 0; i < nregs; i++)
1565 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1566 V0_REGNUM + nvrn + i);
1567 tmp = gen_rtx_EXPR_LIST
1568 (VOIDmode, tmp,
1569 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1570 XVECEXP (par, 0, i) = tmp;
1572 pcum->aapcs_reg = par;
1574 return;
1576 else
1578 /* C.3 NSRN is set to 8. */
1579 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1580 goto on_stack;
1584 ncrn = pcum->aapcs_ncrn;
1585 nregs = size / UNITS_PER_WORD;
1587 /* C6 - C9. though the sign and zero extension semantics are
1588 handled elsewhere. This is the case where the argument fits
1589 entirely general registers. */
1590 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1592 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1594 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1596 /* C.8 if the argument has an alignment of 16 then the NGRN is
1597 rounded up to the next even number. */
1598 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1600 ++ncrn;
1601 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1603 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1604 A reg is still generated for it, but the caller should be smart
1605 enough not to use it. */
1606 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1608 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1610 else
1612 rtx par;
1613 int i;
1615 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1616 for (i = 0; i < nregs; i++)
1618 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1619 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1620 GEN_INT (i * UNITS_PER_WORD));
1621 XVECEXP (par, 0, i) = tmp;
1623 pcum->aapcs_reg = par;
1626 pcum->aapcs_nextncrn = ncrn + nregs;
1627 return;
1630 /* C.11 */
1631 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1633 /* The argument is passed on stack; record the needed number of words for
1634 this argument and align the total size if necessary. */
1635 on_stack:
1636 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1637 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1638 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1639 16 / UNITS_PER_WORD);
1640 return;
1643 /* Implement TARGET_FUNCTION_ARG. */
1645 static rtx
1646 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1647 const_tree type, bool named)
1649 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1650 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1652 if (mode == VOIDmode)
1653 return NULL_RTX;
1655 aarch64_layout_arg (pcum_v, mode, type, named);
1656 return pcum->aapcs_reg;
1659 void
1660 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1661 const_tree fntype ATTRIBUTE_UNUSED,
1662 rtx libname ATTRIBUTE_UNUSED,
1663 const_tree fndecl ATTRIBUTE_UNUSED,
1664 unsigned n_named ATTRIBUTE_UNUSED)
1666 pcum->aapcs_ncrn = 0;
1667 pcum->aapcs_nvrn = 0;
1668 pcum->aapcs_nextncrn = 0;
1669 pcum->aapcs_nextnvrn = 0;
1670 pcum->pcs_variant = ARM_PCS_AAPCS64;
1671 pcum->aapcs_reg = NULL_RTX;
1672 pcum->aapcs_arg_processed = false;
1673 pcum->aapcs_stack_words = 0;
1674 pcum->aapcs_stack_size = 0;
1676 return;
1679 static void
1680 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1681 enum machine_mode mode,
1682 const_tree type,
1683 bool named)
1685 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1686 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1688 aarch64_layout_arg (pcum_v, mode, type, named);
1689 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1690 != (pcum->aapcs_stack_words != 0));
1691 pcum->aapcs_arg_processed = false;
1692 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1693 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1694 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1695 pcum->aapcs_stack_words = 0;
1696 pcum->aapcs_reg = NULL_RTX;
1700 bool
1701 aarch64_function_arg_regno_p (unsigned regno)
1703 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1704 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1707 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1708 PARM_BOUNDARY bits of alignment, but will be given anything up
1709 to STACK_BOUNDARY bits if the type requires it. This makes sure
1710 that both before and after the layout of each argument, the Next
1711 Stacked Argument Address (NSAA) will have a minimum alignment of
1712 8 bytes. */
1714 static unsigned int
1715 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1717 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1719 if (alignment < PARM_BOUNDARY)
1720 alignment = PARM_BOUNDARY;
1721 if (alignment > STACK_BOUNDARY)
1722 alignment = STACK_BOUNDARY;
1723 return alignment;
1726 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1728 Return true if an argument passed on the stack should be padded upwards,
1729 i.e. if the least-significant byte of the stack slot has useful data.
1731 Small aggregate types are placed in the lowest memory address.
1733 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1735 bool
1736 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1738 /* On little-endian targets, the least significant byte of every stack
1739 argument is passed at the lowest byte address of the stack slot. */
1740 if (!BYTES_BIG_ENDIAN)
1741 return true;
1743 /* Otherwise, integral, floating-point and pointer types are padded downward:
1744 the least significant byte of a stack argument is passed at the highest
1745 byte address of the stack slot. */
1746 if (type
1747 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1748 || POINTER_TYPE_P (type))
1749 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1750 return false;
1752 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1753 return true;
1756 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1758 It specifies padding for the last (may also be the only)
1759 element of a block move between registers and memory. If
1760 assuming the block is in the memory, padding upward means that
1761 the last element is padded after its highest significant byte,
1762 while in downward padding, the last element is padded at the
1763 its least significant byte side.
1765 Small aggregates and small complex types are always padded
1766 upwards.
1768 We don't need to worry about homogeneous floating-point or
1769 short-vector aggregates; their move is not affected by the
1770 padding direction determined here. Regardless of endianness,
1771 each element of such an aggregate is put in the least
1772 significant bits of a fp/simd register.
1774 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1775 register has useful data, and return the opposite if the most
1776 significant byte does. */
1778 bool
1779 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1780 bool first ATTRIBUTE_UNUSED)
1783 /* Small composite types are always padded upward. */
1784 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1786 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1787 : GET_MODE_SIZE (mode));
1788 if (size < 2 * UNITS_PER_WORD)
1789 return true;
1792 /* Otherwise, use the default padding. */
1793 return !BYTES_BIG_ENDIAN;
1796 static enum machine_mode
1797 aarch64_libgcc_cmp_return_mode (void)
1799 return SImode;
1802 static bool
1803 aarch64_frame_pointer_required (void)
1805 /* In aarch64_override_options_after_change
1806 flag_omit_leaf_frame_pointer turns off the frame pointer by
1807 default. Turn it back on now if we've not got a leaf
1808 function. */
1809 if (flag_omit_leaf_frame_pointer
1810 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1811 return true;
1813 return false;
1816 /* Mark the registers that need to be saved by the callee and calculate
1817 the size of the callee-saved registers area and frame record (both FP
1818 and LR may be omitted). */
1819 static void
1820 aarch64_layout_frame (void)
1822 HOST_WIDE_INT offset = 0;
1823 int regno;
1825 if (reload_completed && cfun->machine->frame.laid_out)
1826 return;
1828 #define SLOT_NOT_REQUIRED (-2)
1829 #define SLOT_REQUIRED (-1)
1831 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
1832 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
1834 /* First mark all the registers that really need to be saved... */
1835 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1836 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1838 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1839 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1841 /* ... that includes the eh data registers (if needed)... */
1842 if (crtl->calls_eh_return)
1843 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1844 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
1845 = SLOT_REQUIRED;
1847 /* ... and any callee saved register that dataflow says is live. */
1848 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1849 if (df_regs_ever_live_p (regno)
1850 && !call_used_regs[regno])
1851 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1853 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1854 if (df_regs_ever_live_p (regno)
1855 && !call_used_regs[regno])
1856 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1858 if (frame_pointer_needed)
1860 /* FP and LR are placed in the linkage record. */
1861 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1862 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
1863 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
1864 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
1865 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1866 offset += 2 * UNITS_PER_WORD;
1869 /* Now assign stack slots for them. */
1870 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1871 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1873 cfun->machine->frame.reg_offset[regno] = offset;
1874 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1875 cfun->machine->frame.wb_candidate1 = regno;
1876 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
1877 cfun->machine->frame.wb_candidate2 = regno;
1878 offset += UNITS_PER_WORD;
1881 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1882 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1884 cfun->machine->frame.reg_offset[regno] = offset;
1885 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1886 cfun->machine->frame.wb_candidate1 = regno;
1887 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
1888 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
1889 cfun->machine->frame.wb_candidate2 = regno;
1890 offset += UNITS_PER_WORD;
1893 cfun->machine->frame.padding0 =
1894 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1895 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1897 cfun->machine->frame.saved_regs_size = offset;
1899 cfun->machine->frame.hard_fp_offset
1900 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
1901 + get_frame_size ()
1902 + cfun->machine->frame.saved_regs_size,
1903 STACK_BOUNDARY / BITS_PER_UNIT);
1905 cfun->machine->frame.frame_size
1906 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
1907 + crtl->outgoing_args_size,
1908 STACK_BOUNDARY / BITS_PER_UNIT);
1910 cfun->machine->frame.laid_out = true;
1913 static bool
1914 aarch64_register_saved_on_entry (int regno)
1916 return cfun->machine->frame.reg_offset[regno] >= 0;
1919 static unsigned
1920 aarch64_next_callee_save (unsigned regno, unsigned limit)
1922 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
1923 regno ++;
1924 return regno;
1927 static void
1928 aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno,
1929 HOST_WIDE_INT adjustment)
1931 rtx base_rtx = stack_pointer_rtx;
1932 rtx insn, reg, mem;
1934 reg = gen_rtx_REG (mode, regno);
1935 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
1936 plus_constant (Pmode, base_rtx, -adjustment));
1937 mem = gen_rtx_MEM (mode, mem);
1939 insn = emit_move_insn (mem, reg);
1940 RTX_FRAME_RELATED_P (insn) = 1;
1943 static rtx
1944 aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
1945 HOST_WIDE_INT adjustment)
1947 switch (mode)
1949 case DImode:
1950 return gen_storewb_pairdi_di (base, base, reg, reg2,
1951 GEN_INT (-adjustment),
1952 GEN_INT (UNITS_PER_WORD - adjustment));
1953 case DFmode:
1954 return gen_storewb_pairdf_di (base, base, reg, reg2,
1955 GEN_INT (-adjustment),
1956 GEN_INT (UNITS_PER_WORD - adjustment));
1957 default:
1958 gcc_unreachable ();
1962 static void
1963 aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned regno1,
1964 unsigned regno2, HOST_WIDE_INT adjustment)
1966 rtx_insn *insn;
1967 rtx reg1 = gen_rtx_REG (mode, regno1);
1968 rtx reg2 = gen_rtx_REG (mode, regno2);
1970 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
1971 reg2, adjustment));
1972 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1973 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1974 RTX_FRAME_RELATED_P (insn) = 1;
1977 static rtx
1978 aarch64_gen_loadwb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
1979 HOST_WIDE_INT adjustment)
1981 switch (mode)
1983 case DImode:
1984 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
1985 GEN_INT (UNITS_PER_WORD));
1986 case DFmode:
1987 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
1988 GEN_INT (UNITS_PER_WORD));
1989 default:
1990 gcc_unreachable ();
1994 static rtx
1995 aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
1996 rtx reg2)
1998 switch (mode)
2000 case DImode:
2001 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2003 case DFmode:
2004 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2006 default:
2007 gcc_unreachable ();
2011 static rtx
2012 aarch64_gen_load_pair (enum machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
2013 rtx mem2)
2015 switch (mode)
2017 case DImode:
2018 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2020 case DFmode:
2021 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2023 default:
2024 gcc_unreachable ();
2029 static void
2030 aarch64_save_callee_saves (enum machine_mode mode, HOST_WIDE_INT start_offset,
2031 unsigned start, unsigned limit, bool skip_wb)
2033 rtx_insn *insn;
2034 rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
2035 ? gen_frame_mem : gen_rtx_MEM);
2036 unsigned regno;
2037 unsigned regno2;
2039 for (regno = aarch64_next_callee_save (start, limit);
2040 regno <= limit;
2041 regno = aarch64_next_callee_save (regno + 1, limit))
2043 rtx reg, mem;
2044 HOST_WIDE_INT offset;
2046 if (skip_wb
2047 && (regno == cfun->machine->frame.wb_candidate1
2048 || regno == cfun->machine->frame.wb_candidate2))
2049 continue;
2051 reg = gen_rtx_REG (mode, regno);
2052 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2053 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2054 offset));
2056 regno2 = aarch64_next_callee_save (regno + 1, limit);
2058 if (regno2 <= limit
2059 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2060 == cfun->machine->frame.reg_offset[regno2]))
2063 rtx reg2 = gen_rtx_REG (mode, regno2);
2064 rtx mem2;
2066 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2067 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2068 offset));
2069 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2070 reg2));
2072 /* The first part of a frame-related parallel insn is
2073 always assumed to be relevant to the frame
2074 calculations; subsequent parts, are only
2075 frame-related if explicitly marked. */
2076 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2077 regno = regno2;
2079 else
2080 insn = emit_move_insn (mem, reg);
2082 RTX_FRAME_RELATED_P (insn) = 1;
2086 static void
2087 aarch64_restore_callee_saves (enum machine_mode mode,
2088 HOST_WIDE_INT start_offset, unsigned start,
2089 unsigned limit, bool skip_wb, rtx *cfi_ops)
2091 rtx base_rtx = stack_pointer_rtx;
2092 rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
2093 ? gen_frame_mem : gen_rtx_MEM);
2094 unsigned regno;
2095 unsigned regno2;
2096 HOST_WIDE_INT offset;
2098 for (regno = aarch64_next_callee_save (start, limit);
2099 regno <= limit;
2100 regno = aarch64_next_callee_save (regno + 1, limit))
2102 rtx reg, mem;
2104 if (skip_wb
2105 && (regno == cfun->machine->frame.wb_candidate1
2106 || regno == cfun->machine->frame.wb_candidate2))
2107 continue;
2109 reg = gen_rtx_REG (mode, regno);
2110 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2111 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2113 regno2 = aarch64_next_callee_save (regno + 1, limit);
2115 if (regno2 <= limit
2116 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2117 == cfun->machine->frame.reg_offset[regno2]))
2119 rtx reg2 = gen_rtx_REG (mode, regno2);
2120 rtx mem2;
2122 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2123 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2124 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
2126 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
2127 regno = regno2;
2129 else
2130 emit_move_insn (reg, mem);
2131 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
2135 /* AArch64 stack frames generated by this compiler look like:
2137 +-------------------------------+
2139 | incoming stack arguments |
2141 +-------------------------------+
2142 | | <-- incoming stack pointer (aligned)
2143 | callee-allocated save area |
2144 | for register varargs |
2146 +-------------------------------+
2147 | local variables | <-- frame_pointer_rtx
2149 +-------------------------------+
2150 | padding0 | \
2151 +-------------------------------+ |
2152 | callee-saved registers | | frame.saved_regs_size
2153 +-------------------------------+ |
2154 | LR' | |
2155 +-------------------------------+ |
2156 | FP' | / <- hard_frame_pointer_rtx (aligned)
2157 +-------------------------------+
2158 | dynamic allocation |
2159 +-------------------------------+
2160 | padding |
2161 +-------------------------------+
2162 | outgoing stack arguments | <-- arg_pointer
2164 +-------------------------------+
2165 | | <-- stack_pointer_rtx (aligned)
2167 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2168 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2169 unchanged. */
2171 /* Generate the prologue instructions for entry into a function.
2172 Establish the stack frame by decreasing the stack pointer with a
2173 properly calculated size and, if necessary, create a frame record
2174 filled with the values of LR and previous frame pointer. The
2175 current FP is also set up if it is in use. */
2177 void
2178 aarch64_expand_prologue (void)
2180 /* sub sp, sp, #<frame_size>
2181 stp {fp, lr}, [sp, #<frame_size> - 16]
2182 add fp, sp, #<frame_size> - hardfp_offset
2183 stp {cs_reg}, [fp, #-16] etc.
2185 sub sp, sp, <final_adjustment_if_any>
2187 HOST_WIDE_INT frame_size, offset;
2188 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
2189 HOST_WIDE_INT hard_fp_offset;
2190 rtx_insn *insn;
2192 aarch64_layout_frame ();
2194 offset = frame_size = cfun->machine->frame.frame_size;
2195 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2196 fp_offset = frame_size - hard_fp_offset;
2198 if (flag_stack_usage_info)
2199 current_function_static_stack_size = frame_size;
2201 /* Store pairs and load pairs have a range only -512 to 504. */
2202 if (offset >= 512)
2204 /* When the frame has a large size, an initial decrease is done on
2205 the stack pointer to jump over the callee-allocated save area for
2206 register varargs, the local variable area and/or the callee-saved
2207 register area. This will allow the pre-index write-back
2208 store pair instructions to be used for setting up the stack frame
2209 efficiently. */
2210 offset = hard_fp_offset;
2211 if (offset >= 512)
2212 offset = cfun->machine->frame.saved_regs_size;
2214 frame_size -= (offset + crtl->outgoing_args_size);
2215 fp_offset = 0;
2217 if (frame_size >= 0x1000000)
2219 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2220 emit_move_insn (op0, GEN_INT (-frame_size));
2221 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2223 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2224 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2225 plus_constant (Pmode, stack_pointer_rtx,
2226 -frame_size)));
2227 RTX_FRAME_RELATED_P (insn) = 1;
2229 else if (frame_size > 0)
2231 int hi_ofs = frame_size & 0xfff000;
2232 int lo_ofs = frame_size & 0x000fff;
2234 if (hi_ofs)
2236 insn = emit_insn (gen_add2_insn
2237 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
2238 RTX_FRAME_RELATED_P (insn) = 1;
2240 if (lo_ofs)
2242 insn = emit_insn (gen_add2_insn
2243 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
2244 RTX_FRAME_RELATED_P (insn) = 1;
2248 else
2249 frame_size = -1;
2251 if (offset > 0)
2253 bool skip_wb = false;
2255 if (frame_pointer_needed)
2257 skip_wb = true;
2259 if (fp_offset)
2261 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2262 GEN_INT (-offset)));
2263 RTX_FRAME_RELATED_P (insn) = 1;
2265 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
2266 R30_REGNUM, false);
2268 else
2269 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
2271 /* Set up frame pointer to point to the location of the
2272 previous frame pointer on the stack. */
2273 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2274 stack_pointer_rtx,
2275 GEN_INT (fp_offset)));
2276 RTX_FRAME_RELATED_P (insn) = 1;
2277 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
2279 else
2281 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2282 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2284 if (fp_offset
2285 || reg1 == FIRST_PSEUDO_REGISTER
2286 || (reg2 == FIRST_PSEUDO_REGISTER
2287 && offset >= 256))
2289 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2290 GEN_INT (-offset)));
2291 RTX_FRAME_RELATED_P (insn) = 1;
2293 else
2295 enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2297 skip_wb = true;
2299 if (reg2 == FIRST_PSEUDO_REGISTER)
2300 aarch64_pushwb_single_reg (mode1, reg1, offset);
2301 else
2302 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2306 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2307 skip_wb);
2308 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2309 skip_wb);
2312 /* when offset >= 512,
2313 sub sp, sp, #<outgoing_args_size> */
2314 if (frame_size > -1)
2316 if (crtl->outgoing_args_size > 0)
2318 insn = emit_insn (gen_add2_insn
2319 (stack_pointer_rtx,
2320 GEN_INT (- crtl->outgoing_args_size)));
2321 RTX_FRAME_RELATED_P (insn) = 1;
2326 /* Generate the epilogue instructions for returning from a function. */
2327 void
2328 aarch64_expand_epilogue (bool for_sibcall)
2330 HOST_WIDE_INT frame_size, offset;
2331 HOST_WIDE_INT fp_offset;
2332 HOST_WIDE_INT hard_fp_offset;
2333 rtx_insn *insn;
2335 aarch64_layout_frame ();
2337 offset = frame_size = cfun->machine->frame.frame_size;
2338 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2339 fp_offset = frame_size - hard_fp_offset;
2341 /* Store pairs and load pairs have a range only -512 to 504. */
2342 if (offset >= 512)
2344 offset = hard_fp_offset;
2345 if (offset >= 512)
2346 offset = cfun->machine->frame.saved_regs_size;
2348 frame_size -= (offset + crtl->outgoing_args_size);
2349 fp_offset = 0;
2350 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2352 insn = emit_insn (gen_add2_insn
2353 (stack_pointer_rtx,
2354 GEN_INT (crtl->outgoing_args_size)));
2355 RTX_FRAME_RELATED_P (insn) = 1;
2358 else
2359 frame_size = -1;
2361 /* If there were outgoing arguments or we've done dynamic stack
2362 allocation, then restore the stack pointer from the frame
2363 pointer. This is at most one insn and more efficient than using
2364 GCC's internal mechanism. */
2365 if (frame_pointer_needed
2366 && (crtl->outgoing_args_size || cfun->calls_alloca))
2368 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2369 hard_frame_pointer_rtx,
2370 GEN_INT (0)));
2371 offset = offset - fp_offset;
2374 if (offset > 0)
2376 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2377 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2378 bool skip_wb = true;
2379 rtx cfi_ops = NULL;
2381 if (frame_pointer_needed)
2382 fp_offset = 0;
2383 else if (fp_offset
2384 || reg1 == FIRST_PSEUDO_REGISTER
2385 || (reg2 == FIRST_PSEUDO_REGISTER
2386 && offset >= 256))
2387 skip_wb = false;
2389 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2390 skip_wb, &cfi_ops);
2391 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2392 skip_wb, &cfi_ops);
2394 if (skip_wb)
2396 enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2397 rtx rreg1 = gen_rtx_REG (mode1, reg1);
2399 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
2400 if (reg2 == FIRST_PSEUDO_REGISTER)
2402 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2403 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2404 mem = gen_rtx_MEM (mode1, mem);
2405 insn = emit_move_insn (rreg1, mem);
2407 else
2409 rtx rreg2 = gen_rtx_REG (mode1, reg2);
2411 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2412 insn = emit_insn (aarch64_gen_loadwb_pair
2413 (mode1, stack_pointer_rtx, rreg1,
2414 rreg2, offset));
2417 else
2419 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2420 GEN_INT (offset)));
2423 /* Reset the CFA to be SP + FRAME_SIZE. */
2424 rtx new_cfa = stack_pointer_rtx;
2425 if (frame_size > 0)
2426 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2427 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2428 REG_NOTES (insn) = cfi_ops;
2429 RTX_FRAME_RELATED_P (insn) = 1;
2432 if (frame_size > 0)
2434 if (frame_size >= 0x1000000)
2436 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2437 emit_move_insn (op0, GEN_INT (frame_size));
2438 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2440 else
2442 int hi_ofs = frame_size & 0xfff000;
2443 int lo_ofs = frame_size & 0x000fff;
2445 if (hi_ofs && lo_ofs)
2447 insn = emit_insn (gen_add2_insn
2448 (stack_pointer_rtx, GEN_INT (hi_ofs)));
2449 RTX_FRAME_RELATED_P (insn) = 1;
2450 frame_size = lo_ofs;
2452 insn = emit_insn (gen_add2_insn
2453 (stack_pointer_rtx, GEN_INT (frame_size)));
2456 /* Reset the CFA to be SP + 0. */
2457 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2458 RTX_FRAME_RELATED_P (insn) = 1;
2461 /* Stack adjustment for exception handler. */
2462 if (crtl->calls_eh_return)
2464 /* We need to unwind the stack by the offset computed by
2465 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2466 to be SP; letting the CFA move during this adjustment
2467 is just as correct as retaining the CFA from the body
2468 of the function. Therefore, do nothing special. */
2469 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2472 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2473 if (!for_sibcall)
2474 emit_jump_insn (ret_rtx);
2477 /* Return the place to copy the exception unwinding return address to.
2478 This will probably be a stack slot, but could (in theory be the
2479 return register). */
2481 aarch64_final_eh_return_addr (void)
2483 HOST_WIDE_INT fp_offset;
2485 aarch64_layout_frame ();
2487 fp_offset = cfun->machine->frame.frame_size
2488 - cfun->machine->frame.hard_fp_offset;
2490 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2491 return gen_rtx_REG (DImode, LR_REGNUM);
2493 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2494 result in a store to save LR introduced by builtin_eh_return () being
2495 incorrectly deleted because the alias is not detected.
2496 So in the calculation of the address to copy the exception unwinding
2497 return address to, we note 2 cases.
2498 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2499 we return a SP-relative location since all the addresses are SP-relative
2500 in this case. This prevents the store from being optimized away.
2501 If the fp_offset is not 0, then the addresses will be FP-relative and
2502 therefore we return a FP-relative location. */
2504 if (frame_pointer_needed)
2506 if (fp_offset)
2507 return gen_frame_mem (DImode,
2508 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2509 else
2510 return gen_frame_mem (DImode,
2511 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2514 /* If FP is not needed, we calculate the location of LR, which would be
2515 at the top of the saved registers block. */
2517 return gen_frame_mem (DImode,
2518 plus_constant (Pmode,
2519 stack_pointer_rtx,
2520 fp_offset
2521 + cfun->machine->frame.saved_regs_size
2522 - 2 * UNITS_PER_WORD));
2525 /* Possibly output code to build up a constant in a register. For
2526 the benefit of the costs infrastructure, returns the number of
2527 instructions which would be emitted. GENERATE inhibits or
2528 enables code generation. */
2530 static int
2531 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2533 int insns = 0;
2535 if (aarch64_bitmask_imm (val, DImode))
2537 if (generate)
2538 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2539 insns = 1;
2541 else
2543 int i;
2544 int ncount = 0;
2545 int zcount = 0;
2546 HOST_WIDE_INT valp = val >> 16;
2547 HOST_WIDE_INT valm;
2548 HOST_WIDE_INT tval;
2550 for (i = 16; i < 64; i += 16)
2552 valm = (valp & 0xffff);
2554 if (valm != 0)
2555 ++ zcount;
2557 if (valm != 0xffff)
2558 ++ ncount;
2560 valp >>= 16;
2563 /* zcount contains the number of additional MOVK instructions
2564 required if the constant is built up with an initial MOVZ instruction,
2565 while ncount is the number of MOVK instructions required if starting
2566 with a MOVN instruction. Choose the sequence that yields the fewest
2567 number of instructions, preferring MOVZ instructions when they are both
2568 the same. */
2569 if (ncount < zcount)
2571 if (generate)
2572 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2573 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2574 tval = 0xffff;
2575 insns++;
2577 else
2579 if (generate)
2580 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2581 GEN_INT (val & 0xffff));
2582 tval = 0;
2583 insns++;
2586 val >>= 16;
2588 for (i = 16; i < 64; i += 16)
2590 if ((val & 0xffff) != tval)
2592 if (generate)
2593 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2594 GEN_INT (i),
2595 GEN_INT (val & 0xffff)));
2596 insns++;
2598 val >>= 16;
2601 return insns;
2604 static void
2605 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2607 HOST_WIDE_INT mdelta = delta;
2608 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2609 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2611 if (mdelta < 0)
2612 mdelta = -mdelta;
2614 if (mdelta >= 4096 * 4096)
2616 (void) aarch64_build_constant (scratchreg, delta, true);
2617 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2619 else if (mdelta > 0)
2621 if (mdelta >= 4096)
2623 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2624 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2625 if (delta < 0)
2626 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2627 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2628 else
2629 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2630 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2632 if (mdelta % 4096 != 0)
2634 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2635 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2636 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2641 /* Output code to add DELTA to the first argument, and then jump
2642 to FUNCTION. Used for C++ multiple inheritance. */
2643 static void
2644 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2645 HOST_WIDE_INT delta,
2646 HOST_WIDE_INT vcall_offset,
2647 tree function)
2649 /* The this pointer is always in x0. Note that this differs from
2650 Arm where the this pointer maybe bumped to r1 if r0 is required
2651 to return a pointer to an aggregate. On AArch64 a result value
2652 pointer will be in x8. */
2653 int this_regno = R0_REGNUM;
2654 rtx this_rtx, temp0, temp1, addr, funexp;
2655 rtx_insn *insn;
2657 reload_completed = 1;
2658 emit_note (NOTE_INSN_PROLOGUE_END);
2660 if (vcall_offset == 0)
2661 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2662 else
2664 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2666 this_rtx = gen_rtx_REG (Pmode, this_regno);
2667 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2668 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2670 addr = this_rtx;
2671 if (delta != 0)
2673 if (delta >= -256 && delta < 256)
2674 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2675 plus_constant (Pmode, this_rtx, delta));
2676 else
2677 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2680 if (Pmode == ptr_mode)
2681 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2682 else
2683 aarch64_emit_move (temp0,
2684 gen_rtx_ZERO_EXTEND (Pmode,
2685 gen_rtx_MEM (ptr_mode, addr)));
2687 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2688 addr = plus_constant (Pmode, temp0, vcall_offset);
2689 else
2691 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2692 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2695 if (Pmode == ptr_mode)
2696 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2697 else
2698 aarch64_emit_move (temp1,
2699 gen_rtx_SIGN_EXTEND (Pmode,
2700 gen_rtx_MEM (ptr_mode, addr)));
2702 emit_insn (gen_add2_insn (this_rtx, temp1));
2705 /* Generate a tail call to the target function. */
2706 if (!TREE_USED (function))
2708 assemble_external (function);
2709 TREE_USED (function) = 1;
2711 funexp = XEXP (DECL_RTL (function), 0);
2712 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2713 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2714 SIBLING_CALL_P (insn) = 1;
2716 insn = get_insns ();
2717 shorten_branches (insn);
2718 final_start_function (insn, file, 1);
2719 final (insn, file, 1);
2720 final_end_function ();
2722 /* Stop pretending to be a post-reload pass. */
2723 reload_completed = 0;
2726 static int
2727 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2729 if (GET_CODE (*x) == SYMBOL_REF)
2730 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2732 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2733 TLS offsets, not real symbol references. */
2734 if (GET_CODE (*x) == UNSPEC
2735 && XINT (*x, 1) == UNSPEC_TLS)
2736 return -1;
2738 return 0;
2741 static bool
2742 aarch64_tls_referenced_p (rtx x)
2744 if (!TARGET_HAVE_TLS)
2745 return false;
2747 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2751 static int
2752 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2754 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2755 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2757 if (*imm1 < *imm2)
2758 return -1;
2759 if (*imm1 > *imm2)
2760 return +1;
2761 return 0;
2765 static void
2766 aarch64_build_bitmask_table (void)
2768 unsigned HOST_WIDE_INT mask, imm;
2769 unsigned int log_e, e, s, r;
2770 unsigned int nimms = 0;
2772 for (log_e = 1; log_e <= 6; log_e++)
2774 e = 1 << log_e;
2775 if (e == 64)
2776 mask = ~(HOST_WIDE_INT) 0;
2777 else
2778 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2779 for (s = 1; s < e; s++)
2781 for (r = 0; r < e; r++)
2783 /* set s consecutive bits to 1 (s < 64) */
2784 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2785 /* rotate right by r */
2786 if (r != 0)
2787 imm = ((imm >> r) | (imm << (e - r))) & mask;
2788 /* replicate the constant depending on SIMD size */
2789 switch (log_e) {
2790 case 1: imm |= (imm << 2);
2791 case 2: imm |= (imm << 4);
2792 case 3: imm |= (imm << 8);
2793 case 4: imm |= (imm << 16);
2794 case 5: imm |= (imm << 32);
2795 case 6:
2796 break;
2797 default:
2798 gcc_unreachable ();
2800 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2801 aarch64_bitmasks[nimms++] = imm;
2806 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2807 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2808 aarch64_bitmasks_cmp);
2812 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2813 a left shift of 0 or 12 bits. */
2814 bool
2815 aarch64_uimm12_shift (HOST_WIDE_INT val)
2817 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2818 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2823 /* Return true if val is an immediate that can be loaded into a
2824 register by a MOVZ instruction. */
2825 static bool
2826 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2828 if (GET_MODE_SIZE (mode) > 4)
2830 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2831 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2832 return 1;
2834 else
2836 /* Ignore sign extension. */
2837 val &= (HOST_WIDE_INT) 0xffffffff;
2839 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2840 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2844 /* Return true if val is a valid bitmask immediate. */
2845 bool
2846 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2848 if (GET_MODE_SIZE (mode) < 8)
2850 /* Replicate bit pattern. */
2851 val &= (HOST_WIDE_INT) 0xffffffff;
2852 val |= val << 32;
2854 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2855 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2859 /* Return true if val is an immediate that can be loaded into a
2860 register in a single instruction. */
2861 bool
2862 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2864 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2865 return 1;
2866 return aarch64_bitmask_imm (val, mode);
2869 static bool
2870 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2872 rtx base, offset;
2874 if (GET_CODE (x) == HIGH)
2875 return true;
2877 split_const (x, &base, &offset);
2878 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2880 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2881 != SYMBOL_FORCE_TO_MEM)
2882 return true;
2883 else
2884 /* Avoid generating a 64-bit relocation in ILP32; leave
2885 to aarch64_expand_mov_immediate to handle it properly. */
2886 return mode != ptr_mode;
2889 return aarch64_tls_referenced_p (x);
2892 /* Return true if register REGNO is a valid index register.
2893 STRICT_P is true if REG_OK_STRICT is in effect. */
2895 bool
2896 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2898 if (!HARD_REGISTER_NUM_P (regno))
2900 if (!strict_p)
2901 return true;
2903 if (!reg_renumber)
2904 return false;
2906 regno = reg_renumber[regno];
2908 return GP_REGNUM_P (regno);
2911 /* Return true if register REGNO is a valid base register for mode MODE.
2912 STRICT_P is true if REG_OK_STRICT is in effect. */
2914 bool
2915 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2917 if (!HARD_REGISTER_NUM_P (regno))
2919 if (!strict_p)
2920 return true;
2922 if (!reg_renumber)
2923 return false;
2925 regno = reg_renumber[regno];
2928 /* The fake registers will be eliminated to either the stack or
2929 hard frame pointer, both of which are usually valid base registers.
2930 Reload deals with the cases where the eliminated form isn't valid. */
2931 return (GP_REGNUM_P (regno)
2932 || regno == SP_REGNUM
2933 || regno == FRAME_POINTER_REGNUM
2934 || regno == ARG_POINTER_REGNUM);
2937 /* Return true if X is a valid base register for mode MODE.
2938 STRICT_P is true if REG_OK_STRICT is in effect. */
2940 static bool
2941 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2943 if (!strict_p && GET_CODE (x) == SUBREG)
2944 x = SUBREG_REG (x);
2946 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2949 /* Return true if address offset is a valid index. If it is, fill in INFO
2950 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2952 static bool
2953 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2954 enum machine_mode mode, bool strict_p)
2956 enum aarch64_address_type type;
2957 rtx index;
2958 int shift;
2960 /* (reg:P) */
2961 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2962 && GET_MODE (x) == Pmode)
2964 type = ADDRESS_REG_REG;
2965 index = x;
2966 shift = 0;
2968 /* (sign_extend:DI (reg:SI)) */
2969 else if ((GET_CODE (x) == SIGN_EXTEND
2970 || GET_CODE (x) == ZERO_EXTEND)
2971 && GET_MODE (x) == DImode
2972 && GET_MODE (XEXP (x, 0)) == SImode)
2974 type = (GET_CODE (x) == SIGN_EXTEND)
2975 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2976 index = XEXP (x, 0);
2977 shift = 0;
2979 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2980 else if (GET_CODE (x) == MULT
2981 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2982 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2983 && GET_MODE (XEXP (x, 0)) == DImode
2984 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2985 && CONST_INT_P (XEXP (x, 1)))
2987 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2988 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2989 index = XEXP (XEXP (x, 0), 0);
2990 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2992 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2993 else if (GET_CODE (x) == ASHIFT
2994 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2995 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2996 && GET_MODE (XEXP (x, 0)) == DImode
2997 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2998 && CONST_INT_P (XEXP (x, 1)))
3000 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3001 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3002 index = XEXP (XEXP (x, 0), 0);
3003 shift = INTVAL (XEXP (x, 1));
3005 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3006 else if ((GET_CODE (x) == SIGN_EXTRACT
3007 || GET_CODE (x) == ZERO_EXTRACT)
3008 && GET_MODE (x) == DImode
3009 && GET_CODE (XEXP (x, 0)) == MULT
3010 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3011 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3013 type = (GET_CODE (x) == SIGN_EXTRACT)
3014 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3015 index = XEXP (XEXP (x, 0), 0);
3016 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3017 if (INTVAL (XEXP (x, 1)) != 32 + shift
3018 || INTVAL (XEXP (x, 2)) != 0)
3019 shift = -1;
3021 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3022 (const_int 0xffffffff<<shift)) */
3023 else if (GET_CODE (x) == AND
3024 && GET_MODE (x) == DImode
3025 && GET_CODE (XEXP (x, 0)) == MULT
3026 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3027 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3028 && CONST_INT_P (XEXP (x, 1)))
3030 type = ADDRESS_REG_UXTW;
3031 index = XEXP (XEXP (x, 0), 0);
3032 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3033 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3034 shift = -1;
3036 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3037 else if ((GET_CODE (x) == SIGN_EXTRACT
3038 || GET_CODE (x) == ZERO_EXTRACT)
3039 && GET_MODE (x) == DImode
3040 && GET_CODE (XEXP (x, 0)) == ASHIFT
3041 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3042 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3044 type = (GET_CODE (x) == SIGN_EXTRACT)
3045 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3046 index = XEXP (XEXP (x, 0), 0);
3047 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3048 if (INTVAL (XEXP (x, 1)) != 32 + shift
3049 || INTVAL (XEXP (x, 2)) != 0)
3050 shift = -1;
3052 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3053 (const_int 0xffffffff<<shift)) */
3054 else if (GET_CODE (x) == AND
3055 && GET_MODE (x) == DImode
3056 && GET_CODE (XEXP (x, 0)) == ASHIFT
3057 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3058 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3059 && CONST_INT_P (XEXP (x, 1)))
3061 type = ADDRESS_REG_UXTW;
3062 index = XEXP (XEXP (x, 0), 0);
3063 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3064 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3065 shift = -1;
3067 /* (mult:P (reg:P) (const_int scale)) */
3068 else if (GET_CODE (x) == MULT
3069 && GET_MODE (x) == Pmode
3070 && GET_MODE (XEXP (x, 0)) == Pmode
3071 && CONST_INT_P (XEXP (x, 1)))
3073 type = ADDRESS_REG_REG;
3074 index = XEXP (x, 0);
3075 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3077 /* (ashift:P (reg:P) (const_int shift)) */
3078 else if (GET_CODE (x) == ASHIFT
3079 && GET_MODE (x) == Pmode
3080 && GET_MODE (XEXP (x, 0)) == Pmode
3081 && CONST_INT_P (XEXP (x, 1)))
3083 type = ADDRESS_REG_REG;
3084 index = XEXP (x, 0);
3085 shift = INTVAL (XEXP (x, 1));
3087 else
3088 return false;
3090 if (GET_CODE (index) == SUBREG)
3091 index = SUBREG_REG (index);
3093 if ((shift == 0 ||
3094 (shift > 0 && shift <= 3
3095 && (1 << shift) == GET_MODE_SIZE (mode)))
3096 && REG_P (index)
3097 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3099 info->type = type;
3100 info->offset = index;
3101 info->shift = shift;
3102 return true;
3105 return false;
3108 bool
3109 aarch64_offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3111 return (offset >= -64 * GET_MODE_SIZE (mode)
3112 && offset < 64 * GET_MODE_SIZE (mode)
3113 && offset % GET_MODE_SIZE (mode) == 0);
3116 static inline bool
3117 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3118 HOST_WIDE_INT offset)
3120 return offset >= -256 && offset < 256;
3123 static inline bool
3124 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3126 return (offset >= 0
3127 && offset < 4096 * GET_MODE_SIZE (mode)
3128 && offset % GET_MODE_SIZE (mode) == 0);
3131 /* Return true if X is a valid address for machine mode MODE. If it is,
3132 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3133 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3135 static bool
3136 aarch64_classify_address (struct aarch64_address_info *info,
3137 rtx x, enum machine_mode mode,
3138 RTX_CODE outer_code, bool strict_p)
3140 enum rtx_code code = GET_CODE (x);
3141 rtx op0, op1;
3142 bool allow_reg_index_p =
3143 outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3144 || aarch64_vector_mode_supported_p (mode));
3145 /* Don't support anything other than POST_INC or REG addressing for
3146 AdvSIMD. */
3147 if (aarch64_vect_struct_mode_p (mode)
3148 && (code != POST_INC && code != REG))
3149 return false;
3151 switch (code)
3153 case REG:
3154 case SUBREG:
3155 info->type = ADDRESS_REG_IMM;
3156 info->base = x;
3157 info->offset = const0_rtx;
3158 return aarch64_base_register_rtx_p (x, strict_p);
3160 case PLUS:
3161 op0 = XEXP (x, 0);
3162 op1 = XEXP (x, 1);
3164 if (! strict_p
3165 && REG_P (op0)
3166 && (op0 == virtual_stack_vars_rtx
3167 || op0 == frame_pointer_rtx
3168 || op0 == arg_pointer_rtx)
3169 && CONST_INT_P (op1))
3171 info->type = ADDRESS_REG_IMM;
3172 info->base = op0;
3173 info->offset = op1;
3175 return true;
3178 if (GET_MODE_SIZE (mode) != 0
3179 && CONST_INT_P (op1)
3180 && aarch64_base_register_rtx_p (op0, strict_p))
3182 HOST_WIDE_INT offset = INTVAL (op1);
3184 info->type = ADDRESS_REG_IMM;
3185 info->base = op0;
3186 info->offset = op1;
3188 /* TImode and TFmode values are allowed in both pairs of X
3189 registers and individual Q registers. The available
3190 address modes are:
3191 X,X: 7-bit signed scaled offset
3192 Q: 9-bit signed offset
3193 We conservatively require an offset representable in either mode.
3195 if (mode == TImode || mode == TFmode)
3196 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3197 && offset_9bit_signed_unscaled_p (mode, offset));
3199 if (outer_code == PARALLEL)
3200 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3201 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3202 else
3203 return (offset_9bit_signed_unscaled_p (mode, offset)
3204 || offset_12bit_unsigned_scaled_p (mode, offset));
3207 if (allow_reg_index_p)
3209 /* Look for base + (scaled/extended) index register. */
3210 if (aarch64_base_register_rtx_p (op0, strict_p)
3211 && aarch64_classify_index (info, op1, mode, strict_p))
3213 info->base = op0;
3214 return true;
3216 if (aarch64_base_register_rtx_p (op1, strict_p)
3217 && aarch64_classify_index (info, op0, mode, strict_p))
3219 info->base = op1;
3220 return true;
3224 return false;
3226 case POST_INC:
3227 case POST_DEC:
3228 case PRE_INC:
3229 case PRE_DEC:
3230 info->type = ADDRESS_REG_WB;
3231 info->base = XEXP (x, 0);
3232 info->offset = NULL_RTX;
3233 return aarch64_base_register_rtx_p (info->base, strict_p);
3235 case POST_MODIFY:
3236 case PRE_MODIFY:
3237 info->type = ADDRESS_REG_WB;
3238 info->base = XEXP (x, 0);
3239 if (GET_CODE (XEXP (x, 1)) == PLUS
3240 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3241 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3242 && aarch64_base_register_rtx_p (info->base, strict_p))
3244 HOST_WIDE_INT offset;
3245 info->offset = XEXP (XEXP (x, 1), 1);
3246 offset = INTVAL (info->offset);
3248 /* TImode and TFmode values are allowed in both pairs of X
3249 registers and individual Q registers. The available
3250 address modes are:
3251 X,X: 7-bit signed scaled offset
3252 Q: 9-bit signed offset
3253 We conservatively require an offset representable in either mode.
3255 if (mode == TImode || mode == TFmode)
3256 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3257 && offset_9bit_signed_unscaled_p (mode, offset));
3259 if (outer_code == PARALLEL)
3260 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3261 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3262 else
3263 return offset_9bit_signed_unscaled_p (mode, offset);
3265 return false;
3267 case CONST:
3268 case SYMBOL_REF:
3269 case LABEL_REF:
3270 /* load literal: pc-relative constant pool entry. Only supported
3271 for SI mode or larger. */
3272 info->type = ADDRESS_SYMBOLIC;
3273 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3275 rtx sym, addend;
3277 split_const (x, &sym, &addend);
3278 return (GET_CODE (sym) == LABEL_REF
3279 || (GET_CODE (sym) == SYMBOL_REF
3280 && CONSTANT_POOL_ADDRESS_P (sym)));
3282 return false;
3284 case LO_SUM:
3285 info->type = ADDRESS_LO_SUM;
3286 info->base = XEXP (x, 0);
3287 info->offset = XEXP (x, 1);
3288 if (allow_reg_index_p
3289 && aarch64_base_register_rtx_p (info->base, strict_p))
3291 rtx sym, offs;
3292 split_const (info->offset, &sym, &offs);
3293 if (GET_CODE (sym) == SYMBOL_REF
3294 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3295 == SYMBOL_SMALL_ABSOLUTE))
3297 /* The symbol and offset must be aligned to the access size. */
3298 unsigned int align;
3299 unsigned int ref_size;
3301 if (CONSTANT_POOL_ADDRESS_P (sym))
3302 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3303 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3305 tree exp = SYMBOL_REF_DECL (sym);
3306 align = TYPE_ALIGN (TREE_TYPE (exp));
3307 align = CONSTANT_ALIGNMENT (exp, align);
3309 else if (SYMBOL_REF_DECL (sym))
3310 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3311 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3312 && SYMBOL_REF_BLOCK (sym) != NULL)
3313 align = SYMBOL_REF_BLOCK (sym)->alignment;
3314 else
3315 align = BITS_PER_UNIT;
3317 ref_size = GET_MODE_SIZE (mode);
3318 if (ref_size == 0)
3319 ref_size = GET_MODE_SIZE (DImode);
3321 return ((INTVAL (offs) & (ref_size - 1)) == 0
3322 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3325 return false;
3327 default:
3328 return false;
3332 bool
3333 aarch64_symbolic_address_p (rtx x)
3335 rtx offset;
3337 split_const (x, &x, &offset);
3338 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3341 /* Classify the base of symbolic expression X, given that X appears in
3342 context CONTEXT. */
3344 enum aarch64_symbol_type
3345 aarch64_classify_symbolic_expression (rtx x,
3346 enum aarch64_symbol_context context)
3348 rtx offset;
3350 split_const (x, &x, &offset);
3351 return aarch64_classify_symbol (x, context);
3355 /* Return TRUE if X is a legitimate address for accessing memory in
3356 mode MODE. */
3357 static bool
3358 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3360 struct aarch64_address_info addr;
3362 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3365 /* Return TRUE if X is a legitimate address for accessing memory in
3366 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3367 pair operation. */
3368 bool
3369 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3370 RTX_CODE outer_code, bool strict_p)
3372 struct aarch64_address_info addr;
3374 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3377 /* Return TRUE if rtx X is immediate constant 0.0 */
3378 bool
3379 aarch64_float_const_zero_rtx_p (rtx x)
3381 REAL_VALUE_TYPE r;
3383 if (GET_MODE (x) == VOIDmode)
3384 return false;
3386 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3387 if (REAL_VALUE_MINUS_ZERO (r))
3388 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3389 return REAL_VALUES_EQUAL (r, dconst0);
3392 /* Return the fixed registers used for condition codes. */
3394 static bool
3395 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3397 *p1 = CC_REGNUM;
3398 *p2 = INVALID_REGNUM;
3399 return true;
3402 /* Emit call insn with PAT and do aarch64-specific handling. */
3404 void
3405 aarch64_emit_call_insn (rtx pat)
3407 rtx insn = emit_call_insn (pat);
3409 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3410 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3411 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3414 enum machine_mode
3415 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3417 /* All floating point compares return CCFP if it is an equality
3418 comparison, and CCFPE otherwise. */
3419 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3421 switch (code)
3423 case EQ:
3424 case NE:
3425 case UNORDERED:
3426 case ORDERED:
3427 case UNLT:
3428 case UNLE:
3429 case UNGT:
3430 case UNGE:
3431 case UNEQ:
3432 case LTGT:
3433 return CCFPmode;
3435 case LT:
3436 case LE:
3437 case GT:
3438 case GE:
3439 return CCFPEmode;
3441 default:
3442 gcc_unreachable ();
3446 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3447 && y == const0_rtx
3448 && (code == EQ || code == NE || code == LT || code == GE)
3449 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3450 || GET_CODE (x) == NEG))
3451 return CC_NZmode;
3453 /* A compare with a shifted operand. Because of canonicalization,
3454 the comparison will have to be swapped when we emit the assembly
3455 code. */
3456 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3457 && (REG_P (y) || GET_CODE (y) == SUBREG)
3458 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3459 || GET_CODE (x) == LSHIFTRT
3460 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3461 return CC_SWPmode;
3463 /* Similarly for a negated operand, but we can only do this for
3464 equalities. */
3465 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3466 && (REG_P (y) || GET_CODE (y) == SUBREG)
3467 && (code == EQ || code == NE)
3468 && GET_CODE (x) == NEG)
3469 return CC_Zmode;
3471 /* A compare of a mode narrower than SI mode against zero can be done
3472 by extending the value in the comparison. */
3473 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3474 && y == const0_rtx)
3475 /* Only use sign-extension if we really need it. */
3476 return ((code == GT || code == GE || code == LE || code == LT)
3477 ? CC_SESWPmode : CC_ZESWPmode);
3479 /* For everything else, return CCmode. */
3480 return CCmode;
3484 aarch64_get_condition_code (rtx x)
3486 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3487 enum rtx_code comp_code = GET_CODE (x);
3489 if (GET_MODE_CLASS (mode) != MODE_CC)
3490 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3492 switch (mode)
3494 case CCFPmode:
3495 case CCFPEmode:
3496 switch (comp_code)
3498 case GE: return AARCH64_GE;
3499 case GT: return AARCH64_GT;
3500 case LE: return AARCH64_LS;
3501 case LT: return AARCH64_MI;
3502 case NE: return AARCH64_NE;
3503 case EQ: return AARCH64_EQ;
3504 case ORDERED: return AARCH64_VC;
3505 case UNORDERED: return AARCH64_VS;
3506 case UNLT: return AARCH64_LT;
3507 case UNLE: return AARCH64_LE;
3508 case UNGT: return AARCH64_HI;
3509 case UNGE: return AARCH64_PL;
3510 default: return -1;
3512 break;
3514 case CCmode:
3515 switch (comp_code)
3517 case NE: return AARCH64_NE;
3518 case EQ: return AARCH64_EQ;
3519 case GE: return AARCH64_GE;
3520 case GT: return AARCH64_GT;
3521 case LE: return AARCH64_LE;
3522 case LT: return AARCH64_LT;
3523 case GEU: return AARCH64_CS;
3524 case GTU: return AARCH64_HI;
3525 case LEU: return AARCH64_LS;
3526 case LTU: return AARCH64_CC;
3527 default: return -1;
3529 break;
3531 case CC_SWPmode:
3532 case CC_ZESWPmode:
3533 case CC_SESWPmode:
3534 switch (comp_code)
3536 case NE: return AARCH64_NE;
3537 case EQ: return AARCH64_EQ;
3538 case GE: return AARCH64_LE;
3539 case GT: return AARCH64_LT;
3540 case LE: return AARCH64_GE;
3541 case LT: return AARCH64_GT;
3542 case GEU: return AARCH64_LS;
3543 case GTU: return AARCH64_CC;
3544 case LEU: return AARCH64_CS;
3545 case LTU: return AARCH64_HI;
3546 default: return -1;
3548 break;
3550 case CC_NZmode:
3551 switch (comp_code)
3553 case NE: return AARCH64_NE;
3554 case EQ: return AARCH64_EQ;
3555 case GE: return AARCH64_PL;
3556 case LT: return AARCH64_MI;
3557 default: return -1;
3559 break;
3561 case CC_Zmode:
3562 switch (comp_code)
3564 case NE: return AARCH64_NE;
3565 case EQ: return AARCH64_EQ;
3566 default: return -1;
3568 break;
3570 default:
3571 return -1;
3572 break;
3576 bool
3577 aarch64_const_vec_all_same_in_range_p (rtx x,
3578 HOST_WIDE_INT minval,
3579 HOST_WIDE_INT maxval)
3581 HOST_WIDE_INT firstval;
3582 int count, i;
3584 if (GET_CODE (x) != CONST_VECTOR
3585 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
3586 return false;
3588 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
3589 if (firstval < minval || firstval > maxval)
3590 return false;
3592 count = CONST_VECTOR_NUNITS (x);
3593 for (i = 1; i < count; i++)
3594 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
3595 return false;
3597 return true;
3600 bool
3601 aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
3603 return aarch64_const_vec_all_same_in_range_p (x, val, val);
3606 static unsigned
3607 bit_count (unsigned HOST_WIDE_INT value)
3609 unsigned count = 0;
3611 while (value)
3613 count++;
3614 value &= value - 1;
3617 return count;
3620 void
3621 aarch64_print_operand (FILE *f, rtx x, char code)
3623 switch (code)
3625 /* An integer or symbol address without a preceding # sign. */
3626 case 'c':
3627 switch (GET_CODE (x))
3629 case CONST_INT:
3630 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3631 break;
3633 case SYMBOL_REF:
3634 output_addr_const (f, x);
3635 break;
3637 case CONST:
3638 if (GET_CODE (XEXP (x, 0)) == PLUS
3639 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3641 output_addr_const (f, x);
3642 break;
3644 /* Fall through. */
3646 default:
3647 output_operand_lossage ("Unsupported operand for code '%c'", code);
3649 break;
3651 case 'e':
3652 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3654 int n;
3656 if (!CONST_INT_P (x)
3657 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3659 output_operand_lossage ("invalid operand for '%%%c'", code);
3660 return;
3663 switch (n)
3665 case 3:
3666 fputc ('b', f);
3667 break;
3668 case 4:
3669 fputc ('h', f);
3670 break;
3671 case 5:
3672 fputc ('w', f);
3673 break;
3674 default:
3675 output_operand_lossage ("invalid operand for '%%%c'", code);
3676 return;
3679 break;
3681 case 'p':
3683 int n;
3685 /* Print N such that 2^N == X. */
3686 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
3688 output_operand_lossage ("invalid operand for '%%%c'", code);
3689 return;
3692 asm_fprintf (f, "%d", n);
3694 break;
3696 case 'P':
3697 /* Print the number of non-zero bits in X (a const_int). */
3698 if (!CONST_INT_P (x))
3700 output_operand_lossage ("invalid operand for '%%%c'", code);
3701 return;
3704 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3705 break;
3707 case 'H':
3708 /* Print the higher numbered register of a pair (TImode) of regs. */
3709 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
3711 output_operand_lossage ("invalid operand for '%%%c'", code);
3712 return;
3715 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3716 break;
3718 case 'm':
3720 int cond_code;
3721 /* Print a condition (eq, ne, etc). */
3723 /* CONST_TRUE_RTX means always -- that's the default. */
3724 if (x == const_true_rtx)
3725 return;
3727 if (!COMPARISON_P (x))
3729 output_operand_lossage ("invalid operand for '%%%c'", code);
3730 return;
3733 cond_code = aarch64_get_condition_code (x);
3734 gcc_assert (cond_code >= 0);
3735 fputs (aarch64_condition_codes[cond_code], f);
3737 break;
3739 case 'M':
3741 int cond_code;
3742 /* Print the inverse of a condition (eq <-> ne, etc). */
3744 /* CONST_TRUE_RTX means never -- that's the default. */
3745 if (x == const_true_rtx)
3747 fputs ("nv", f);
3748 return;
3751 if (!COMPARISON_P (x))
3753 output_operand_lossage ("invalid operand for '%%%c'", code);
3754 return;
3756 cond_code = aarch64_get_condition_code (x);
3757 gcc_assert (cond_code >= 0);
3758 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3759 (cond_code)], f);
3761 break;
3763 case 'b':
3764 case 'h':
3765 case 's':
3766 case 'd':
3767 case 'q':
3768 /* Print a scalar FP/SIMD register name. */
3769 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3771 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3772 return;
3774 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3775 break;
3777 case 'S':
3778 case 'T':
3779 case 'U':
3780 case 'V':
3781 /* Print the first FP/SIMD register name in a list. */
3782 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3784 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3785 return;
3787 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3788 break;
3790 case 'X':
3791 /* Print bottom 16 bits of integer constant in hex. */
3792 if (!CONST_INT_P (x))
3794 output_operand_lossage ("invalid operand for '%%%c'", code);
3795 return;
3797 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3798 break;
3800 case 'w':
3801 case 'x':
3802 /* Print a general register name or the zero register (32-bit or
3803 64-bit). */
3804 if (x == const0_rtx
3805 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3807 asm_fprintf (f, "%czr", code);
3808 break;
3811 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3813 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3814 break;
3817 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3819 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3820 break;
3823 /* Fall through */
3825 case 0:
3826 /* Print a normal operand, if it's a general register, then we
3827 assume DImode. */
3828 if (x == NULL)
3830 output_operand_lossage ("missing operand");
3831 return;
3834 switch (GET_CODE (x))
3836 case REG:
3837 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3838 break;
3840 case MEM:
3841 aarch64_memory_reference_mode = GET_MODE (x);
3842 output_address (XEXP (x, 0));
3843 break;
3845 case LABEL_REF:
3846 case SYMBOL_REF:
3847 output_addr_const (asm_out_file, x);
3848 break;
3850 case CONST_INT:
3851 asm_fprintf (f, "%wd", INTVAL (x));
3852 break;
3854 case CONST_VECTOR:
3855 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3857 gcc_assert (
3858 aarch64_const_vec_all_same_in_range_p (x,
3859 HOST_WIDE_INT_MIN,
3860 HOST_WIDE_INT_MAX));
3861 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3863 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3865 fputc ('0', f);
3867 else
3868 gcc_unreachable ();
3869 break;
3871 case CONST_DOUBLE:
3872 /* CONST_DOUBLE can represent a double-width integer.
3873 In this case, the mode of x is VOIDmode. */
3874 if (GET_MODE (x) == VOIDmode)
3875 ; /* Do Nothing. */
3876 else if (aarch64_float_const_zero_rtx_p (x))
3878 fputc ('0', f);
3879 break;
3881 else if (aarch64_float_const_representable_p (x))
3883 #define buf_size 20
3884 char float_buf[buf_size] = {'\0'};
3885 REAL_VALUE_TYPE r;
3886 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3887 real_to_decimal_for_mode (float_buf, &r,
3888 buf_size, buf_size,
3889 1, GET_MODE (x));
3890 asm_fprintf (asm_out_file, "%s", float_buf);
3891 break;
3892 #undef buf_size
3894 output_operand_lossage ("invalid constant");
3895 return;
3896 default:
3897 output_operand_lossage ("invalid operand");
3898 return;
3900 break;
3902 case 'A':
3903 if (GET_CODE (x) == HIGH)
3904 x = XEXP (x, 0);
3906 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3908 case SYMBOL_SMALL_GOT:
3909 asm_fprintf (asm_out_file, ":got:");
3910 break;
3912 case SYMBOL_SMALL_TLSGD:
3913 asm_fprintf (asm_out_file, ":tlsgd:");
3914 break;
3916 case SYMBOL_SMALL_TLSDESC:
3917 asm_fprintf (asm_out_file, ":tlsdesc:");
3918 break;
3920 case SYMBOL_SMALL_GOTTPREL:
3921 asm_fprintf (asm_out_file, ":gottprel:");
3922 break;
3924 case SYMBOL_SMALL_TPREL:
3925 asm_fprintf (asm_out_file, ":tprel:");
3926 break;
3928 case SYMBOL_TINY_GOT:
3929 gcc_unreachable ();
3930 break;
3932 default:
3933 break;
3935 output_addr_const (asm_out_file, x);
3936 break;
3938 case 'L':
3939 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3941 case SYMBOL_SMALL_GOT:
3942 asm_fprintf (asm_out_file, ":lo12:");
3943 break;
3945 case SYMBOL_SMALL_TLSGD:
3946 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3947 break;
3949 case SYMBOL_SMALL_TLSDESC:
3950 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3951 break;
3953 case SYMBOL_SMALL_GOTTPREL:
3954 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3955 break;
3957 case SYMBOL_SMALL_TPREL:
3958 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3959 break;
3961 case SYMBOL_TINY_GOT:
3962 asm_fprintf (asm_out_file, ":got:");
3963 break;
3965 default:
3966 break;
3968 output_addr_const (asm_out_file, x);
3969 break;
3971 case 'G':
3973 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3975 case SYMBOL_SMALL_TPREL:
3976 asm_fprintf (asm_out_file, ":tprel_hi12:");
3977 break;
3978 default:
3979 break;
3981 output_addr_const (asm_out_file, x);
3982 break;
3984 default:
3985 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3986 return;
3990 void
3991 aarch64_print_operand_address (FILE *f, rtx x)
3993 struct aarch64_address_info addr;
3995 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3996 MEM, true))
3997 switch (addr.type)
3999 case ADDRESS_REG_IMM:
4000 if (addr.offset == const0_rtx)
4001 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
4002 else
4003 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
4004 INTVAL (addr.offset));
4005 return;
4007 case ADDRESS_REG_REG:
4008 if (addr.shift == 0)
4009 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
4010 reg_names [REGNO (addr.offset)]);
4011 else
4012 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
4013 reg_names [REGNO (addr.offset)], addr.shift);
4014 return;
4016 case ADDRESS_REG_UXTW:
4017 if (addr.shift == 0)
4018 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
4019 REGNO (addr.offset) - R0_REGNUM);
4020 else
4021 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
4022 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4023 return;
4025 case ADDRESS_REG_SXTW:
4026 if (addr.shift == 0)
4027 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
4028 REGNO (addr.offset) - R0_REGNUM);
4029 else
4030 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
4031 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4032 return;
4034 case ADDRESS_REG_WB:
4035 switch (GET_CODE (x))
4037 case PRE_INC:
4038 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
4039 GET_MODE_SIZE (aarch64_memory_reference_mode));
4040 return;
4041 case POST_INC:
4042 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4043 GET_MODE_SIZE (aarch64_memory_reference_mode));
4044 return;
4045 case PRE_DEC:
4046 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4047 GET_MODE_SIZE (aarch64_memory_reference_mode));
4048 return;
4049 case POST_DEC:
4050 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4051 GET_MODE_SIZE (aarch64_memory_reference_mode));
4052 return;
4053 case PRE_MODIFY:
4054 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4055 INTVAL (addr.offset));
4056 return;
4057 case POST_MODIFY:
4058 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4059 INTVAL (addr.offset));
4060 return;
4061 default:
4062 break;
4064 break;
4066 case ADDRESS_LO_SUM:
4067 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4068 output_addr_const (f, addr.offset);
4069 asm_fprintf (f, "]");
4070 return;
4072 case ADDRESS_SYMBOLIC:
4073 break;
4076 output_addr_const (f, x);
4079 bool
4080 aarch64_label_mentioned_p (rtx x)
4082 const char *fmt;
4083 int i;
4085 if (GET_CODE (x) == LABEL_REF)
4086 return true;
4088 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4089 referencing instruction, but they are constant offsets, not
4090 symbols. */
4091 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4092 return false;
4094 fmt = GET_RTX_FORMAT (GET_CODE (x));
4095 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4097 if (fmt[i] == 'E')
4099 int j;
4101 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4102 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4103 return 1;
4105 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4106 return 1;
4109 return 0;
4112 /* Implement REGNO_REG_CLASS. */
4114 enum reg_class
4115 aarch64_regno_regclass (unsigned regno)
4117 if (GP_REGNUM_P (regno))
4118 return GENERAL_REGS;
4120 if (regno == SP_REGNUM)
4121 return STACK_REG;
4123 if (regno == FRAME_POINTER_REGNUM
4124 || regno == ARG_POINTER_REGNUM)
4125 return POINTER_REGS;
4127 if (FP_REGNUM_P (regno))
4128 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4130 return NO_REGS;
4133 /* Try a machine-dependent way of reloading an illegitimate address
4134 operand. If we find one, push the reload and return the new rtx. */
4137 aarch64_legitimize_reload_address (rtx *x_p,
4138 enum machine_mode mode,
4139 int opnum, int type,
4140 int ind_levels ATTRIBUTE_UNUSED)
4142 rtx x = *x_p;
4144 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4145 if (aarch64_vect_struct_mode_p (mode)
4146 && GET_CODE (x) == PLUS
4147 && REG_P (XEXP (x, 0))
4148 && CONST_INT_P (XEXP (x, 1)))
4150 rtx orig_rtx = x;
4151 x = copy_rtx (x);
4152 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4153 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4154 opnum, (enum reload_type) type);
4155 return x;
4158 /* We must recognize output that we have already generated ourselves. */
4159 if (GET_CODE (x) == PLUS
4160 && GET_CODE (XEXP (x, 0)) == PLUS
4161 && REG_P (XEXP (XEXP (x, 0), 0))
4162 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4163 && CONST_INT_P (XEXP (x, 1)))
4165 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4166 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4167 opnum, (enum reload_type) type);
4168 return x;
4171 /* We wish to handle large displacements off a base register by splitting
4172 the addend across an add and the mem insn. This can cut the number of
4173 extra insns needed from 3 to 1. It is only useful for load/store of a
4174 single register with 12 bit offset field. */
4175 if (GET_CODE (x) == PLUS
4176 && REG_P (XEXP (x, 0))
4177 && CONST_INT_P (XEXP (x, 1))
4178 && HARD_REGISTER_P (XEXP (x, 0))
4179 && mode != TImode
4180 && mode != TFmode
4181 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4183 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4184 HOST_WIDE_INT low = val & 0xfff;
4185 HOST_WIDE_INT high = val - low;
4186 HOST_WIDE_INT offs;
4187 rtx cst;
4188 enum machine_mode xmode = GET_MODE (x);
4190 /* In ILP32, xmode can be either DImode or SImode. */
4191 gcc_assert (xmode == DImode || xmode == SImode);
4193 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4194 BLKmode alignment. */
4195 if (GET_MODE_SIZE (mode) == 0)
4196 return NULL_RTX;
4198 offs = low % GET_MODE_SIZE (mode);
4200 /* Align misaligned offset by adjusting high part to compensate. */
4201 if (offs != 0)
4203 if (aarch64_uimm12_shift (high + offs))
4205 /* Align down. */
4206 low = low - offs;
4207 high = high + offs;
4209 else
4211 /* Align up. */
4212 offs = GET_MODE_SIZE (mode) - offs;
4213 low = low + offs;
4214 high = high + (low & 0x1000) - offs;
4215 low &= 0xfff;
4219 /* Check for overflow. */
4220 if (high + low != val)
4221 return NULL_RTX;
4223 cst = GEN_INT (high);
4224 if (!aarch64_uimm12_shift (high))
4225 cst = force_const_mem (xmode, cst);
4227 /* Reload high part into base reg, leaving the low part
4228 in the mem instruction.
4229 Note that replacing this gen_rtx_PLUS with plus_constant is
4230 wrong in this case because we rely on the
4231 (plus (plus reg c1) c2) structure being preserved so that
4232 XEXP (*p, 0) in push_reload below uses the correct term. */
4233 x = gen_rtx_PLUS (xmode,
4234 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4235 GEN_INT (low));
4237 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4238 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4239 opnum, (enum reload_type) type);
4240 return x;
4243 return NULL_RTX;
4247 static reg_class_t
4248 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4249 reg_class_t rclass,
4250 enum machine_mode mode,
4251 secondary_reload_info *sri)
4253 /* Without the TARGET_SIMD instructions we cannot move a Q register
4254 to a Q register directly. We need a scratch. */
4255 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4256 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4257 && reg_class_subset_p (rclass, FP_REGS))
4259 if (mode == TFmode)
4260 sri->icode = CODE_FOR_aarch64_reload_movtf;
4261 else if (mode == TImode)
4262 sri->icode = CODE_FOR_aarch64_reload_movti;
4263 return NO_REGS;
4266 /* A TFmode or TImode memory access should be handled via an FP_REGS
4267 because AArch64 has richer addressing modes for LDR/STR instructions
4268 than LDP/STP instructions. */
4269 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
4270 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4271 return FP_REGS;
4273 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4274 return GENERAL_REGS;
4276 return NO_REGS;
4279 static bool
4280 aarch64_can_eliminate (const int from, const int to)
4282 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4283 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4285 if (frame_pointer_needed)
4287 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4288 return true;
4289 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4290 return false;
4291 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4292 && !cfun->calls_alloca)
4293 return true;
4294 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4295 return true;
4297 return false;
4300 return true;
4303 HOST_WIDE_INT
4304 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4306 aarch64_layout_frame ();
4308 if (to == HARD_FRAME_POINTER_REGNUM)
4310 if (from == ARG_POINTER_REGNUM)
4311 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
4313 if (from == FRAME_POINTER_REGNUM)
4314 return (cfun->machine->frame.hard_fp_offset
4315 - cfun->machine->frame.saved_varargs_size);
4318 if (to == STACK_POINTER_REGNUM)
4320 if (from == FRAME_POINTER_REGNUM)
4321 return (cfun->machine->frame.frame_size
4322 - cfun->machine->frame.saved_varargs_size);
4325 return cfun->machine->frame.frame_size;
4328 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4329 previous frame. */
4332 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4334 if (count != 0)
4335 return const0_rtx;
4336 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4340 static void
4341 aarch64_asm_trampoline_template (FILE *f)
4343 if (TARGET_ILP32)
4345 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4346 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4348 else
4350 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4351 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4353 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4354 assemble_aligned_integer (4, const0_rtx);
4355 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4356 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4359 static void
4360 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4362 rtx fnaddr, mem, a_tramp;
4363 const int tramp_code_sz = 16;
4365 /* Don't need to copy the trailing D-words, we fill those in below. */
4366 emit_block_move (m_tramp, assemble_trampoline_template (),
4367 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4368 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4369 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4370 if (GET_MODE (fnaddr) != ptr_mode)
4371 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4372 emit_move_insn (mem, fnaddr);
4374 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4375 emit_move_insn (mem, chain_value);
4377 /* XXX We should really define a "clear_cache" pattern and use
4378 gen_clear_cache(). */
4379 a_tramp = XEXP (m_tramp, 0);
4380 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4381 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4382 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4383 ptr_mode);
4386 static unsigned char
4387 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4389 switch (regclass)
4391 case CALLER_SAVE_REGS:
4392 case POINTER_REGS:
4393 case GENERAL_REGS:
4394 case ALL_REGS:
4395 case FP_REGS:
4396 case FP_LO_REGS:
4397 return
4398 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4399 (GET_MODE_SIZE (mode) + 7) / 8;
4400 case STACK_REG:
4401 return 1;
4403 case NO_REGS:
4404 return 0;
4406 default:
4407 break;
4409 gcc_unreachable ();
4412 static reg_class_t
4413 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4415 if (regclass == POINTER_REGS)
4416 return GENERAL_REGS;
4418 if (regclass == STACK_REG)
4420 if (REG_P(x)
4421 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4422 return regclass;
4424 return NO_REGS;
4427 /* If it's an integer immediate that MOVI can't handle, then
4428 FP_REGS is not an option, so we return NO_REGS instead. */
4429 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4430 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4431 return NO_REGS;
4433 /* Register eliminiation can result in a request for
4434 SP+constant->FP_REGS. We cannot support such operations which
4435 use SP as source and an FP_REG as destination, so reject out
4436 right now. */
4437 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4439 rtx lhs = XEXP (x, 0);
4441 /* Look through a possible SUBREG introduced by ILP32. */
4442 if (GET_CODE (lhs) == SUBREG)
4443 lhs = SUBREG_REG (lhs);
4445 gcc_assert (REG_P (lhs));
4446 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4447 POINTER_REGS));
4448 return NO_REGS;
4451 return regclass;
4454 void
4455 aarch64_asm_output_labelref (FILE* f, const char *name)
4457 asm_fprintf (f, "%U%s", name);
4460 static void
4461 aarch64_elf_asm_constructor (rtx symbol, int priority)
4463 if (priority == DEFAULT_INIT_PRIORITY)
4464 default_ctor_section_asm_out_constructor (symbol, priority);
4465 else
4467 section *s;
4468 char buf[18];
4469 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4470 s = get_section (buf, SECTION_WRITE, NULL);
4471 switch_to_section (s);
4472 assemble_align (POINTER_SIZE);
4473 assemble_aligned_integer (POINTER_BYTES, symbol);
4477 static void
4478 aarch64_elf_asm_destructor (rtx symbol, int priority)
4480 if (priority == DEFAULT_INIT_PRIORITY)
4481 default_dtor_section_asm_out_destructor (symbol, priority);
4482 else
4484 section *s;
4485 char buf[18];
4486 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4487 s = get_section (buf, SECTION_WRITE, NULL);
4488 switch_to_section (s);
4489 assemble_align (POINTER_SIZE);
4490 assemble_aligned_integer (POINTER_BYTES, symbol);
4494 const char*
4495 aarch64_output_casesi (rtx *operands)
4497 char buf[100];
4498 char label[100];
4499 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
4500 int index;
4501 static const char *const patterns[4][2] =
4504 "ldrb\t%w3, [%0,%w1,uxtw]",
4505 "add\t%3, %4, %w3, sxtb #2"
4508 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4509 "add\t%3, %4, %w3, sxth #2"
4512 "ldr\t%w3, [%0,%w1,uxtw #2]",
4513 "add\t%3, %4, %w3, sxtw #2"
4515 /* We assume that DImode is only generated when not optimizing and
4516 that we don't really need 64-bit address offsets. That would
4517 imply an object file with 8GB of code in a single function! */
4519 "ldr\t%w3, [%0,%w1,uxtw #2]",
4520 "add\t%3, %4, %w3, sxtw #2"
4524 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4526 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4528 gcc_assert (index >= 0 && index <= 3);
4530 /* Need to implement table size reduction, by chaning the code below. */
4531 output_asm_insn (patterns[index][0], operands);
4532 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4533 snprintf (buf, sizeof (buf),
4534 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4535 output_asm_insn (buf, operands);
4536 output_asm_insn (patterns[index][1], operands);
4537 output_asm_insn ("br\t%3", operands);
4538 assemble_label (asm_out_file, label);
4539 return "";
4543 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4544 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4545 operator. */
4548 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4550 if (shift >= 0 && shift <= 3)
4552 int size;
4553 for (size = 8; size <= 32; size *= 2)
4555 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4556 if (mask == bits << shift)
4557 return size;
4560 return 0;
4563 static bool
4564 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4565 const_rtx x ATTRIBUTE_UNUSED)
4567 /* We can't use blocks for constants when we're using a per-function
4568 constant pool. */
4569 return false;
4572 static section *
4573 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4574 rtx x ATTRIBUTE_UNUSED,
4575 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4577 /* Force all constant pool entries into the current function section. */
4578 return function_section (current_function_decl);
4582 /* Costs. */
4584 /* Helper function for rtx cost calculation. Strip a shift expression
4585 from X. Returns the inner operand if successful, or the original
4586 expression on failure. */
4587 static rtx
4588 aarch64_strip_shift (rtx x)
4590 rtx op = x;
4592 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4593 we can convert both to ROR during final output. */
4594 if ((GET_CODE (op) == ASHIFT
4595 || GET_CODE (op) == ASHIFTRT
4596 || GET_CODE (op) == LSHIFTRT
4597 || GET_CODE (op) == ROTATERT
4598 || GET_CODE (op) == ROTATE)
4599 && CONST_INT_P (XEXP (op, 1)))
4600 return XEXP (op, 0);
4602 if (GET_CODE (op) == MULT
4603 && CONST_INT_P (XEXP (op, 1))
4604 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4605 return XEXP (op, 0);
4607 return x;
4610 /* Helper function for rtx cost calculation. Strip an extend
4611 expression from X. Returns the inner operand if successful, or the
4612 original expression on failure. We deal with a number of possible
4613 canonicalization variations here. */
4614 static rtx
4615 aarch64_strip_extend (rtx x)
4617 rtx op = x;
4619 /* Zero and sign extraction of a widened value. */
4620 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4621 && XEXP (op, 2) == const0_rtx
4622 && GET_CODE (XEXP (op, 0)) == MULT
4623 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4624 XEXP (op, 1)))
4625 return XEXP (XEXP (op, 0), 0);
4627 /* It can also be represented (for zero-extend) as an AND with an
4628 immediate. */
4629 if (GET_CODE (op) == AND
4630 && GET_CODE (XEXP (op, 0)) == MULT
4631 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4632 && CONST_INT_P (XEXP (op, 1))
4633 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4634 INTVAL (XEXP (op, 1))) != 0)
4635 return XEXP (XEXP (op, 0), 0);
4637 /* Now handle extended register, as this may also have an optional
4638 left shift by 1..4. */
4639 if (GET_CODE (op) == ASHIFT
4640 && CONST_INT_P (XEXP (op, 1))
4641 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4642 op = XEXP (op, 0);
4644 if (GET_CODE (op) == ZERO_EXTEND
4645 || GET_CODE (op) == SIGN_EXTEND)
4646 op = XEXP (op, 0);
4648 if (op != x)
4649 return op;
4651 return x;
4654 /* Helper function for rtx cost calculation. Calculate the cost of
4655 a MULT, which may be part of a multiply-accumulate rtx. Return
4656 the calculated cost of the expression, recursing manually in to
4657 operands where needed. */
4659 static int
4660 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4662 rtx op0, op1;
4663 const struct cpu_cost_table *extra_cost
4664 = aarch64_tune_params->insn_extra_cost;
4665 int cost = 0;
4666 bool maybe_fma = (outer == PLUS || outer == MINUS);
4667 enum machine_mode mode = GET_MODE (x);
4669 gcc_checking_assert (code == MULT);
4671 op0 = XEXP (x, 0);
4672 op1 = XEXP (x, 1);
4674 if (VECTOR_MODE_P (mode))
4675 mode = GET_MODE_INNER (mode);
4677 /* Integer multiply/fma. */
4678 if (GET_MODE_CLASS (mode) == MODE_INT)
4680 /* The multiply will be canonicalized as a shift, cost it as such. */
4681 if (CONST_INT_P (op1)
4682 && exact_log2 (INTVAL (op1)) > 0)
4684 if (speed)
4686 if (maybe_fma)
4687 /* ADD (shifted register). */
4688 cost += extra_cost->alu.arith_shift;
4689 else
4690 /* LSL (immediate). */
4691 cost += extra_cost->alu.shift;
4694 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4696 return cost;
4699 /* Integer multiplies or FMAs have zero/sign extending variants. */
4700 if ((GET_CODE (op0) == ZERO_EXTEND
4701 && GET_CODE (op1) == ZERO_EXTEND)
4702 || (GET_CODE (op0) == SIGN_EXTEND
4703 && GET_CODE (op1) == SIGN_EXTEND))
4705 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4706 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4708 if (speed)
4710 if (maybe_fma)
4711 /* MADD/SMADDL/UMADDL. */
4712 cost += extra_cost->mult[0].extend_add;
4713 else
4714 /* MUL/SMULL/UMULL. */
4715 cost += extra_cost->mult[0].extend;
4718 return cost;
4721 /* This is either an integer multiply or an FMA. In both cases
4722 we want to recurse and cost the operands. */
4723 cost += rtx_cost (op0, MULT, 0, speed)
4724 + rtx_cost (op1, MULT, 1, speed);
4726 if (speed)
4728 if (maybe_fma)
4729 /* MADD. */
4730 cost += extra_cost->mult[mode == DImode].add;
4731 else
4732 /* MUL. */
4733 cost += extra_cost->mult[mode == DImode].simple;
4736 return cost;
4738 else
4740 if (speed)
4742 /* Floating-point FMA/FMUL can also support negations of the
4743 operands. */
4744 if (GET_CODE (op0) == NEG)
4745 op0 = XEXP (op0, 0);
4746 if (GET_CODE (op1) == NEG)
4747 op1 = XEXP (op1, 0);
4749 if (maybe_fma)
4750 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4751 cost += extra_cost->fp[mode == DFmode].fma;
4752 else
4753 /* FMUL/FNMUL. */
4754 cost += extra_cost->fp[mode == DFmode].mult;
4757 cost += rtx_cost (op0, MULT, 0, speed)
4758 + rtx_cost (op1, MULT, 1, speed);
4759 return cost;
4763 static int
4764 aarch64_address_cost (rtx x,
4765 enum machine_mode mode,
4766 addr_space_t as ATTRIBUTE_UNUSED,
4767 bool speed)
4769 enum rtx_code c = GET_CODE (x);
4770 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4771 struct aarch64_address_info info;
4772 int cost = 0;
4773 info.shift = 0;
4775 if (!aarch64_classify_address (&info, x, mode, c, false))
4777 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4779 /* This is a CONST or SYMBOL ref which will be split
4780 in a different way depending on the code model in use.
4781 Cost it through the generic infrastructure. */
4782 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4783 /* Divide through by the cost of one instruction to
4784 bring it to the same units as the address costs. */
4785 cost_symbol_ref /= COSTS_N_INSNS (1);
4786 /* The cost is then the cost of preparing the address,
4787 followed by an immediate (possibly 0) offset. */
4788 return cost_symbol_ref + addr_cost->imm_offset;
4790 else
4792 /* This is most likely a jump table from a case
4793 statement. */
4794 return addr_cost->register_offset;
4798 switch (info.type)
4800 case ADDRESS_LO_SUM:
4801 case ADDRESS_SYMBOLIC:
4802 case ADDRESS_REG_IMM:
4803 cost += addr_cost->imm_offset;
4804 break;
4806 case ADDRESS_REG_WB:
4807 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4808 cost += addr_cost->pre_modify;
4809 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4810 cost += addr_cost->post_modify;
4811 else
4812 gcc_unreachable ();
4814 break;
4816 case ADDRESS_REG_REG:
4817 cost += addr_cost->register_offset;
4818 break;
4820 case ADDRESS_REG_UXTW:
4821 case ADDRESS_REG_SXTW:
4822 cost += addr_cost->register_extend;
4823 break;
4825 default:
4826 gcc_unreachable ();
4830 if (info.shift > 0)
4832 /* For the sake of calculating the cost of the shifted register
4833 component, we can treat same sized modes in the same way. */
4834 switch (GET_MODE_BITSIZE (mode))
4836 case 16:
4837 cost += addr_cost->addr_scale_costs.hi;
4838 break;
4840 case 32:
4841 cost += addr_cost->addr_scale_costs.si;
4842 break;
4844 case 64:
4845 cost += addr_cost->addr_scale_costs.di;
4846 break;
4848 /* We can't tell, or this is a 128-bit vector. */
4849 default:
4850 cost += addr_cost->addr_scale_costs.ti;
4851 break;
4855 return cost;
4858 /* Return true if the RTX X in mode MODE is a zero or sign extract
4859 usable in an ADD or SUB (extended register) instruction. */
4860 static bool
4861 aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
4863 /* Catch add with a sign extract.
4864 This is add_<optab><mode>_multp2. */
4865 if (GET_CODE (x) == SIGN_EXTRACT
4866 || GET_CODE (x) == ZERO_EXTRACT)
4868 rtx op0 = XEXP (x, 0);
4869 rtx op1 = XEXP (x, 1);
4870 rtx op2 = XEXP (x, 2);
4872 if (GET_CODE (op0) == MULT
4873 && CONST_INT_P (op1)
4874 && op2 == const0_rtx
4875 && CONST_INT_P (XEXP (op0, 1))
4876 && aarch64_is_extend_from_extract (mode,
4877 XEXP (op0, 1),
4878 op1))
4880 return true;
4884 return false;
4887 static bool
4888 aarch64_frint_unspec_p (unsigned int u)
4890 switch (u)
4892 case UNSPEC_FRINTZ:
4893 case UNSPEC_FRINTP:
4894 case UNSPEC_FRINTM:
4895 case UNSPEC_FRINTA:
4896 case UNSPEC_FRINTN:
4897 case UNSPEC_FRINTX:
4898 case UNSPEC_FRINTI:
4899 return true;
4901 default:
4902 return false;
4906 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
4907 storing it in *COST. Result is true if the total cost of the operation
4908 has now been calculated. */
4909 static bool
4910 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
4912 rtx inner;
4913 rtx comparator;
4914 enum rtx_code cmpcode;
4916 if (COMPARISON_P (op0))
4918 inner = XEXP (op0, 0);
4919 comparator = XEXP (op0, 1);
4920 cmpcode = GET_CODE (op0);
4922 else
4924 inner = op0;
4925 comparator = const0_rtx;
4926 cmpcode = NE;
4929 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
4931 /* Conditional branch. */
4932 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
4933 return true;
4934 else
4936 if (cmpcode == NE || cmpcode == EQ)
4938 if (comparator == const0_rtx)
4940 /* TBZ/TBNZ/CBZ/CBNZ. */
4941 if (GET_CODE (inner) == ZERO_EXTRACT)
4942 /* TBZ/TBNZ. */
4943 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
4944 0, speed);
4945 else
4946 /* CBZ/CBNZ. */
4947 *cost += rtx_cost (inner, cmpcode, 0, speed);
4949 return true;
4952 else if (cmpcode == LT || cmpcode == GE)
4954 /* TBZ/TBNZ. */
4955 if (comparator == const0_rtx)
4956 return true;
4960 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
4962 /* It's a conditional operation based on the status flags,
4963 so it must be some flavor of CSEL. */
4965 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
4966 if (GET_CODE (op1) == NEG
4967 || GET_CODE (op1) == NOT
4968 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
4969 op1 = XEXP (op1, 0);
4971 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
4972 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
4973 return true;
4976 /* We don't know what this is, cost all operands. */
4977 return false;
4980 /* Calculate the cost of calculating X, storing it in *COST. Result
4981 is true if the total cost of the operation has now been calculated. */
4982 static bool
4983 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4984 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4986 rtx op0, op1, op2;
4987 const struct cpu_cost_table *extra_cost
4988 = aarch64_tune_params->insn_extra_cost;
4989 enum machine_mode mode = GET_MODE (x);
4991 /* By default, assume that everything has equivalent cost to the
4992 cheapest instruction. Any additional costs are applied as a delta
4993 above this default. */
4994 *cost = COSTS_N_INSNS (1);
4996 /* TODO: The cost infrastructure currently does not handle
4997 vector operations. Assume that all vector operations
4998 are equally expensive. */
4999 if (VECTOR_MODE_P (mode))
5001 if (speed)
5002 *cost += extra_cost->vect.alu;
5003 return true;
5006 switch (code)
5008 case SET:
5009 /* The cost depends entirely on the operands to SET. */
5010 *cost = 0;
5011 op0 = SET_DEST (x);
5012 op1 = SET_SRC (x);
5014 switch (GET_CODE (op0))
5016 case MEM:
5017 if (speed)
5019 rtx address = XEXP (op0, 0);
5020 if (GET_MODE_CLASS (mode) == MODE_INT)
5021 *cost += extra_cost->ldst.store;
5022 else if (mode == SFmode)
5023 *cost += extra_cost->ldst.storef;
5024 else if (mode == DFmode)
5025 *cost += extra_cost->ldst.stored;
5027 *cost +=
5028 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5029 0, speed));
5032 *cost += rtx_cost (op1, SET, 1, speed);
5033 return true;
5035 case SUBREG:
5036 if (! REG_P (SUBREG_REG (op0)))
5037 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
5039 /* Fall through. */
5040 case REG:
5041 /* const0_rtx is in general free, but we will use an
5042 instruction to set a register to 0. */
5043 if (REG_P (op1) || op1 == const0_rtx)
5045 /* The cost is 1 per register copied. */
5046 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5047 / UNITS_PER_WORD;
5048 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5050 else
5051 /* Cost is just the cost of the RHS of the set. */
5052 *cost += rtx_cost (op1, SET, 1, speed);
5053 return true;
5055 case ZERO_EXTRACT:
5056 case SIGN_EXTRACT:
5057 /* Bit-field insertion. Strip any redundant widening of
5058 the RHS to meet the width of the target. */
5059 if (GET_CODE (op1) == SUBREG)
5060 op1 = SUBREG_REG (op1);
5061 if ((GET_CODE (op1) == ZERO_EXTEND
5062 || GET_CODE (op1) == SIGN_EXTEND)
5063 && CONST_INT_P (XEXP (op0, 1))
5064 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5065 >= INTVAL (XEXP (op0, 1))))
5066 op1 = XEXP (op1, 0);
5068 if (CONST_INT_P (op1))
5070 /* MOV immediate is assumed to always be cheap. */
5071 *cost = COSTS_N_INSNS (1);
5073 else
5075 /* BFM. */
5076 if (speed)
5077 *cost += extra_cost->alu.bfi;
5078 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5081 return true;
5083 default:
5084 /* We can't make sense of this, assume default cost. */
5085 *cost = COSTS_N_INSNS (1);
5086 return false;
5088 return false;
5090 case CONST_INT:
5091 /* If an instruction can incorporate a constant within the
5092 instruction, the instruction's expression avoids calling
5093 rtx_cost() on the constant. If rtx_cost() is called on a
5094 constant, then it is usually because the constant must be
5095 moved into a register by one or more instructions.
5097 The exception is constant 0, which can be expressed
5098 as XZR/WZR and is therefore free. The exception to this is
5099 if we have (set (reg) (const0_rtx)) in which case we must cost
5100 the move. However, we can catch that when we cost the SET, so
5101 we don't need to consider that here. */
5102 if (x == const0_rtx)
5103 *cost = 0;
5104 else
5106 /* To an approximation, building any other constant is
5107 proportionally expensive to the number of instructions
5108 required to build that constant. This is true whether we
5109 are compiling for SPEED or otherwise. */
5110 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
5111 INTVAL (x),
5112 false));
5114 return true;
5116 case CONST_DOUBLE:
5117 if (speed)
5119 /* mov[df,sf]_aarch64. */
5120 if (aarch64_float_const_representable_p (x))
5121 /* FMOV (scalar immediate). */
5122 *cost += extra_cost->fp[mode == DFmode].fpconst;
5123 else if (!aarch64_float_const_zero_rtx_p (x))
5125 /* This will be a load from memory. */
5126 if (mode == DFmode)
5127 *cost += extra_cost->ldst.loadd;
5128 else
5129 *cost += extra_cost->ldst.loadf;
5131 else
5132 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5133 or MOV v0.s[0], wzr - neither of which are modeled by the
5134 cost tables. Just use the default cost. */
5139 return true;
5141 case MEM:
5142 if (speed)
5144 /* For loads we want the base cost of a load, plus an
5145 approximation for the additional cost of the addressing
5146 mode. */
5147 rtx address = XEXP (x, 0);
5148 if (GET_MODE_CLASS (mode) == MODE_INT)
5149 *cost += extra_cost->ldst.load;
5150 else if (mode == SFmode)
5151 *cost += extra_cost->ldst.loadf;
5152 else if (mode == DFmode)
5153 *cost += extra_cost->ldst.loadd;
5155 *cost +=
5156 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5157 0, speed));
5160 return true;
5162 case NEG:
5163 op0 = XEXP (x, 0);
5165 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5167 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5168 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5170 /* CSETM. */
5171 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5172 return true;
5175 /* Cost this as SUB wzr, X. */
5176 op0 = CONST0_RTX (GET_MODE (x));
5177 op1 = XEXP (x, 0);
5178 goto cost_minus;
5181 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5183 /* Support (neg(fma...)) as a single instruction only if
5184 sign of zeros is unimportant. This matches the decision
5185 making in aarch64.md. */
5186 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5188 /* FNMADD. */
5189 *cost = rtx_cost (op0, NEG, 0, speed);
5190 return true;
5192 if (speed)
5193 /* FNEG. */
5194 *cost += extra_cost->fp[mode == DFmode].neg;
5195 return false;
5198 return false;
5200 case CLRSB:
5201 case CLZ:
5202 if (speed)
5203 *cost += extra_cost->alu.clz;
5205 return false;
5207 case COMPARE:
5208 op0 = XEXP (x, 0);
5209 op1 = XEXP (x, 1);
5211 if (op1 == const0_rtx
5212 && GET_CODE (op0) == AND)
5214 x = op0;
5215 goto cost_logic;
5218 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5220 /* TODO: A write to the CC flags possibly costs extra, this
5221 needs encoding in the cost tables. */
5223 /* CC_ZESWPmode supports zero extend for free. */
5224 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5225 op0 = XEXP (op0, 0);
5227 /* ANDS. */
5228 if (GET_CODE (op0) == AND)
5230 x = op0;
5231 goto cost_logic;
5234 if (GET_CODE (op0) == PLUS)
5236 /* ADDS (and CMN alias). */
5237 x = op0;
5238 goto cost_plus;
5241 if (GET_CODE (op0) == MINUS)
5243 /* SUBS. */
5244 x = op0;
5245 goto cost_minus;
5248 if (GET_CODE (op1) == NEG)
5250 /* CMN. */
5251 if (speed)
5252 *cost += extra_cost->alu.arith;
5254 *cost += rtx_cost (op0, COMPARE, 0, speed);
5255 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5256 return true;
5259 /* CMP.
5261 Compare can freely swap the order of operands, and
5262 canonicalization puts the more complex operation first.
5263 But the integer MINUS logic expects the shift/extend
5264 operation in op1. */
5265 if (! (REG_P (op0)
5266 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5268 op0 = XEXP (x, 1);
5269 op1 = XEXP (x, 0);
5271 goto cost_minus;
5274 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5276 /* FCMP. */
5277 if (speed)
5278 *cost += extra_cost->fp[mode == DFmode].compare;
5280 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5282 /* FCMP supports constant 0.0 for no extra cost. */
5283 return true;
5285 return false;
5288 return false;
5290 case MINUS:
5292 op0 = XEXP (x, 0);
5293 op1 = XEXP (x, 1);
5295 cost_minus:
5296 /* Detect valid immediates. */
5297 if ((GET_MODE_CLASS (mode) == MODE_INT
5298 || (GET_MODE_CLASS (mode) == MODE_CC
5299 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5300 && CONST_INT_P (op1)
5301 && aarch64_uimm12_shift (INTVAL (op1)))
5303 *cost += rtx_cost (op0, MINUS, 0, speed);
5305 if (speed)
5306 /* SUB(S) (immediate). */
5307 *cost += extra_cost->alu.arith;
5308 return true;
5312 /* Look for SUB (extended register). */
5313 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5315 if (speed)
5316 *cost += extra_cost->alu.arith_shift;
5318 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5319 (enum rtx_code) GET_CODE (op1),
5320 0, speed);
5321 return true;
5324 rtx new_op1 = aarch64_strip_extend (op1);
5326 /* Cost this as an FMA-alike operation. */
5327 if ((GET_CODE (new_op1) == MULT
5328 || GET_CODE (new_op1) == ASHIFT)
5329 && code != COMPARE)
5331 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5332 (enum rtx_code) code,
5333 speed);
5334 *cost += rtx_cost (op0, MINUS, 0, speed);
5335 return true;
5338 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5340 if (speed)
5342 if (GET_MODE_CLASS (mode) == MODE_INT)
5343 /* SUB(S). */
5344 *cost += extra_cost->alu.arith;
5345 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5346 /* FSUB. */
5347 *cost += extra_cost->fp[mode == DFmode].addsub;
5349 return true;
5352 case PLUS:
5354 rtx new_op0;
5356 op0 = XEXP (x, 0);
5357 op1 = XEXP (x, 1);
5359 cost_plus:
5360 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5361 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5363 /* CSINC. */
5364 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5365 *cost += rtx_cost (op1, PLUS, 1, speed);
5366 return true;
5369 if (GET_MODE_CLASS (mode) == MODE_INT
5370 && CONST_INT_P (op1)
5371 && aarch64_uimm12_shift (INTVAL (op1)))
5373 *cost += rtx_cost (op0, PLUS, 0, speed);
5375 if (speed)
5376 /* ADD (immediate). */
5377 *cost += extra_cost->alu.arith;
5378 return true;
5381 /* Look for ADD (extended register). */
5382 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5384 if (speed)
5385 *cost += extra_cost->alu.arith_shift;
5387 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5388 (enum rtx_code) GET_CODE (op0),
5389 0, speed);
5390 return true;
5393 /* Strip any extend, leave shifts behind as we will
5394 cost them through mult_cost. */
5395 new_op0 = aarch64_strip_extend (op0);
5397 if (GET_CODE (new_op0) == MULT
5398 || GET_CODE (new_op0) == ASHIFT)
5400 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5401 speed);
5402 *cost += rtx_cost (op1, PLUS, 1, speed);
5403 return true;
5406 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5407 + rtx_cost (op1, PLUS, 1, speed));
5409 if (speed)
5411 if (GET_MODE_CLASS (mode) == MODE_INT)
5412 /* ADD. */
5413 *cost += extra_cost->alu.arith;
5414 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5415 /* FADD. */
5416 *cost += extra_cost->fp[mode == DFmode].addsub;
5418 return true;
5421 case BSWAP:
5422 *cost = COSTS_N_INSNS (1);
5424 if (speed)
5425 *cost += extra_cost->alu.rev;
5427 return false;
5429 case IOR:
5430 if (aarch_rev16_p (x))
5432 *cost = COSTS_N_INSNS (1);
5434 if (speed)
5435 *cost += extra_cost->alu.rev;
5437 return true;
5439 /* Fall through. */
5440 case XOR:
5441 case AND:
5442 cost_logic:
5443 op0 = XEXP (x, 0);
5444 op1 = XEXP (x, 1);
5446 if (code == AND
5447 && GET_CODE (op0) == MULT
5448 && CONST_INT_P (XEXP (op0, 1))
5449 && CONST_INT_P (op1)
5450 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5451 INTVAL (op1)) != 0)
5453 /* This is a UBFM/SBFM. */
5454 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5455 if (speed)
5456 *cost += extra_cost->alu.bfx;
5457 return true;
5460 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5462 /* We possibly get the immediate for free, this is not
5463 modelled. */
5464 if (CONST_INT_P (op1)
5465 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5467 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5469 if (speed)
5470 *cost += extra_cost->alu.logical;
5472 return true;
5474 else
5476 rtx new_op0 = op0;
5478 /* Handle ORN, EON, or BIC. */
5479 if (GET_CODE (op0) == NOT)
5480 op0 = XEXP (op0, 0);
5482 new_op0 = aarch64_strip_shift (op0);
5484 /* If we had a shift on op0 then this is a logical-shift-
5485 by-register/immediate operation. Otherwise, this is just
5486 a logical operation. */
5487 if (speed)
5489 if (new_op0 != op0)
5491 /* Shift by immediate. */
5492 if (CONST_INT_P (XEXP (op0, 1)))
5493 *cost += extra_cost->alu.log_shift;
5494 else
5495 *cost += extra_cost->alu.log_shift_reg;
5497 else
5498 *cost += extra_cost->alu.logical;
5501 /* In both cases we want to cost both operands. */
5502 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5503 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5505 return true;
5508 return false;
5510 case NOT:
5511 /* MVN. */
5512 if (speed)
5513 *cost += extra_cost->alu.logical;
5515 /* The logical instruction could have the shifted register form,
5516 but the cost is the same if the shift is processed as a separate
5517 instruction, so we don't bother with it here. */
5518 return false;
5520 case ZERO_EXTEND:
5522 op0 = XEXP (x, 0);
5523 /* If a value is written in SI mode, then zero extended to DI
5524 mode, the operation will in general be free as a write to
5525 a 'w' register implicitly zeroes the upper bits of an 'x'
5526 register. However, if this is
5528 (set (reg) (zero_extend (reg)))
5530 we must cost the explicit register move. */
5531 if (mode == DImode
5532 && GET_MODE (op0) == SImode
5533 && outer == SET)
5535 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5537 if (!op_cost && speed)
5538 /* MOV. */
5539 *cost += extra_cost->alu.extend;
5540 else
5541 /* Free, the cost is that of the SI mode operation. */
5542 *cost = op_cost;
5544 return true;
5546 else if (MEM_P (XEXP (x, 0)))
5548 /* All loads can zero extend to any size for free. */
5549 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5550 return true;
5553 /* UXTB/UXTH. */
5554 if (speed)
5555 *cost += extra_cost->alu.extend;
5557 return false;
5559 case SIGN_EXTEND:
5560 if (MEM_P (XEXP (x, 0)))
5562 /* LDRSH. */
5563 if (speed)
5565 rtx address = XEXP (XEXP (x, 0), 0);
5566 *cost += extra_cost->ldst.load_sign_extend;
5568 *cost +=
5569 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5570 0, speed));
5572 return true;
5575 if (speed)
5576 *cost += extra_cost->alu.extend;
5577 return false;
5579 case ASHIFT:
5580 op0 = XEXP (x, 0);
5581 op1 = XEXP (x, 1);
5583 if (CONST_INT_P (op1))
5585 /* LSL (immediate), UBMF, UBFIZ and friends. These are all
5586 aliases. */
5587 if (speed)
5588 *cost += extra_cost->alu.shift;
5590 /* We can incorporate zero/sign extend for free. */
5591 if (GET_CODE (op0) == ZERO_EXTEND
5592 || GET_CODE (op0) == SIGN_EXTEND)
5593 op0 = XEXP (op0, 0);
5595 *cost += rtx_cost (op0, ASHIFT, 0, speed);
5596 return true;
5598 else
5600 /* LSLV. */
5601 if (speed)
5602 *cost += extra_cost->alu.shift_reg;
5604 return false; /* All arguments need to be in registers. */
5607 case ROTATE:
5608 case ROTATERT:
5609 case LSHIFTRT:
5610 case ASHIFTRT:
5611 op0 = XEXP (x, 0);
5612 op1 = XEXP (x, 1);
5614 if (CONST_INT_P (op1))
5616 /* ASR (immediate) and friends. */
5617 if (speed)
5618 *cost += extra_cost->alu.shift;
5620 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5621 return true;
5623 else
5626 /* ASR (register) and friends. */
5627 if (speed)
5628 *cost += extra_cost->alu.shift_reg;
5630 return false; /* All arguments need to be in registers. */
5633 case SYMBOL_REF:
5635 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5637 /* LDR. */
5638 if (speed)
5639 *cost += extra_cost->ldst.load;
5641 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5642 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5644 /* ADRP, followed by ADD. */
5645 *cost += COSTS_N_INSNS (1);
5646 if (speed)
5647 *cost += 2 * extra_cost->alu.arith;
5649 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5650 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5652 /* ADR. */
5653 if (speed)
5654 *cost += extra_cost->alu.arith;
5657 if (flag_pic)
5659 /* One extra load instruction, after accessing the GOT. */
5660 *cost += COSTS_N_INSNS (1);
5661 if (speed)
5662 *cost += extra_cost->ldst.load;
5664 return true;
5666 case HIGH:
5667 case LO_SUM:
5668 /* ADRP/ADD (immediate). */
5669 if (speed)
5670 *cost += extra_cost->alu.arith;
5671 return true;
5673 case ZERO_EXTRACT:
5674 case SIGN_EXTRACT:
5675 /* UBFX/SBFX. */
5676 if (speed)
5677 *cost += extra_cost->alu.bfx;
5679 /* We can trust that the immediates used will be correct (there
5680 are no by-register forms), so we need only cost op0. */
5681 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
5682 return true;
5684 case MULT:
5685 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5686 /* aarch64_rtx_mult_cost always handles recursion to its
5687 operands. */
5688 return true;
5690 case MOD:
5691 case UMOD:
5692 if (speed)
5694 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5695 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5696 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5697 else if (GET_MODE (x) == DFmode)
5698 *cost += (extra_cost->fp[1].mult
5699 + extra_cost->fp[1].div);
5700 else if (GET_MODE (x) == SFmode)
5701 *cost += (extra_cost->fp[0].mult
5702 + extra_cost->fp[0].div);
5704 return false; /* All arguments need to be in registers. */
5706 case DIV:
5707 case UDIV:
5708 case SQRT:
5709 if (speed)
5711 if (GET_MODE_CLASS (mode) == MODE_INT)
5712 /* There is no integer SQRT, so only DIV and UDIV can get
5713 here. */
5714 *cost += extra_cost->mult[mode == DImode].idiv;
5715 else
5716 *cost += extra_cost->fp[mode == DFmode].div;
5718 return false; /* All arguments need to be in registers. */
5720 case IF_THEN_ELSE:
5721 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
5722 XEXP (x, 2), cost, speed);
5724 case EQ:
5725 case NE:
5726 case GT:
5727 case GTU:
5728 case LT:
5729 case LTU:
5730 case GE:
5731 case GEU:
5732 case LE:
5733 case LEU:
5735 return false; /* All arguments must be in registers. */
5737 case FMA:
5738 op0 = XEXP (x, 0);
5739 op1 = XEXP (x, 1);
5740 op2 = XEXP (x, 2);
5742 if (speed)
5743 *cost += extra_cost->fp[mode == DFmode].fma;
5745 /* FMSUB, FNMADD, and FNMSUB are free. */
5746 if (GET_CODE (op0) == NEG)
5747 op0 = XEXP (op0, 0);
5749 if (GET_CODE (op2) == NEG)
5750 op2 = XEXP (op2, 0);
5752 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5753 and the by-element operand as operand 0. */
5754 if (GET_CODE (op1) == NEG)
5755 op1 = XEXP (op1, 0);
5757 /* Catch vector-by-element operations. The by-element operand can
5758 either be (vec_duplicate (vec_select (x))) or just
5759 (vec_select (x)), depending on whether we are multiplying by
5760 a vector or a scalar.
5762 Canonicalization is not very good in these cases, FMA4 will put the
5763 by-element operand as operand 0, FNMA4 will have it as operand 1. */
5764 if (GET_CODE (op0) == VEC_DUPLICATE)
5765 op0 = XEXP (op0, 0);
5766 else if (GET_CODE (op1) == VEC_DUPLICATE)
5767 op1 = XEXP (op1, 0);
5769 if (GET_CODE (op0) == VEC_SELECT)
5770 op0 = XEXP (op0, 0);
5771 else if (GET_CODE (op1) == VEC_SELECT)
5772 op1 = XEXP (op1, 0);
5774 /* If the remaining parameters are not registers,
5775 get the cost to put them into registers. */
5776 *cost += rtx_cost (op0, FMA, 0, speed);
5777 *cost += rtx_cost (op1, FMA, 1, speed);
5778 *cost += rtx_cost (op2, FMA, 2, speed);
5779 return true;
5781 case FLOAT_EXTEND:
5782 if (speed)
5783 *cost += extra_cost->fp[mode == DFmode].widen;
5784 return false;
5786 case FLOAT_TRUNCATE:
5787 if (speed)
5788 *cost += extra_cost->fp[mode == DFmode].narrow;
5789 return false;
5791 case FIX:
5792 case UNSIGNED_FIX:
5793 x = XEXP (x, 0);
5794 /* Strip the rounding part. They will all be implemented
5795 by the fcvt* family of instructions anyway. */
5796 if (GET_CODE (x) == UNSPEC)
5798 unsigned int uns_code = XINT (x, 1);
5800 if (uns_code == UNSPEC_FRINTA
5801 || uns_code == UNSPEC_FRINTM
5802 || uns_code == UNSPEC_FRINTN
5803 || uns_code == UNSPEC_FRINTP
5804 || uns_code == UNSPEC_FRINTZ)
5805 x = XVECEXP (x, 0, 0);
5808 if (speed)
5809 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
5811 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
5812 return true;
5814 case ABS:
5815 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5817 /* FABS and FNEG are analogous. */
5818 if (speed)
5819 *cost += extra_cost->fp[mode == DFmode].neg;
5821 else
5823 /* Integer ABS will either be split to
5824 two arithmetic instructions, or will be an ABS
5825 (scalar), which we don't model. */
5826 *cost = COSTS_N_INSNS (2);
5827 if (speed)
5828 *cost += 2 * extra_cost->alu.arith;
5830 return false;
5832 case SMAX:
5833 case SMIN:
5834 if (speed)
5836 /* FMAXNM/FMINNM/FMAX/FMIN.
5837 TODO: This may not be accurate for all implementations, but
5838 we do not model this in the cost tables. */
5839 *cost += extra_cost->fp[mode == DFmode].addsub;
5841 return false;
5843 case UNSPEC:
5844 /* The floating point round to integer frint* instructions. */
5845 if (aarch64_frint_unspec_p (XINT (x, 1)))
5847 if (speed)
5848 *cost += extra_cost->fp[mode == DFmode].roundint;
5850 return false;
5853 if (XINT (x, 1) == UNSPEC_RBIT)
5855 if (speed)
5856 *cost += extra_cost->alu.rev;
5858 return false;
5860 break;
5862 case TRUNCATE:
5864 /* Decompose <su>muldi3_highpart. */
5865 if (/* (truncate:DI */
5866 mode == DImode
5867 /* (lshiftrt:TI */
5868 && GET_MODE (XEXP (x, 0)) == TImode
5869 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5870 /* (mult:TI */
5871 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5872 /* (ANY_EXTEND:TI (reg:DI))
5873 (ANY_EXTEND:TI (reg:DI))) */
5874 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5875 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5876 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5877 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5878 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5879 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5880 /* (const_int 64) */
5881 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5882 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5884 /* UMULH/SMULH. */
5885 if (speed)
5886 *cost += extra_cost->mult[mode == DImode].extend;
5887 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5888 MULT, 0, speed);
5889 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5890 MULT, 1, speed);
5891 return true;
5894 /* Fall through. */
5895 default:
5896 break;
5899 if (dump_file && (dump_flags & TDF_DETAILS))
5900 fprintf (dump_file,
5901 "\nFailed to cost RTX. Assuming default cost.\n");
5903 return true;
5906 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5907 calculated for X. This cost is stored in *COST. Returns true
5908 if the total cost of X was calculated. */
5909 static bool
5910 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5911 int param, int *cost, bool speed)
5913 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5915 if (dump_file && (dump_flags & TDF_DETAILS))
5917 print_rtl_single (dump_file, x);
5918 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5919 speed ? "Hot" : "Cold",
5920 *cost, result ? "final" : "partial");
5923 return result;
5926 static int
5927 aarch64_register_move_cost (enum machine_mode mode,
5928 reg_class_t from_i, reg_class_t to_i)
5930 enum reg_class from = (enum reg_class) from_i;
5931 enum reg_class to = (enum reg_class) to_i;
5932 const struct cpu_regmove_cost *regmove_cost
5933 = aarch64_tune_params->regmove_cost;
5935 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
5936 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
5937 to = GENERAL_REGS;
5939 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
5940 from = GENERAL_REGS;
5942 /* Moving between GPR and stack cost is the same as GP2GP. */
5943 if ((from == GENERAL_REGS && to == STACK_REG)
5944 || (to == GENERAL_REGS && from == STACK_REG))
5945 return regmove_cost->GP2GP;
5947 /* To/From the stack register, we move via the gprs. */
5948 if (to == STACK_REG || from == STACK_REG)
5949 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5950 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5952 if (from == GENERAL_REGS && to == GENERAL_REGS)
5953 return regmove_cost->GP2GP;
5954 else if (from == GENERAL_REGS)
5955 return regmove_cost->GP2FP;
5956 else if (to == GENERAL_REGS)
5957 return regmove_cost->FP2GP;
5959 /* When AdvSIMD instructions are disabled it is not possible to move
5960 a 128-bit value directly between Q registers. This is handled in
5961 secondary reload. A general register is used as a scratch to move
5962 the upper DI value and the lower DI value is moved directly,
5963 hence the cost is the sum of three moves. */
5964 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
5965 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5967 return regmove_cost->FP2FP;
5970 static int
5971 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5972 reg_class_t rclass ATTRIBUTE_UNUSED,
5973 bool in ATTRIBUTE_UNUSED)
5975 return aarch64_tune_params->memmov_cost;
5978 /* Return the number of instructions that can be issued per cycle. */
5979 static int
5980 aarch64_sched_issue_rate (void)
5982 return aarch64_tune_params->issue_rate;
5985 /* Vectorizer cost model target hooks. */
5987 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5988 static int
5989 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5990 tree vectype,
5991 int misalign ATTRIBUTE_UNUSED)
5993 unsigned elements;
5995 switch (type_of_cost)
5997 case scalar_stmt:
5998 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6000 case scalar_load:
6001 return aarch64_tune_params->vec_costs->scalar_load_cost;
6003 case scalar_store:
6004 return aarch64_tune_params->vec_costs->scalar_store_cost;
6006 case vector_stmt:
6007 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6009 case vector_load:
6010 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6012 case vector_store:
6013 return aarch64_tune_params->vec_costs->vec_store_cost;
6015 case vec_to_scalar:
6016 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6018 case scalar_to_vec:
6019 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6021 case unaligned_load:
6022 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6024 case unaligned_store:
6025 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6027 case cond_branch_taken:
6028 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6030 case cond_branch_not_taken:
6031 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6033 case vec_perm:
6034 case vec_promote_demote:
6035 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6037 case vec_construct:
6038 elements = TYPE_VECTOR_SUBPARTS (vectype);
6039 return elements / 2 + 1;
6041 default:
6042 gcc_unreachable ();
6046 /* Implement targetm.vectorize.add_stmt_cost. */
6047 static unsigned
6048 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6049 struct _stmt_vec_info *stmt_info, int misalign,
6050 enum vect_cost_model_location where)
6052 unsigned *cost = (unsigned *) data;
6053 unsigned retval = 0;
6055 if (flag_vect_cost_model)
6057 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6058 int stmt_cost =
6059 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6061 /* Statements in an inner loop relative to the loop being
6062 vectorized are weighted more heavily. The value here is
6063 a function (linear for now) of the loop nest level. */
6064 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6066 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6067 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6068 unsigned nest_level = loop_depth (loop);
6070 count *= nest_level;
6073 retval = (unsigned) (count * stmt_cost);
6074 cost[where] += retval;
6077 return retval;
6080 static void initialize_aarch64_code_model (void);
6082 /* Parse the architecture extension string. */
6084 static void
6085 aarch64_parse_extension (char *str)
6087 /* The extension string is parsed left to right. */
6088 const struct aarch64_option_extension *opt = NULL;
6090 /* Flag to say whether we are adding or removing an extension. */
6091 int adding_ext = -1;
6093 while (str != NULL && *str != 0)
6095 char *ext;
6096 size_t len;
6098 str++;
6099 ext = strchr (str, '+');
6101 if (ext != NULL)
6102 len = ext - str;
6103 else
6104 len = strlen (str);
6106 if (len >= 2 && strncmp (str, "no", 2) == 0)
6108 adding_ext = 0;
6109 len -= 2;
6110 str += 2;
6112 else if (len > 0)
6113 adding_ext = 1;
6115 if (len == 0)
6117 error ("missing feature modifier after %qs", "+no");
6118 return;
6121 /* Scan over the extensions table trying to find an exact match. */
6122 for (opt = all_extensions; opt->name != NULL; opt++)
6124 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6126 /* Add or remove the extension. */
6127 if (adding_ext)
6128 aarch64_isa_flags |= opt->flags_on;
6129 else
6130 aarch64_isa_flags &= ~(opt->flags_off);
6131 break;
6135 if (opt->name == NULL)
6137 /* Extension not found in list. */
6138 error ("unknown feature modifier %qs", str);
6139 return;
6142 str = ext;
6145 return;
6148 /* Parse the ARCH string. */
6150 static void
6151 aarch64_parse_arch (void)
6153 char *ext;
6154 const struct processor *arch;
6155 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6156 size_t len;
6158 strcpy (str, aarch64_arch_string);
6160 ext = strchr (str, '+');
6162 if (ext != NULL)
6163 len = ext - str;
6164 else
6165 len = strlen (str);
6167 if (len == 0)
6169 error ("missing arch name in -march=%qs", str);
6170 return;
6173 /* Loop through the list of supported ARCHs to find a match. */
6174 for (arch = all_architectures; arch->name != NULL; arch++)
6176 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6178 selected_arch = arch;
6179 aarch64_isa_flags = selected_arch->flags;
6181 if (!selected_cpu)
6182 selected_cpu = &all_cores[selected_arch->core];
6184 if (ext != NULL)
6186 /* ARCH string contains at least one extension. */
6187 aarch64_parse_extension (ext);
6190 if (strcmp (selected_arch->arch, selected_cpu->arch))
6192 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6193 selected_cpu->name, selected_arch->name);
6196 return;
6200 /* ARCH name not found in list. */
6201 error ("unknown value %qs for -march", str);
6202 return;
6205 /* Parse the CPU string. */
6207 static void
6208 aarch64_parse_cpu (void)
6210 char *ext;
6211 const struct processor *cpu;
6212 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6213 size_t len;
6215 strcpy (str, aarch64_cpu_string);
6217 ext = strchr (str, '+');
6219 if (ext != NULL)
6220 len = ext - str;
6221 else
6222 len = strlen (str);
6224 if (len == 0)
6226 error ("missing cpu name in -mcpu=%qs", str);
6227 return;
6230 /* Loop through the list of supported CPUs to find a match. */
6231 for (cpu = all_cores; cpu->name != NULL; cpu++)
6233 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6235 selected_cpu = cpu;
6236 selected_tune = cpu;
6237 aarch64_isa_flags = selected_cpu->flags;
6239 if (ext != NULL)
6241 /* CPU string contains at least one extension. */
6242 aarch64_parse_extension (ext);
6245 return;
6249 /* CPU name not found in list. */
6250 error ("unknown value %qs for -mcpu", str);
6251 return;
6254 /* Parse the TUNE string. */
6256 static void
6257 aarch64_parse_tune (void)
6259 const struct processor *cpu;
6260 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6261 strcpy (str, aarch64_tune_string);
6263 /* Loop through the list of supported CPUs to find a match. */
6264 for (cpu = all_cores; cpu->name != NULL; cpu++)
6266 if (strcmp (cpu->name, str) == 0)
6268 selected_tune = cpu;
6269 return;
6273 /* CPU name not found in list. */
6274 error ("unknown value %qs for -mtune", str);
6275 return;
6279 /* Implement TARGET_OPTION_OVERRIDE. */
6281 static void
6282 aarch64_override_options (void)
6284 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6285 If either of -march or -mtune is given, they override their
6286 respective component of -mcpu.
6288 So, first parse AARCH64_CPU_STRING, then the others, be careful
6289 with -march as, if -mcpu is not present on the command line, march
6290 must set a sensible default CPU. */
6291 if (aarch64_cpu_string)
6293 aarch64_parse_cpu ();
6296 if (aarch64_arch_string)
6298 aarch64_parse_arch ();
6301 if (aarch64_tune_string)
6303 aarch64_parse_tune ();
6306 #ifndef HAVE_AS_MABI_OPTION
6307 /* The compiler may have been configured with 2.23.* binutils, which does
6308 not have support for ILP32. */
6309 if (TARGET_ILP32)
6310 error ("Assembler does not support -mabi=ilp32");
6311 #endif
6313 initialize_aarch64_code_model ();
6315 aarch64_build_bitmask_table ();
6317 /* This target defaults to strict volatile bitfields. */
6318 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6319 flag_strict_volatile_bitfields = 1;
6321 /* If the user did not specify a processor, choose the default
6322 one for them. This will be the CPU set during configuration using
6323 --with-cpu, otherwise it is "generic". */
6324 if (!selected_cpu)
6326 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6327 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6330 gcc_assert (selected_cpu);
6332 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
6333 if (!selected_tune)
6334 selected_tune = &all_cores[selected_cpu->core];
6336 aarch64_tune_flags = selected_tune->flags;
6337 aarch64_tune = selected_tune->core;
6338 aarch64_tune_params = selected_tune->tune;
6340 aarch64_override_options_after_change ();
6343 /* Implement targetm.override_options_after_change. */
6345 static void
6346 aarch64_override_options_after_change (void)
6348 if (flag_omit_frame_pointer)
6349 flag_omit_leaf_frame_pointer = false;
6350 else if (flag_omit_leaf_frame_pointer)
6351 flag_omit_frame_pointer = true;
6354 static struct machine_function *
6355 aarch64_init_machine_status (void)
6357 struct machine_function *machine;
6358 machine = ggc_cleared_alloc<machine_function> ();
6359 return machine;
6362 void
6363 aarch64_init_expanders (void)
6365 init_machine_status = aarch64_init_machine_status;
6368 /* A checking mechanism for the implementation of the various code models. */
6369 static void
6370 initialize_aarch64_code_model (void)
6372 if (flag_pic)
6374 switch (aarch64_cmodel_var)
6376 case AARCH64_CMODEL_TINY:
6377 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6378 break;
6379 case AARCH64_CMODEL_SMALL:
6380 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6381 break;
6382 case AARCH64_CMODEL_LARGE:
6383 sorry ("code model %qs with -f%s", "large",
6384 flag_pic > 1 ? "PIC" : "pic");
6385 default:
6386 gcc_unreachable ();
6389 else
6390 aarch64_cmodel = aarch64_cmodel_var;
6393 /* Return true if SYMBOL_REF X binds locally. */
6395 static bool
6396 aarch64_symbol_binds_local_p (const_rtx x)
6398 return (SYMBOL_REF_DECL (x)
6399 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6400 : SYMBOL_REF_LOCAL_P (x));
6403 /* Return true if SYMBOL_REF X is thread local */
6404 static bool
6405 aarch64_tls_symbol_p (rtx x)
6407 if (! TARGET_HAVE_TLS)
6408 return false;
6410 if (GET_CODE (x) != SYMBOL_REF)
6411 return false;
6413 return SYMBOL_REF_TLS_MODEL (x) != 0;
6416 /* Classify a TLS symbol into one of the TLS kinds. */
6417 enum aarch64_symbol_type
6418 aarch64_classify_tls_symbol (rtx x)
6420 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6422 switch (tls_kind)
6424 case TLS_MODEL_GLOBAL_DYNAMIC:
6425 case TLS_MODEL_LOCAL_DYNAMIC:
6426 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6428 case TLS_MODEL_INITIAL_EXEC:
6429 return SYMBOL_SMALL_GOTTPREL;
6431 case TLS_MODEL_LOCAL_EXEC:
6432 return SYMBOL_SMALL_TPREL;
6434 case TLS_MODEL_EMULATED:
6435 case TLS_MODEL_NONE:
6436 return SYMBOL_FORCE_TO_MEM;
6438 default:
6439 gcc_unreachable ();
6443 /* Return the method that should be used to access SYMBOL_REF or
6444 LABEL_REF X in context CONTEXT. */
6446 enum aarch64_symbol_type
6447 aarch64_classify_symbol (rtx x,
6448 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6450 if (GET_CODE (x) == LABEL_REF)
6452 switch (aarch64_cmodel)
6454 case AARCH64_CMODEL_LARGE:
6455 return SYMBOL_FORCE_TO_MEM;
6457 case AARCH64_CMODEL_TINY_PIC:
6458 case AARCH64_CMODEL_TINY:
6459 return SYMBOL_TINY_ABSOLUTE;
6461 case AARCH64_CMODEL_SMALL_PIC:
6462 case AARCH64_CMODEL_SMALL:
6463 return SYMBOL_SMALL_ABSOLUTE;
6465 default:
6466 gcc_unreachable ();
6470 if (GET_CODE (x) == SYMBOL_REF)
6472 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6473 return SYMBOL_FORCE_TO_MEM;
6475 if (aarch64_tls_symbol_p (x))
6476 return aarch64_classify_tls_symbol (x);
6478 switch (aarch64_cmodel)
6480 case AARCH64_CMODEL_TINY:
6481 if (SYMBOL_REF_WEAK (x))
6482 return SYMBOL_FORCE_TO_MEM;
6483 return SYMBOL_TINY_ABSOLUTE;
6485 case AARCH64_CMODEL_SMALL:
6486 if (SYMBOL_REF_WEAK (x))
6487 return SYMBOL_FORCE_TO_MEM;
6488 return SYMBOL_SMALL_ABSOLUTE;
6490 case AARCH64_CMODEL_TINY_PIC:
6491 if (!aarch64_symbol_binds_local_p (x))
6492 return SYMBOL_TINY_GOT;
6493 return SYMBOL_TINY_ABSOLUTE;
6495 case AARCH64_CMODEL_SMALL_PIC:
6496 if (!aarch64_symbol_binds_local_p (x))
6497 return SYMBOL_SMALL_GOT;
6498 return SYMBOL_SMALL_ABSOLUTE;
6500 default:
6501 gcc_unreachable ();
6505 /* By default push everything into the constant pool. */
6506 return SYMBOL_FORCE_TO_MEM;
6509 bool
6510 aarch64_constant_address_p (rtx x)
6512 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6515 bool
6516 aarch64_legitimate_pic_operand_p (rtx x)
6518 if (GET_CODE (x) == SYMBOL_REF
6519 || (GET_CODE (x) == CONST
6520 && GET_CODE (XEXP (x, 0)) == PLUS
6521 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6522 return false;
6524 return true;
6527 /* Return true if X holds either a quarter-precision or
6528 floating-point +0.0 constant. */
6529 static bool
6530 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6532 if (!CONST_DOUBLE_P (x))
6533 return false;
6535 /* TODO: We could handle moving 0.0 to a TFmode register,
6536 but first we would like to refactor the movtf_aarch64
6537 to be more amicable to split moves properly and
6538 correctly gate on TARGET_SIMD. For now - reject all
6539 constants which are not to SFmode or DFmode registers. */
6540 if (!(mode == SFmode || mode == DFmode))
6541 return false;
6543 if (aarch64_float_const_zero_rtx_p (x))
6544 return true;
6545 return aarch64_float_const_representable_p (x);
6548 static bool
6549 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6551 /* Do not allow vector struct mode constants. We could support
6552 0 and -1 easily, but they need support in aarch64-simd.md. */
6553 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6554 return false;
6556 /* This could probably go away because
6557 we now decompose CONST_INTs according to expand_mov_immediate. */
6558 if ((GET_CODE (x) == CONST_VECTOR
6559 && aarch64_simd_valid_immediate (x, mode, false, NULL))
6560 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6561 return !targetm.cannot_force_const_mem (mode, x);
6563 if (GET_CODE (x) == HIGH
6564 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6565 return true;
6567 return aarch64_constant_address_p (x);
6571 aarch64_load_tp (rtx target)
6573 if (!target
6574 || GET_MODE (target) != Pmode
6575 || !register_operand (target, Pmode))
6576 target = gen_reg_rtx (Pmode);
6578 /* Can return in any reg. */
6579 emit_insn (gen_aarch64_load_tp_hard (target));
6580 return target;
6583 /* On AAPCS systems, this is the "struct __va_list". */
6584 static GTY(()) tree va_list_type;
6586 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6587 Return the type to use as __builtin_va_list.
6589 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6591 struct __va_list
6593 void *__stack;
6594 void *__gr_top;
6595 void *__vr_top;
6596 int __gr_offs;
6597 int __vr_offs;
6598 }; */
6600 static tree
6601 aarch64_build_builtin_va_list (void)
6603 tree va_list_name;
6604 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6606 /* Create the type. */
6607 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6608 /* Give it the required name. */
6609 va_list_name = build_decl (BUILTINS_LOCATION,
6610 TYPE_DECL,
6611 get_identifier ("__va_list"),
6612 va_list_type);
6613 DECL_ARTIFICIAL (va_list_name) = 1;
6614 TYPE_NAME (va_list_type) = va_list_name;
6615 TYPE_STUB_DECL (va_list_type) = va_list_name;
6617 /* Create the fields. */
6618 f_stack = build_decl (BUILTINS_LOCATION,
6619 FIELD_DECL, get_identifier ("__stack"),
6620 ptr_type_node);
6621 f_grtop = build_decl (BUILTINS_LOCATION,
6622 FIELD_DECL, get_identifier ("__gr_top"),
6623 ptr_type_node);
6624 f_vrtop = build_decl (BUILTINS_LOCATION,
6625 FIELD_DECL, get_identifier ("__vr_top"),
6626 ptr_type_node);
6627 f_groff = build_decl (BUILTINS_LOCATION,
6628 FIELD_DECL, get_identifier ("__gr_offs"),
6629 integer_type_node);
6630 f_vroff = build_decl (BUILTINS_LOCATION,
6631 FIELD_DECL, get_identifier ("__vr_offs"),
6632 integer_type_node);
6634 DECL_ARTIFICIAL (f_stack) = 1;
6635 DECL_ARTIFICIAL (f_grtop) = 1;
6636 DECL_ARTIFICIAL (f_vrtop) = 1;
6637 DECL_ARTIFICIAL (f_groff) = 1;
6638 DECL_ARTIFICIAL (f_vroff) = 1;
6640 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6641 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6642 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6643 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6644 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6646 TYPE_FIELDS (va_list_type) = f_stack;
6647 DECL_CHAIN (f_stack) = f_grtop;
6648 DECL_CHAIN (f_grtop) = f_vrtop;
6649 DECL_CHAIN (f_vrtop) = f_groff;
6650 DECL_CHAIN (f_groff) = f_vroff;
6652 /* Compute its layout. */
6653 layout_type (va_list_type);
6655 return va_list_type;
6658 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6659 static void
6660 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6662 const CUMULATIVE_ARGS *cum;
6663 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6664 tree stack, grtop, vrtop, groff, vroff;
6665 tree t;
6666 int gr_save_area_size;
6667 int vr_save_area_size;
6668 int vr_offset;
6670 cum = &crtl->args.info;
6671 gr_save_area_size
6672 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6673 vr_save_area_size
6674 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6676 if (TARGET_GENERAL_REGS_ONLY)
6678 if (cum->aapcs_nvrn > 0)
6679 sorry ("%qs and floating point or vector arguments",
6680 "-mgeneral-regs-only");
6681 vr_save_area_size = 0;
6684 f_stack = TYPE_FIELDS (va_list_type_node);
6685 f_grtop = DECL_CHAIN (f_stack);
6686 f_vrtop = DECL_CHAIN (f_grtop);
6687 f_groff = DECL_CHAIN (f_vrtop);
6688 f_vroff = DECL_CHAIN (f_groff);
6690 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6691 NULL_TREE);
6692 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6693 NULL_TREE);
6694 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6695 NULL_TREE);
6696 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6697 NULL_TREE);
6698 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6699 NULL_TREE);
6701 /* Emit code to initialize STACK, which points to the next varargs stack
6702 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6703 by named arguments. STACK is 8-byte aligned. */
6704 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6705 if (cum->aapcs_stack_size > 0)
6706 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6707 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6708 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6710 /* Emit code to initialize GRTOP, the top of the GR save area.
6711 virtual_incoming_args_rtx should have been 16 byte aligned. */
6712 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6713 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6714 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6716 /* Emit code to initialize VRTOP, the top of the VR save area.
6717 This address is gr_save_area_bytes below GRTOP, rounded
6718 down to the next 16-byte boundary. */
6719 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6720 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6721 STACK_BOUNDARY / BITS_PER_UNIT);
6723 if (vr_offset)
6724 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6725 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6726 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6728 /* Emit code to initialize GROFF, the offset from GRTOP of the
6729 next GPR argument. */
6730 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6731 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6732 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6734 /* Likewise emit code to initialize VROFF, the offset from FTOP
6735 of the next VR argument. */
6736 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6737 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6738 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6741 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6743 static tree
6744 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6745 gimple_seq *post_p ATTRIBUTE_UNUSED)
6747 tree addr;
6748 bool indirect_p;
6749 bool is_ha; /* is HFA or HVA. */
6750 bool dw_align; /* double-word align. */
6751 enum machine_mode ag_mode = VOIDmode;
6752 int nregs;
6753 enum machine_mode mode;
6755 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6756 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6757 HOST_WIDE_INT size, rsize, adjust, align;
6758 tree t, u, cond1, cond2;
6760 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6761 if (indirect_p)
6762 type = build_pointer_type (type);
6764 mode = TYPE_MODE (type);
6766 f_stack = TYPE_FIELDS (va_list_type_node);
6767 f_grtop = DECL_CHAIN (f_stack);
6768 f_vrtop = DECL_CHAIN (f_grtop);
6769 f_groff = DECL_CHAIN (f_vrtop);
6770 f_vroff = DECL_CHAIN (f_groff);
6772 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6773 f_stack, NULL_TREE);
6774 size = int_size_in_bytes (type);
6775 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6777 dw_align = false;
6778 adjust = 0;
6779 if (aarch64_vfp_is_call_or_return_candidate (mode,
6780 type,
6781 &ag_mode,
6782 &nregs,
6783 &is_ha))
6785 /* TYPE passed in fp/simd registers. */
6786 if (TARGET_GENERAL_REGS_ONLY)
6787 sorry ("%qs and floating point or vector arguments",
6788 "-mgeneral-regs-only");
6790 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6791 unshare_expr (valist), f_vrtop, NULL_TREE);
6792 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6793 unshare_expr (valist), f_vroff, NULL_TREE);
6795 rsize = nregs * UNITS_PER_VREG;
6797 if (is_ha)
6799 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6800 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6802 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6803 && size < UNITS_PER_VREG)
6805 adjust = UNITS_PER_VREG - size;
6808 else
6810 /* TYPE passed in general registers. */
6811 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6812 unshare_expr (valist), f_grtop, NULL_TREE);
6813 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6814 unshare_expr (valist), f_groff, NULL_TREE);
6815 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6816 nregs = rsize / UNITS_PER_WORD;
6818 if (align > 8)
6819 dw_align = true;
6821 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6822 && size < UNITS_PER_WORD)
6824 adjust = UNITS_PER_WORD - size;
6828 /* Get a local temporary for the field value. */
6829 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6831 /* Emit code to branch if off >= 0. */
6832 t = build2 (GE_EXPR, boolean_type_node, off,
6833 build_int_cst (TREE_TYPE (off), 0));
6834 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6836 if (dw_align)
6838 /* Emit: offs = (offs + 15) & -16. */
6839 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6840 build_int_cst (TREE_TYPE (off), 15));
6841 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6842 build_int_cst (TREE_TYPE (off), -16));
6843 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6845 else
6846 roundup = NULL;
6848 /* Update ap.__[g|v]r_offs */
6849 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6850 build_int_cst (TREE_TYPE (off), rsize));
6851 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6853 /* String up. */
6854 if (roundup)
6855 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6857 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6858 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6859 build_int_cst (TREE_TYPE (f_off), 0));
6860 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6862 /* String up: make sure the assignment happens before the use. */
6863 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6864 COND_EXPR_ELSE (cond1) = t;
6866 /* Prepare the trees handling the argument that is passed on the stack;
6867 the top level node will store in ON_STACK. */
6868 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6869 if (align > 8)
6871 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6872 t = fold_convert (intDI_type_node, arg);
6873 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6874 build_int_cst (TREE_TYPE (t), 15));
6875 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6876 build_int_cst (TREE_TYPE (t), -16));
6877 t = fold_convert (TREE_TYPE (arg), t);
6878 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6880 else
6881 roundup = NULL;
6882 /* Advance ap.__stack */
6883 t = fold_convert (intDI_type_node, arg);
6884 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6885 build_int_cst (TREE_TYPE (t), size + 7));
6886 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6887 build_int_cst (TREE_TYPE (t), -8));
6888 t = fold_convert (TREE_TYPE (arg), t);
6889 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6890 /* String up roundup and advance. */
6891 if (roundup)
6892 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6893 /* String up with arg */
6894 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6895 /* Big-endianness related address adjustment. */
6896 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6897 && size < UNITS_PER_WORD)
6899 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6900 size_int (UNITS_PER_WORD - size));
6901 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6904 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6905 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6907 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6908 t = off;
6909 if (adjust)
6910 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6911 build_int_cst (TREE_TYPE (off), adjust));
6913 t = fold_convert (sizetype, t);
6914 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6916 if (is_ha)
6918 /* type ha; // treat as "struct {ftype field[n];}"
6919 ... [computing offs]
6920 for (i = 0; i <nregs; ++i, offs += 16)
6921 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6922 return ha; */
6923 int i;
6924 tree tmp_ha, field_t, field_ptr_t;
6926 /* Declare a local variable. */
6927 tmp_ha = create_tmp_var_raw (type, "ha");
6928 gimple_add_tmp_var (tmp_ha);
6930 /* Establish the base type. */
6931 switch (ag_mode)
6933 case SFmode:
6934 field_t = float_type_node;
6935 field_ptr_t = float_ptr_type_node;
6936 break;
6937 case DFmode:
6938 field_t = double_type_node;
6939 field_ptr_t = double_ptr_type_node;
6940 break;
6941 case TFmode:
6942 field_t = long_double_type_node;
6943 field_ptr_t = long_double_ptr_type_node;
6944 break;
6945 /* The half precision and quad precision are not fully supported yet. Enable
6946 the following code after the support is complete. Need to find the correct
6947 type node for __fp16 *. */
6948 #if 0
6949 case HFmode:
6950 field_t = float_type_node;
6951 field_ptr_t = float_ptr_type_node;
6952 break;
6953 #endif
6954 case V2SImode:
6955 case V4SImode:
6957 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6958 field_t = build_vector_type_for_mode (innertype, ag_mode);
6959 field_ptr_t = build_pointer_type (field_t);
6961 break;
6962 default:
6963 gcc_assert (0);
6966 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
6967 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6968 addr = t;
6969 t = fold_convert (field_ptr_t, addr);
6970 t = build2 (MODIFY_EXPR, field_t,
6971 build1 (INDIRECT_REF, field_t, tmp_ha),
6972 build1 (INDIRECT_REF, field_t, t));
6974 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6975 for (i = 1; i < nregs; ++i)
6977 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6978 u = fold_convert (field_ptr_t, addr);
6979 u = build2 (MODIFY_EXPR, field_t,
6980 build2 (MEM_REF, field_t, tmp_ha,
6981 build_int_cst (field_ptr_t,
6982 (i *
6983 int_size_in_bytes (field_t)))),
6984 build1 (INDIRECT_REF, field_t, u));
6985 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6988 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6989 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6992 COND_EXPR_ELSE (cond2) = t;
6993 addr = fold_convert (build_pointer_type (type), cond1);
6994 addr = build_va_arg_indirect_ref (addr);
6996 if (indirect_p)
6997 addr = build_va_arg_indirect_ref (addr);
6999 return addr;
7002 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
7004 static void
7005 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7006 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7007 int no_rtl)
7009 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7010 CUMULATIVE_ARGS local_cum;
7011 int gr_saved, vr_saved;
7013 /* The caller has advanced CUM up to, but not beyond, the last named
7014 argument. Advance a local copy of CUM past the last "real" named
7015 argument, to find out how many registers are left over. */
7016 local_cum = *cum;
7017 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
7019 /* Found out how many registers we need to save. */
7020 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7021 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7023 if (TARGET_GENERAL_REGS_ONLY)
7025 if (local_cum.aapcs_nvrn > 0)
7026 sorry ("%qs and floating point or vector arguments",
7027 "-mgeneral-regs-only");
7028 vr_saved = 0;
7031 if (!no_rtl)
7033 if (gr_saved > 0)
7035 rtx ptr, mem;
7037 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7038 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7039 - gr_saved * UNITS_PER_WORD);
7040 mem = gen_frame_mem (BLKmode, ptr);
7041 set_mem_alias_set (mem, get_varargs_alias_set ());
7043 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7044 mem, gr_saved);
7046 if (vr_saved > 0)
7048 /* We can't use move_block_from_reg, because it will use
7049 the wrong mode, storing D regs only. */
7050 enum machine_mode mode = TImode;
7051 int off, i;
7053 /* Set OFF to the offset from virtual_incoming_args_rtx of
7054 the first vector register. The VR save area lies below
7055 the GR one, and is aligned to 16 bytes. */
7056 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7057 STACK_BOUNDARY / BITS_PER_UNIT);
7058 off -= vr_saved * UNITS_PER_VREG;
7060 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7062 rtx ptr, mem;
7064 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7065 mem = gen_frame_mem (mode, ptr);
7066 set_mem_alias_set (mem, get_varargs_alias_set ());
7067 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7068 off += UNITS_PER_VREG;
7073 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7074 any complication of having crtl->args.pretend_args_size changed. */
7075 cfun->machine->frame.saved_varargs_size
7076 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7077 STACK_BOUNDARY / BITS_PER_UNIT)
7078 + vr_saved * UNITS_PER_VREG);
7081 static void
7082 aarch64_conditional_register_usage (void)
7084 int i;
7085 if (!TARGET_FLOAT)
7087 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7089 fixed_regs[i] = 1;
7090 call_used_regs[i] = 1;
7095 /* Walk down the type tree of TYPE counting consecutive base elements.
7096 If *MODEP is VOIDmode, then set it to the first valid floating point
7097 type. If a non-floating point type is found, or if a floating point
7098 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7099 otherwise return the count in the sub-tree. */
7100 static int
7101 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
7103 enum machine_mode mode;
7104 HOST_WIDE_INT size;
7106 switch (TREE_CODE (type))
7108 case REAL_TYPE:
7109 mode = TYPE_MODE (type);
7110 if (mode != DFmode && mode != SFmode && mode != TFmode)
7111 return -1;
7113 if (*modep == VOIDmode)
7114 *modep = mode;
7116 if (*modep == mode)
7117 return 1;
7119 break;
7121 case COMPLEX_TYPE:
7122 mode = TYPE_MODE (TREE_TYPE (type));
7123 if (mode != DFmode && mode != SFmode && mode != TFmode)
7124 return -1;
7126 if (*modep == VOIDmode)
7127 *modep = mode;
7129 if (*modep == mode)
7130 return 2;
7132 break;
7134 case VECTOR_TYPE:
7135 /* Use V2SImode and V4SImode as representatives of all 64-bit
7136 and 128-bit vector types. */
7137 size = int_size_in_bytes (type);
7138 switch (size)
7140 case 8:
7141 mode = V2SImode;
7142 break;
7143 case 16:
7144 mode = V4SImode;
7145 break;
7146 default:
7147 return -1;
7150 if (*modep == VOIDmode)
7151 *modep = mode;
7153 /* Vector modes are considered to be opaque: two vectors are
7154 equivalent for the purposes of being homogeneous aggregates
7155 if they are the same size. */
7156 if (*modep == mode)
7157 return 1;
7159 break;
7161 case ARRAY_TYPE:
7163 int count;
7164 tree index = TYPE_DOMAIN (type);
7166 /* Can't handle incomplete types nor sizes that are not
7167 fixed. */
7168 if (!COMPLETE_TYPE_P (type)
7169 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7170 return -1;
7172 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7173 if (count == -1
7174 || !index
7175 || !TYPE_MAX_VALUE (index)
7176 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
7177 || !TYPE_MIN_VALUE (index)
7178 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7179 || count < 0)
7180 return -1;
7182 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7183 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7185 /* There must be no padding. */
7186 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7187 return -1;
7189 return count;
7192 case RECORD_TYPE:
7194 int count = 0;
7195 int sub_count;
7196 tree field;
7198 /* Can't handle incomplete types nor sizes that are not
7199 fixed. */
7200 if (!COMPLETE_TYPE_P (type)
7201 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7202 return -1;
7204 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7206 if (TREE_CODE (field) != FIELD_DECL)
7207 continue;
7209 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7210 if (sub_count < 0)
7211 return -1;
7212 count += sub_count;
7215 /* There must be no padding. */
7216 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7217 return -1;
7219 return count;
7222 case UNION_TYPE:
7223 case QUAL_UNION_TYPE:
7225 /* These aren't very interesting except in a degenerate case. */
7226 int count = 0;
7227 int sub_count;
7228 tree field;
7230 /* Can't handle incomplete types nor sizes that are not
7231 fixed. */
7232 if (!COMPLETE_TYPE_P (type)
7233 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7234 return -1;
7236 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7238 if (TREE_CODE (field) != FIELD_DECL)
7239 continue;
7241 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7242 if (sub_count < 0)
7243 return -1;
7244 count = count > sub_count ? count : sub_count;
7247 /* There must be no padding. */
7248 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7249 return -1;
7251 return count;
7254 default:
7255 break;
7258 return -1;
7261 /* Return true if we use LRA instead of reload pass. */
7262 static bool
7263 aarch64_lra_p (void)
7265 return aarch64_lra_flag;
7268 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7269 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7270 array types. The C99 floating-point complex types are also considered
7271 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7272 types, which are GCC extensions and out of the scope of AAPCS64, are
7273 treated as composite types here as well.
7275 Note that MODE itself is not sufficient in determining whether a type
7276 is such a composite type or not. This is because
7277 stor-layout.c:compute_record_mode may have already changed the MODE
7278 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7279 structure with only one field may have its MODE set to the mode of the
7280 field. Also an integer mode whose size matches the size of the
7281 RECORD_TYPE type may be used to substitute the original mode
7282 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7283 solely relied on. */
7285 static bool
7286 aarch64_composite_type_p (const_tree type,
7287 enum machine_mode mode)
7289 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7290 return true;
7292 if (mode == BLKmode
7293 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7294 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7295 return true;
7297 return false;
7300 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7301 type as described in AAPCS64 \S 4.1.2.
7303 See the comment above aarch64_composite_type_p for the notes on MODE. */
7305 static bool
7306 aarch64_short_vector_p (const_tree type,
7307 enum machine_mode mode)
7309 HOST_WIDE_INT size = -1;
7311 if (type && TREE_CODE (type) == VECTOR_TYPE)
7312 size = int_size_in_bytes (type);
7313 else if (!aarch64_composite_type_p (type, mode)
7314 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7315 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7316 size = GET_MODE_SIZE (mode);
7318 return (size == 8 || size == 16) ? true : false;
7321 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7322 shall be passed or returned in simd/fp register(s) (providing these
7323 parameter passing registers are available).
7325 Upon successful return, *COUNT returns the number of needed registers,
7326 *BASE_MODE returns the mode of the individual register and when IS_HAF
7327 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7328 floating-point aggregate or a homogeneous short-vector aggregate. */
7330 static bool
7331 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
7332 const_tree type,
7333 enum machine_mode *base_mode,
7334 int *count,
7335 bool *is_ha)
7337 enum machine_mode new_mode = VOIDmode;
7338 bool composite_p = aarch64_composite_type_p (type, mode);
7340 if (is_ha != NULL) *is_ha = false;
7342 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7343 || aarch64_short_vector_p (type, mode))
7345 *count = 1;
7346 new_mode = mode;
7348 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7350 if (is_ha != NULL) *is_ha = true;
7351 *count = 2;
7352 new_mode = GET_MODE_INNER (mode);
7354 else if (type && composite_p)
7356 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7358 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7360 if (is_ha != NULL) *is_ha = true;
7361 *count = ag_count;
7363 else
7364 return false;
7366 else
7367 return false;
7369 *base_mode = new_mode;
7370 return true;
7373 /* Implement TARGET_STRUCT_VALUE_RTX. */
7375 static rtx
7376 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7377 int incoming ATTRIBUTE_UNUSED)
7379 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7382 /* Implements target hook vector_mode_supported_p. */
7383 static bool
7384 aarch64_vector_mode_supported_p (enum machine_mode mode)
7386 if (TARGET_SIMD
7387 && (mode == V4SImode || mode == V8HImode
7388 || mode == V16QImode || mode == V2DImode
7389 || mode == V2SImode || mode == V4HImode
7390 || mode == V8QImode || mode == V2SFmode
7391 || mode == V4SFmode || mode == V2DFmode
7392 || mode == V1DFmode))
7393 return true;
7395 return false;
7398 /* Return appropriate SIMD container
7399 for MODE within a vector of WIDTH bits. */
7400 static enum machine_mode
7401 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
7403 gcc_assert (width == 64 || width == 128);
7404 if (TARGET_SIMD)
7406 if (width == 128)
7407 switch (mode)
7409 case DFmode:
7410 return V2DFmode;
7411 case SFmode:
7412 return V4SFmode;
7413 case SImode:
7414 return V4SImode;
7415 case HImode:
7416 return V8HImode;
7417 case QImode:
7418 return V16QImode;
7419 case DImode:
7420 return V2DImode;
7421 default:
7422 break;
7424 else
7425 switch (mode)
7427 case SFmode:
7428 return V2SFmode;
7429 case SImode:
7430 return V2SImode;
7431 case HImode:
7432 return V4HImode;
7433 case QImode:
7434 return V8QImode;
7435 default:
7436 break;
7439 return word_mode;
7442 /* Return 128-bit container as the preferred SIMD mode for MODE. */
7443 static enum machine_mode
7444 aarch64_preferred_simd_mode (enum machine_mode mode)
7446 return aarch64_simd_container_mode (mode, 128);
7449 /* Return the bitmask of possible vector sizes for the vectorizer
7450 to iterate over. */
7451 static unsigned int
7452 aarch64_autovectorize_vector_sizes (void)
7454 return (16 | 8);
7457 /* A table to help perform AArch64-specific name mangling for AdvSIMD
7458 vector types in order to conform to the AAPCS64 (see "Procedure
7459 Call Standard for the ARM 64-bit Architecture", Appendix A). To
7460 qualify for emission with the mangled names defined in that document,
7461 a vector type must not only be of the correct mode but also be
7462 composed of AdvSIMD vector element types (e.g.
7463 _builtin_aarch64_simd_qi); these types are registered by
7464 aarch64_init_simd_builtins (). In other words, vector types defined
7465 in other ways e.g. via vector_size attribute will get default
7466 mangled names. */
7467 typedef struct
7469 enum machine_mode mode;
7470 const char *element_type_name;
7471 const char *mangled_name;
7472 } aarch64_simd_mangle_map_entry;
7474 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7475 /* 64-bit containerized types. */
7476 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
7477 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
7478 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
7479 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
7480 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
7481 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
7482 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
7483 { DImode, "__builtin_aarch64_simd_di", "11__Int64x1_t" },
7484 { DImode, "__builtin_aarch64_simd_udi", "12__Uint64x1_t" },
7485 { V1DFmode, "__builtin_aarch64_simd_df", "13__Float64x1_t" },
7486 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
7487 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7488 /* 128-bit containerized types. */
7489 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
7490 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
7491 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
7492 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
7493 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
7494 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
7495 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
7496 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
7497 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
7498 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
7499 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7500 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7501 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
7502 { VOIDmode, NULL, NULL }
7505 /* Implement TARGET_MANGLE_TYPE. */
7507 static const char *
7508 aarch64_mangle_type (const_tree type)
7510 /* The AArch64 ABI documents say that "__va_list" has to be
7511 managled as if it is in the "std" namespace. */
7512 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7513 return "St9__va_list";
7515 /* Check the mode of the vector type, and the name of the vector
7516 element type, against the table. */
7517 if (TREE_CODE (type) == VECTOR_TYPE)
7519 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7521 while (pos->mode != VOIDmode)
7523 tree elt_type = TREE_TYPE (type);
7525 if (pos->mode == TYPE_MODE (type)
7526 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7527 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7528 pos->element_type_name))
7529 return pos->mangled_name;
7531 pos++;
7535 /* Use the default mangling. */
7536 return NULL;
7539 /* Return the equivalent letter for size. */
7540 static char
7541 sizetochar (int size)
7543 switch (size)
7545 case 64: return 'd';
7546 case 32: return 's';
7547 case 16: return 'h';
7548 case 8 : return 'b';
7549 default: gcc_unreachable ();
7553 /* Return true iff x is a uniform vector of floating-point
7554 constants, and the constant can be represented in
7555 quarter-precision form. Note, as aarch64_float_const_representable
7556 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
7557 static bool
7558 aarch64_vect_float_const_representable_p (rtx x)
7560 int i = 0;
7561 REAL_VALUE_TYPE r0, ri;
7562 rtx x0, xi;
7564 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7565 return false;
7567 x0 = CONST_VECTOR_ELT (x, 0);
7568 if (!CONST_DOUBLE_P (x0))
7569 return false;
7571 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7573 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7575 xi = CONST_VECTOR_ELT (x, i);
7576 if (!CONST_DOUBLE_P (xi))
7577 return false;
7579 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7580 if (!REAL_VALUES_EQUAL (r0, ri))
7581 return false;
7584 return aarch64_float_const_representable_p (x0);
7587 /* Return true for valid and false for invalid. */
7588 bool
7589 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7590 struct simd_immediate_info *info)
7592 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7593 matches = 1; \
7594 for (i = 0; i < idx; i += (STRIDE)) \
7595 if (!(TEST)) \
7596 matches = 0; \
7597 if (matches) \
7599 immtype = (CLASS); \
7600 elsize = (ELSIZE); \
7601 eshift = (SHIFT); \
7602 emvn = (NEG); \
7603 break; \
7606 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7607 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7608 unsigned char bytes[16];
7609 int immtype = -1, matches;
7610 unsigned int invmask = inverse ? 0xff : 0;
7611 int eshift, emvn;
7613 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7615 if (! (aarch64_simd_imm_zero_p (op, mode)
7616 || aarch64_vect_float_const_representable_p (op)))
7617 return false;
7619 if (info)
7621 info->value = CONST_VECTOR_ELT (op, 0);
7622 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7623 info->mvn = false;
7624 info->shift = 0;
7627 return true;
7630 /* Splat vector constant out into a byte vector. */
7631 for (i = 0; i < n_elts; i++)
7633 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7634 it must be laid out in the vector register in reverse order. */
7635 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7636 unsigned HOST_WIDE_INT elpart;
7637 unsigned int part, parts;
7639 if (CONST_INT_P (el))
7641 elpart = INTVAL (el);
7642 parts = 1;
7644 else if (GET_CODE (el) == CONST_DOUBLE)
7646 elpart = CONST_DOUBLE_LOW (el);
7647 parts = 2;
7649 else
7650 gcc_unreachable ();
7652 for (part = 0; part < parts; part++)
7654 unsigned int byte;
7655 for (byte = 0; byte < innersize; byte++)
7657 bytes[idx++] = (elpart & 0xff) ^ invmask;
7658 elpart >>= BITS_PER_UNIT;
7660 if (GET_CODE (el) == CONST_DOUBLE)
7661 elpart = CONST_DOUBLE_HIGH (el);
7665 /* Sanity check. */
7666 gcc_assert (idx == GET_MODE_SIZE (mode));
7670 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7671 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7673 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7674 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7676 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7677 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7679 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7680 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7682 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7684 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7686 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7687 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7689 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7690 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7692 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7693 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7695 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7696 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7698 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7700 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7702 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7703 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7705 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7706 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7708 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7709 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7711 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7712 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7714 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7716 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7717 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7719 while (0);
7721 if (immtype == -1)
7722 return false;
7724 if (info)
7726 info->element_width = elsize;
7727 info->mvn = emvn != 0;
7728 info->shift = eshift;
7730 unsigned HOST_WIDE_INT imm = 0;
7732 if (immtype >= 12 && immtype <= 15)
7733 info->msl = true;
7735 /* Un-invert bytes of recognized vector, if necessary. */
7736 if (invmask != 0)
7737 for (i = 0; i < idx; i++)
7738 bytes[i] ^= invmask;
7740 if (immtype == 17)
7742 /* FIXME: Broken on 32-bit H_W_I hosts. */
7743 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7745 for (i = 0; i < 8; i++)
7746 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7747 << (i * BITS_PER_UNIT);
7750 info->value = GEN_INT (imm);
7752 else
7754 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7755 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7757 /* Construct 'abcdefgh' because the assembler cannot handle
7758 generic constants. */
7759 if (info->mvn)
7760 imm = ~imm;
7761 imm = (imm >> info->shift) & 0xff;
7762 info->value = GEN_INT (imm);
7766 return true;
7767 #undef CHECK
7770 /* Check of immediate shift constants are within range. */
7771 bool
7772 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7774 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7775 if (left)
7776 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
7777 else
7778 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
7781 /* Return true if X is a uniform vector where all elements
7782 are either the floating-point constant 0.0 or the
7783 integer constant 0. */
7784 bool
7785 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7787 return x == CONST0_RTX (mode);
7790 bool
7791 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7793 HOST_WIDE_INT imm = INTVAL (x);
7794 int i;
7796 for (i = 0; i < 8; i++)
7798 unsigned int byte = imm & 0xff;
7799 if (byte != 0xff && byte != 0)
7800 return false;
7801 imm >>= 8;
7804 return true;
7807 bool
7808 aarch64_mov_operand_p (rtx x,
7809 enum aarch64_symbol_context context,
7810 enum machine_mode mode)
7812 if (GET_CODE (x) == HIGH
7813 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7814 return true;
7816 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7817 return true;
7819 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7820 return true;
7822 return aarch64_classify_symbolic_expression (x, context)
7823 == SYMBOL_TINY_ABSOLUTE;
7826 /* Return a const_int vector of VAL. */
7828 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7830 int nunits = GET_MODE_NUNITS (mode);
7831 rtvec v = rtvec_alloc (nunits);
7832 int i;
7834 for (i=0; i < nunits; i++)
7835 RTVEC_ELT (v, i) = GEN_INT (val);
7837 return gen_rtx_CONST_VECTOR (mode, v);
7840 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7842 bool
7843 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7845 enum machine_mode vmode;
7847 gcc_assert (!VECTOR_MODE_P (mode));
7848 vmode = aarch64_preferred_simd_mode (mode);
7849 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7850 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7853 /* Construct and return a PARALLEL RTX vector with elements numbering the
7854 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
7855 the vector - from the perspective of the architecture. This does not
7856 line up with GCC's perspective on lane numbers, so we end up with
7857 different masks depending on our target endian-ness. The diagram
7858 below may help. We must draw the distinction when building masks
7859 which select one half of the vector. An instruction selecting
7860 architectural low-lanes for a big-endian target, must be described using
7861 a mask selecting GCC high-lanes.
7863 Big-Endian Little-Endian
7865 GCC 0 1 2 3 3 2 1 0
7866 | x | x | x | x | | x | x | x | x |
7867 Architecture 3 2 1 0 3 2 1 0
7869 Low Mask: { 2, 3 } { 0, 1 }
7870 High Mask: { 0, 1 } { 2, 3 }
7874 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7876 int nunits = GET_MODE_NUNITS (mode);
7877 rtvec v = rtvec_alloc (nunits / 2);
7878 int high_base = nunits / 2;
7879 int low_base = 0;
7880 int base;
7881 rtx t1;
7882 int i;
7884 if (BYTES_BIG_ENDIAN)
7885 base = high ? low_base : high_base;
7886 else
7887 base = high ? high_base : low_base;
7889 for (i = 0; i < nunits / 2; i++)
7890 RTVEC_ELT (v, i) = GEN_INT (base + i);
7892 t1 = gen_rtx_PARALLEL (mode, v);
7893 return t1;
7896 /* Check OP for validity as a PARALLEL RTX vector with elements
7897 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
7898 from the perspective of the architecture. See the diagram above
7899 aarch64_simd_vect_par_cnst_half for more details. */
7901 bool
7902 aarch64_simd_check_vect_par_cnst_half (rtx op, enum machine_mode mode,
7903 bool high)
7905 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
7906 HOST_WIDE_INT count_op = XVECLEN (op, 0);
7907 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
7908 int i = 0;
7910 if (!VECTOR_MODE_P (mode))
7911 return false;
7913 if (count_op != count_ideal)
7914 return false;
7916 for (i = 0; i < count_ideal; i++)
7918 rtx elt_op = XVECEXP (op, 0, i);
7919 rtx elt_ideal = XVECEXP (ideal, 0, i);
7921 if (!CONST_INT_P (elt_op)
7922 || INTVAL (elt_ideal) != INTVAL (elt_op))
7923 return false;
7925 return true;
7928 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7929 HIGH (exclusive). */
7930 void
7931 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7933 HOST_WIDE_INT lane;
7934 gcc_assert (CONST_INT_P (operand));
7935 lane = INTVAL (operand);
7937 if (lane < low || lane >= high)
7938 error ("lane out of range");
7941 void
7942 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7944 gcc_assert (CONST_INT_P (operand));
7945 HOST_WIDE_INT lane = INTVAL (operand);
7947 if (lane < low || lane >= high)
7948 error ("constant out of range");
7951 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
7952 registers). */
7953 void
7954 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7955 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7956 rtx op1)
7958 rtx mem = gen_rtx_MEM (mode, destaddr);
7959 rtx tmp1 = gen_reg_rtx (mode);
7960 rtx tmp2 = gen_reg_rtx (mode);
7962 emit_insn (intfn (tmp1, op1, tmp2));
7964 emit_move_insn (mem, tmp1);
7965 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7966 emit_move_insn (mem, tmp2);
7969 /* Return TRUE if OP is a valid vector addressing mode. */
7970 bool
7971 aarch64_simd_mem_operand_p (rtx op)
7973 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7974 || REG_P (XEXP (op, 0)));
7977 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7978 not to early-clobber SRC registers in the process.
7980 We assume that the operands described by SRC and DEST represent a
7981 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7982 number of components into which the copy has been decomposed. */
7983 void
7984 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7985 rtx *src, unsigned int count)
7987 unsigned int i;
7989 if (!reg_overlap_mentioned_p (operands[0], operands[1])
7990 || REGNO (operands[0]) < REGNO (operands[1]))
7992 for (i = 0; i < count; i++)
7994 operands[2 * i] = dest[i];
7995 operands[2 * i + 1] = src[i];
7998 else
8000 for (i = 0; i < count; i++)
8002 operands[2 * i] = dest[count - i - 1];
8003 operands[2 * i + 1] = src[count - i - 1];
8008 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8009 one of VSTRUCT modes: OI, CI or XI. */
8011 aarch64_simd_attr_length_move (rtx_insn *insn)
8013 enum machine_mode mode;
8015 extract_insn_cached (insn);
8017 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8019 mode = GET_MODE (recog_data.operand[0]);
8020 switch (mode)
8022 case OImode:
8023 return 8;
8024 case CImode:
8025 return 12;
8026 case XImode:
8027 return 16;
8028 default:
8029 gcc_unreachable ();
8032 return 4;
8035 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8036 alignment of a vector to 128 bits. */
8037 static HOST_WIDE_INT
8038 aarch64_simd_vector_alignment (const_tree type)
8040 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
8041 return MIN (align, 128);
8044 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8045 static bool
8046 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8048 if (is_packed)
8049 return false;
8051 /* We guarantee alignment for vectors up to 128-bits. */
8052 if (tree_int_cst_compare (TYPE_SIZE (type),
8053 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8054 return false;
8056 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8057 return true;
8060 /* If VALS is a vector constant that can be loaded into a register
8061 using DUP, generate instructions to do so and return an RTX to
8062 assign to the register. Otherwise return NULL_RTX. */
8063 static rtx
8064 aarch64_simd_dup_constant (rtx vals)
8066 enum machine_mode mode = GET_MODE (vals);
8067 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8068 int n_elts = GET_MODE_NUNITS (mode);
8069 bool all_same = true;
8070 rtx x;
8071 int i;
8073 if (GET_CODE (vals) != CONST_VECTOR)
8074 return NULL_RTX;
8076 for (i = 1; i < n_elts; ++i)
8078 x = CONST_VECTOR_ELT (vals, i);
8079 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8080 all_same = false;
8083 if (!all_same)
8084 return NULL_RTX;
8086 /* We can load this constant by using DUP and a constant in a
8087 single ARM register. This will be cheaper than a vector
8088 load. */
8089 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8090 return gen_rtx_VEC_DUPLICATE (mode, x);
8094 /* Generate code to load VALS, which is a PARALLEL containing only
8095 constants (for vec_init) or CONST_VECTOR, efficiently into a
8096 register. Returns an RTX to copy into the register, or NULL_RTX
8097 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8098 static rtx
8099 aarch64_simd_make_constant (rtx vals)
8101 enum machine_mode mode = GET_MODE (vals);
8102 rtx const_dup;
8103 rtx const_vec = NULL_RTX;
8104 int n_elts = GET_MODE_NUNITS (mode);
8105 int n_const = 0;
8106 int i;
8108 if (GET_CODE (vals) == CONST_VECTOR)
8109 const_vec = vals;
8110 else if (GET_CODE (vals) == PARALLEL)
8112 /* A CONST_VECTOR must contain only CONST_INTs and
8113 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8114 Only store valid constants in a CONST_VECTOR. */
8115 for (i = 0; i < n_elts; ++i)
8117 rtx x = XVECEXP (vals, 0, i);
8118 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8119 n_const++;
8121 if (n_const == n_elts)
8122 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8124 else
8125 gcc_unreachable ();
8127 if (const_vec != NULL_RTX
8128 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
8129 /* Load using MOVI/MVNI. */
8130 return const_vec;
8131 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8132 /* Loaded using DUP. */
8133 return const_dup;
8134 else if (const_vec != NULL_RTX)
8135 /* Load from constant pool. We can not take advantage of single-cycle
8136 LD1 because we need a PC-relative addressing mode. */
8137 return const_vec;
8138 else
8139 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8140 We can not construct an initializer. */
8141 return NULL_RTX;
8144 void
8145 aarch64_expand_vector_init (rtx target, rtx vals)
8147 enum machine_mode mode = GET_MODE (target);
8148 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8149 int n_elts = GET_MODE_NUNITS (mode);
8150 int n_var = 0, one_var = -1;
8151 bool all_same = true;
8152 rtx x, mem;
8153 int i;
8155 x = XVECEXP (vals, 0, 0);
8156 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8157 n_var = 1, one_var = 0;
8159 for (i = 1; i < n_elts; ++i)
8161 x = XVECEXP (vals, 0, i);
8162 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8163 ++n_var, one_var = i;
8165 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8166 all_same = false;
8169 if (n_var == 0)
8171 rtx constant = aarch64_simd_make_constant (vals);
8172 if (constant != NULL_RTX)
8174 emit_move_insn (target, constant);
8175 return;
8179 /* Splat a single non-constant element if we can. */
8180 if (all_same)
8182 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8183 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8184 return;
8187 /* One field is non-constant. Load constant then overwrite varying
8188 field. This is more efficient than using the stack. */
8189 if (n_var == 1)
8191 rtx copy = copy_rtx (vals);
8192 rtx index = GEN_INT (one_var);
8193 enum insn_code icode;
8195 /* Load constant part of vector, substitute neighboring value for
8196 varying element. */
8197 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8198 aarch64_expand_vector_init (target, copy);
8200 /* Insert variable. */
8201 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8202 icode = optab_handler (vec_set_optab, mode);
8203 gcc_assert (icode != CODE_FOR_nothing);
8204 emit_insn (GEN_FCN (icode) (target, x, index));
8205 return;
8208 /* Construct the vector in memory one field at a time
8209 and load the whole vector. */
8210 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8211 for (i = 0; i < n_elts; i++)
8212 emit_move_insn (adjust_address_nv (mem, inner_mode,
8213 i * GET_MODE_SIZE (inner_mode)),
8214 XVECEXP (vals, 0, i));
8215 emit_move_insn (target, mem);
8219 static unsigned HOST_WIDE_INT
8220 aarch64_shift_truncation_mask (enum machine_mode mode)
8222 return
8223 (aarch64_vector_mode_supported_p (mode)
8224 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8227 #ifndef TLS_SECTION_ASM_FLAG
8228 #define TLS_SECTION_ASM_FLAG 'T'
8229 #endif
8231 void
8232 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8233 tree decl ATTRIBUTE_UNUSED)
8235 char flagchars[10], *f = flagchars;
8237 /* If we have already declared this section, we can use an
8238 abbreviated form to switch back to it -- unless this section is
8239 part of a COMDAT groups, in which case GAS requires the full
8240 declaration every time. */
8241 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8242 && (flags & SECTION_DECLARED))
8244 fprintf (asm_out_file, "\t.section\t%s\n", name);
8245 return;
8248 if (!(flags & SECTION_DEBUG))
8249 *f++ = 'a';
8250 if (flags & SECTION_WRITE)
8251 *f++ = 'w';
8252 if (flags & SECTION_CODE)
8253 *f++ = 'x';
8254 if (flags & SECTION_SMALL)
8255 *f++ = 's';
8256 if (flags & SECTION_MERGE)
8257 *f++ = 'M';
8258 if (flags & SECTION_STRINGS)
8259 *f++ = 'S';
8260 if (flags & SECTION_TLS)
8261 *f++ = TLS_SECTION_ASM_FLAG;
8262 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8263 *f++ = 'G';
8264 *f = '\0';
8266 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8268 if (!(flags & SECTION_NOTYPE))
8270 const char *type;
8271 const char *format;
8273 if (flags & SECTION_BSS)
8274 type = "nobits";
8275 else
8276 type = "progbits";
8278 #ifdef TYPE_OPERAND_FMT
8279 format = "," TYPE_OPERAND_FMT;
8280 #else
8281 format = ",@%s";
8282 #endif
8284 fprintf (asm_out_file, format, type);
8286 if (flags & SECTION_ENTSIZE)
8287 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8288 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8290 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8291 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8292 else
8293 fprintf (asm_out_file, ",%s,comdat",
8294 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8298 putc ('\n', asm_out_file);
8301 /* Select a format to encode pointers in exception handling data. */
8303 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8305 int type;
8306 switch (aarch64_cmodel)
8308 case AARCH64_CMODEL_TINY:
8309 case AARCH64_CMODEL_TINY_PIC:
8310 case AARCH64_CMODEL_SMALL:
8311 case AARCH64_CMODEL_SMALL_PIC:
8312 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8313 for everything. */
8314 type = DW_EH_PE_sdata4;
8315 break;
8316 default:
8317 /* No assumptions here. 8-byte relocs required. */
8318 type = DW_EH_PE_sdata8;
8319 break;
8321 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8324 /* Emit load exclusive. */
8326 static void
8327 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
8328 rtx mem, rtx model_rtx)
8330 rtx (*gen) (rtx, rtx, rtx);
8332 switch (mode)
8334 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8335 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8336 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8337 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8338 default:
8339 gcc_unreachable ();
8342 emit_insn (gen (rval, mem, model_rtx));
8345 /* Emit store exclusive. */
8347 static void
8348 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
8349 rtx rval, rtx mem, rtx model_rtx)
8351 rtx (*gen) (rtx, rtx, rtx, rtx);
8353 switch (mode)
8355 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8356 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8357 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8358 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8359 default:
8360 gcc_unreachable ();
8363 emit_insn (gen (bval, rval, mem, model_rtx));
8366 /* Mark the previous jump instruction as unlikely. */
8368 static void
8369 aarch64_emit_unlikely_jump (rtx insn)
8371 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8373 insn = emit_jump_insn (insn);
8374 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8377 /* Expand a compare and swap pattern. */
8379 void
8380 aarch64_expand_compare_and_swap (rtx operands[])
8382 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8383 enum machine_mode mode, cmp_mode;
8384 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8386 bval = operands[0];
8387 rval = operands[1];
8388 mem = operands[2];
8389 oldval = operands[3];
8390 newval = operands[4];
8391 is_weak = operands[5];
8392 mod_s = operands[6];
8393 mod_f = operands[7];
8394 mode = GET_MODE (mem);
8395 cmp_mode = mode;
8397 /* Normally the succ memory model must be stronger than fail, but in the
8398 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8399 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8401 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8402 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8403 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8405 switch (mode)
8407 case QImode:
8408 case HImode:
8409 /* For short modes, we're going to perform the comparison in SImode,
8410 so do the zero-extension now. */
8411 cmp_mode = SImode;
8412 rval = gen_reg_rtx (SImode);
8413 oldval = convert_modes (SImode, mode, oldval, true);
8414 /* Fall through. */
8416 case SImode:
8417 case DImode:
8418 /* Force the value into a register if needed. */
8419 if (!aarch64_plus_operand (oldval, mode))
8420 oldval = force_reg (cmp_mode, oldval);
8421 break;
8423 default:
8424 gcc_unreachable ();
8427 switch (mode)
8429 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8430 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8431 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8432 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8433 default:
8434 gcc_unreachable ();
8437 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8439 if (mode == QImode || mode == HImode)
8440 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8442 x = gen_rtx_REG (CCmode, CC_REGNUM);
8443 x = gen_rtx_EQ (SImode, x, const0_rtx);
8444 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8447 /* Split a compare and swap pattern. */
8449 void
8450 aarch64_split_compare_and_swap (rtx operands[])
8452 rtx rval, mem, oldval, newval, scratch;
8453 enum machine_mode mode;
8454 bool is_weak;
8455 rtx_code_label *label1, *label2;
8456 rtx x, cond;
8458 rval = operands[0];
8459 mem = operands[1];
8460 oldval = operands[2];
8461 newval = operands[3];
8462 is_weak = (operands[4] != const0_rtx);
8463 scratch = operands[7];
8464 mode = GET_MODE (mem);
8466 label1 = NULL;
8467 if (!is_weak)
8469 label1 = gen_label_rtx ();
8470 emit_label (label1);
8472 label2 = gen_label_rtx ();
8474 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8476 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8477 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8478 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8479 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8480 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8482 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8484 if (!is_weak)
8486 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8487 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8488 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8489 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8491 else
8493 cond = gen_rtx_REG (CCmode, CC_REGNUM);
8494 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8495 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8498 emit_label (label2);
8501 /* Split an atomic operation. */
8503 void
8504 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8505 rtx value, rtx model_rtx, rtx cond)
8507 enum machine_mode mode = GET_MODE (mem);
8508 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8509 rtx_code_label *label;
8510 rtx x;
8512 label = gen_label_rtx ();
8513 emit_label (label);
8515 if (new_out)
8516 new_out = gen_lowpart (wmode, new_out);
8517 if (old_out)
8518 old_out = gen_lowpart (wmode, old_out);
8519 else
8520 old_out = new_out;
8521 value = simplify_gen_subreg (wmode, value, mode, 0);
8523 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8525 switch (code)
8527 case SET:
8528 new_out = value;
8529 break;
8531 case NOT:
8532 x = gen_rtx_AND (wmode, old_out, value);
8533 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8534 x = gen_rtx_NOT (wmode, new_out);
8535 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8536 break;
8538 case MINUS:
8539 if (CONST_INT_P (value))
8541 value = GEN_INT (-INTVAL (value));
8542 code = PLUS;
8544 /* Fall through. */
8546 default:
8547 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8548 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8549 break;
8552 aarch64_emit_store_exclusive (mode, cond, mem,
8553 gen_lowpart (mode, new_out), model_rtx);
8555 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8556 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8557 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8558 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8561 static void
8562 aarch64_print_extension (void)
8564 const struct aarch64_option_extension *opt = NULL;
8566 for (opt = all_extensions; opt->name != NULL; opt++)
8567 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8568 asm_fprintf (asm_out_file, "+%s", opt->name);
8570 asm_fprintf (asm_out_file, "\n");
8573 static void
8574 aarch64_start_file (void)
8576 if (selected_arch)
8578 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8579 aarch64_print_extension ();
8581 else if (selected_cpu)
8583 const char *truncated_name
8584 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8585 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8586 aarch64_print_extension ();
8588 default_file_start();
8591 /* Target hook for c_mode_for_suffix. */
8592 static enum machine_mode
8593 aarch64_c_mode_for_suffix (char suffix)
8595 if (suffix == 'q')
8596 return TFmode;
8598 return VOIDmode;
8601 /* We can only represent floating point constants which will fit in
8602 "quarter-precision" values. These values are characterised by
8603 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
8606 (-1)^s * (n/16) * 2^r
8608 Where:
8609 's' is the sign bit.
8610 'n' is an integer in the range 16 <= n <= 31.
8611 'r' is an integer in the range -3 <= r <= 4. */
8613 /* Return true iff X can be represented by a quarter-precision
8614 floating point immediate operand X. Note, we cannot represent 0.0. */
8615 bool
8616 aarch64_float_const_representable_p (rtx x)
8618 /* This represents our current view of how many bits
8619 make up the mantissa. */
8620 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8621 int exponent;
8622 unsigned HOST_WIDE_INT mantissa, mask;
8623 REAL_VALUE_TYPE r, m;
8624 bool fail;
8626 if (!CONST_DOUBLE_P (x))
8627 return false;
8629 if (GET_MODE (x) == VOIDmode)
8630 return false;
8632 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8634 /* We cannot represent infinities, NaNs or +/-zero. We won't
8635 know if we have +zero until we analyse the mantissa, but we
8636 can reject the other invalid values. */
8637 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8638 || REAL_VALUE_MINUS_ZERO (r))
8639 return false;
8641 /* Extract exponent. */
8642 r = real_value_abs (&r);
8643 exponent = REAL_EXP (&r);
8645 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8646 highest (sign) bit, with a fixed binary point at bit point_pos.
8647 m1 holds the low part of the mantissa, m2 the high part.
8648 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8649 bits for the mantissa, this can fail (low bits will be lost). */
8650 real_ldexp (&m, &r, point_pos - exponent);
8651 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
8653 /* If the low part of the mantissa has bits set we cannot represent
8654 the value. */
8655 if (w.elt (0) != 0)
8656 return false;
8657 /* We have rejected the lower HOST_WIDE_INT, so update our
8658 understanding of how many bits lie in the mantissa and
8659 look only at the high HOST_WIDE_INT. */
8660 mantissa = w.elt (1);
8661 point_pos -= HOST_BITS_PER_WIDE_INT;
8663 /* We can only represent values with a mantissa of the form 1.xxxx. */
8664 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8665 if ((mantissa & mask) != 0)
8666 return false;
8668 /* Having filtered unrepresentable values, we may now remove all
8669 but the highest 5 bits. */
8670 mantissa >>= point_pos - 5;
8672 /* We cannot represent the value 0.0, so reject it. This is handled
8673 elsewhere. */
8674 if (mantissa == 0)
8675 return false;
8677 /* Then, as bit 4 is always set, we can mask it off, leaving
8678 the mantissa in the range [0, 15]. */
8679 mantissa &= ~(1 << 4);
8680 gcc_assert (mantissa <= 15);
8682 /* GCC internally does not use IEEE754-like encoding (where normalized
8683 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8684 Our mantissa values are shifted 4 places to the left relative to
8685 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8686 by 5 places to correct for GCC's representation. */
8687 exponent = 5 - exponent;
8689 return (exponent >= 0 && exponent <= 7);
8692 char*
8693 aarch64_output_simd_mov_immediate (rtx const_vector,
8694 enum machine_mode mode,
8695 unsigned width)
8697 bool is_valid;
8698 static char templ[40];
8699 const char *mnemonic;
8700 const char *shift_op;
8701 unsigned int lane_count = 0;
8702 char element_char;
8704 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8706 /* This will return true to show const_vector is legal for use as either
8707 a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8708 also update INFO to show how the immediate should be generated. */
8709 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8710 gcc_assert (is_valid);
8712 element_char = sizetochar (info.element_width);
8713 lane_count = width / info.element_width;
8715 mode = GET_MODE_INNER (mode);
8716 if (mode == SFmode || mode == DFmode)
8718 gcc_assert (info.shift == 0 && ! info.mvn);
8719 if (aarch64_float_const_zero_rtx_p (info.value))
8720 info.value = GEN_INT (0);
8721 else
8723 #define buf_size 20
8724 REAL_VALUE_TYPE r;
8725 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8726 char float_buf[buf_size] = {'\0'};
8727 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8728 #undef buf_size
8730 if (lane_count == 1)
8731 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8732 else
8733 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8734 lane_count, element_char, float_buf);
8735 return templ;
8739 mnemonic = info.mvn ? "mvni" : "movi";
8740 shift_op = info.msl ? "msl" : "lsl";
8742 if (lane_count == 1)
8743 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8744 mnemonic, UINTVAL (info.value));
8745 else if (info.shift)
8746 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8747 ", %s %d", mnemonic, lane_count, element_char,
8748 UINTVAL (info.value), shift_op, info.shift);
8749 else
8750 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8751 mnemonic, lane_count, element_char, UINTVAL (info.value));
8752 return templ;
8755 char*
8756 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8757 enum machine_mode mode)
8759 enum machine_mode vmode;
8761 gcc_assert (!VECTOR_MODE_P (mode));
8762 vmode = aarch64_simd_container_mode (mode, 64);
8763 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8764 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8767 /* Split operands into moves from op[1] + op[2] into op[0]. */
8769 void
8770 aarch64_split_combinev16qi (rtx operands[3])
8772 unsigned int dest = REGNO (operands[0]);
8773 unsigned int src1 = REGNO (operands[1]);
8774 unsigned int src2 = REGNO (operands[2]);
8775 enum machine_mode halfmode = GET_MODE (operands[1]);
8776 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8777 rtx destlo, desthi;
8779 gcc_assert (halfmode == V16QImode);
8781 if (src1 == dest && src2 == dest + halfregs)
8783 /* No-op move. Can't split to nothing; emit something. */
8784 emit_note (NOTE_INSN_DELETED);
8785 return;
8788 /* Preserve register attributes for variable tracking. */
8789 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8790 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8791 GET_MODE_SIZE (halfmode));
8793 /* Special case of reversed high/low parts. */
8794 if (reg_overlap_mentioned_p (operands[2], destlo)
8795 && reg_overlap_mentioned_p (operands[1], desthi))
8797 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8798 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8799 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8801 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8803 /* Try to avoid unnecessary moves if part of the result
8804 is in the right place already. */
8805 if (src1 != dest)
8806 emit_move_insn (destlo, operands[1]);
8807 if (src2 != dest + halfregs)
8808 emit_move_insn (desthi, operands[2]);
8810 else
8812 if (src2 != dest + halfregs)
8813 emit_move_insn (desthi, operands[2]);
8814 if (src1 != dest)
8815 emit_move_insn (destlo, operands[1]);
8819 /* vec_perm support. */
8821 #define MAX_VECT_LEN 16
8823 struct expand_vec_perm_d
8825 rtx target, op0, op1;
8826 unsigned char perm[MAX_VECT_LEN];
8827 enum machine_mode vmode;
8828 unsigned char nelt;
8829 bool one_vector_p;
8830 bool testing_p;
8833 /* Generate a variable permutation. */
8835 static void
8836 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8838 enum machine_mode vmode = GET_MODE (target);
8839 bool one_vector_p = rtx_equal_p (op0, op1);
8841 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8842 gcc_checking_assert (GET_MODE (op0) == vmode);
8843 gcc_checking_assert (GET_MODE (op1) == vmode);
8844 gcc_checking_assert (GET_MODE (sel) == vmode);
8845 gcc_checking_assert (TARGET_SIMD);
8847 if (one_vector_p)
8849 if (vmode == V8QImode)
8851 /* Expand the argument to a V16QI mode by duplicating it. */
8852 rtx pair = gen_reg_rtx (V16QImode);
8853 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8854 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8856 else
8858 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8861 else
8863 rtx pair;
8865 if (vmode == V8QImode)
8867 pair = gen_reg_rtx (V16QImode);
8868 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8869 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8871 else
8873 pair = gen_reg_rtx (OImode);
8874 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8875 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8880 void
8881 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8883 enum machine_mode vmode = GET_MODE (target);
8884 unsigned int nelt = GET_MODE_NUNITS (vmode);
8885 bool one_vector_p = rtx_equal_p (op0, op1);
8886 rtx mask;
8888 /* The TBL instruction does not use a modulo index, so we must take care
8889 of that ourselves. */
8890 mask = aarch64_simd_gen_const_vector_dup (vmode,
8891 one_vector_p ? nelt - 1 : 2 * nelt - 1);
8892 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8894 /* For big-endian, we also need to reverse the index within the vector
8895 (but not which vector). */
8896 if (BYTES_BIG_ENDIAN)
8898 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
8899 if (!one_vector_p)
8900 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
8901 sel = expand_simple_binop (vmode, XOR, sel, mask,
8902 NULL, 0, OPTAB_LIB_WIDEN);
8904 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8907 /* Recognize patterns suitable for the TRN instructions. */
8908 static bool
8909 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8911 unsigned int i, odd, mask, nelt = d->nelt;
8912 rtx out, in0, in1, x;
8913 rtx (*gen) (rtx, rtx, rtx);
8914 enum machine_mode vmode = d->vmode;
8916 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8917 return false;
8919 /* Note that these are little-endian tests.
8920 We correct for big-endian later. */
8921 if (d->perm[0] == 0)
8922 odd = 0;
8923 else if (d->perm[0] == 1)
8924 odd = 1;
8925 else
8926 return false;
8927 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8929 for (i = 0; i < nelt; i += 2)
8931 if (d->perm[i] != i + odd)
8932 return false;
8933 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8934 return false;
8937 /* Success! */
8938 if (d->testing_p)
8939 return true;
8941 in0 = d->op0;
8942 in1 = d->op1;
8943 if (BYTES_BIG_ENDIAN)
8945 x = in0, in0 = in1, in1 = x;
8946 odd = !odd;
8948 out = d->target;
8950 if (odd)
8952 switch (vmode)
8954 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8955 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8956 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8957 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8958 case V4SImode: gen = gen_aarch64_trn2v4si; break;
8959 case V2SImode: gen = gen_aarch64_trn2v2si; break;
8960 case V2DImode: gen = gen_aarch64_trn2v2di; break;
8961 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8962 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8963 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8964 default:
8965 return false;
8968 else
8970 switch (vmode)
8972 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8973 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8974 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8975 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8976 case V4SImode: gen = gen_aarch64_trn1v4si; break;
8977 case V2SImode: gen = gen_aarch64_trn1v2si; break;
8978 case V2DImode: gen = gen_aarch64_trn1v2di; break;
8979 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8980 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8981 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8982 default:
8983 return false;
8987 emit_insn (gen (out, in0, in1));
8988 return true;
8991 /* Recognize patterns suitable for the UZP instructions. */
8992 static bool
8993 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8995 unsigned int i, odd, mask, nelt = d->nelt;
8996 rtx out, in0, in1, x;
8997 rtx (*gen) (rtx, rtx, rtx);
8998 enum machine_mode vmode = d->vmode;
9000 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9001 return false;
9003 /* Note that these are little-endian tests.
9004 We correct for big-endian later. */
9005 if (d->perm[0] == 0)
9006 odd = 0;
9007 else if (d->perm[0] == 1)
9008 odd = 1;
9009 else
9010 return false;
9011 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9013 for (i = 0; i < nelt; i++)
9015 unsigned elt = (i * 2 + odd) & mask;
9016 if (d->perm[i] != elt)
9017 return false;
9020 /* Success! */
9021 if (d->testing_p)
9022 return true;
9024 in0 = d->op0;
9025 in1 = d->op1;
9026 if (BYTES_BIG_ENDIAN)
9028 x = in0, in0 = in1, in1 = x;
9029 odd = !odd;
9031 out = d->target;
9033 if (odd)
9035 switch (vmode)
9037 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9038 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9039 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9040 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9041 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9042 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9043 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9044 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9045 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9046 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9047 default:
9048 return false;
9051 else
9053 switch (vmode)
9055 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9056 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9057 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9058 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9059 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9060 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9061 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9062 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9063 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9064 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9065 default:
9066 return false;
9070 emit_insn (gen (out, in0, in1));
9071 return true;
9074 /* Recognize patterns suitable for the ZIP instructions. */
9075 static bool
9076 aarch64_evpc_zip (struct expand_vec_perm_d *d)
9078 unsigned int i, high, mask, nelt = d->nelt;
9079 rtx out, in0, in1, x;
9080 rtx (*gen) (rtx, rtx, rtx);
9081 enum machine_mode vmode = d->vmode;
9083 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9084 return false;
9086 /* Note that these are little-endian tests.
9087 We correct for big-endian later. */
9088 high = nelt / 2;
9089 if (d->perm[0] == high)
9090 /* Do Nothing. */
9092 else if (d->perm[0] == 0)
9093 high = 0;
9094 else
9095 return false;
9096 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9098 for (i = 0; i < nelt / 2; i++)
9100 unsigned elt = (i + high) & mask;
9101 if (d->perm[i * 2] != elt)
9102 return false;
9103 elt = (elt + nelt) & mask;
9104 if (d->perm[i * 2 + 1] != elt)
9105 return false;
9108 /* Success! */
9109 if (d->testing_p)
9110 return true;
9112 in0 = d->op0;
9113 in1 = d->op1;
9114 if (BYTES_BIG_ENDIAN)
9116 x = in0, in0 = in1, in1 = x;
9117 high = !high;
9119 out = d->target;
9121 if (high)
9123 switch (vmode)
9125 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9126 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9127 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9128 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9129 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9130 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9131 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9132 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9133 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9134 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9135 default:
9136 return false;
9139 else
9141 switch (vmode)
9143 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9144 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9145 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9146 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9147 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9148 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9149 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9150 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9151 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9152 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9153 default:
9154 return false;
9158 emit_insn (gen (out, in0, in1));
9159 return true;
9162 /* Recognize patterns for the EXT insn. */
9164 static bool
9165 aarch64_evpc_ext (struct expand_vec_perm_d *d)
9167 unsigned int i, nelt = d->nelt;
9168 rtx (*gen) (rtx, rtx, rtx, rtx);
9169 rtx offset;
9171 unsigned int location = d->perm[0]; /* Always < nelt. */
9173 /* Check if the extracted indices are increasing by one. */
9174 for (i = 1; i < nelt; i++)
9176 unsigned int required = location + i;
9177 if (d->one_vector_p)
9179 /* We'll pass the same vector in twice, so allow indices to wrap. */
9180 required &= (nelt - 1);
9182 if (d->perm[i] != required)
9183 return false;
9186 switch (d->vmode)
9188 case V16QImode: gen = gen_aarch64_extv16qi; break;
9189 case V8QImode: gen = gen_aarch64_extv8qi; break;
9190 case V4HImode: gen = gen_aarch64_extv4hi; break;
9191 case V8HImode: gen = gen_aarch64_extv8hi; break;
9192 case V2SImode: gen = gen_aarch64_extv2si; break;
9193 case V4SImode: gen = gen_aarch64_extv4si; break;
9194 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9195 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9196 case V2DImode: gen = gen_aarch64_extv2di; break;
9197 case V2DFmode: gen = gen_aarch64_extv2df; break;
9198 default:
9199 return false;
9202 /* Success! */
9203 if (d->testing_p)
9204 return true;
9206 /* The case where (location == 0) is a no-op for both big- and little-endian,
9207 and is removed by the mid-end at optimization levels -O1 and higher. */
9209 if (BYTES_BIG_ENDIAN && (location != 0))
9211 /* After setup, we want the high elements of the first vector (stored
9212 at the LSB end of the register), and the low elements of the second
9213 vector (stored at the MSB end of the register). So swap. */
9214 rtx temp = d->op0;
9215 d->op0 = d->op1;
9216 d->op1 = temp;
9217 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9218 location = nelt - location;
9221 offset = GEN_INT (location);
9222 emit_insn (gen (d->target, d->op0, d->op1, offset));
9223 return true;
9226 /* Recognize patterns for the REV insns. */
9228 static bool
9229 aarch64_evpc_rev (struct expand_vec_perm_d *d)
9231 unsigned int i, j, diff, nelt = d->nelt;
9232 rtx (*gen) (rtx, rtx);
9234 if (!d->one_vector_p)
9235 return false;
9237 diff = d->perm[0];
9238 switch (diff)
9240 case 7:
9241 switch (d->vmode)
9243 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9244 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9245 default:
9246 return false;
9248 break;
9249 case 3:
9250 switch (d->vmode)
9252 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9253 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9254 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9255 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9256 default:
9257 return false;
9259 break;
9260 case 1:
9261 switch (d->vmode)
9263 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9264 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9265 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9266 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9267 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9268 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9269 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9270 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9271 default:
9272 return false;
9274 break;
9275 default:
9276 return false;
9279 for (i = 0; i < nelt ; i += diff + 1)
9280 for (j = 0; j <= diff; j += 1)
9282 /* This is guaranteed to be true as the value of diff
9283 is 7, 3, 1 and we should have enough elements in the
9284 queue to generate this. Getting a vector mask with a
9285 value of diff other than these values implies that
9286 something is wrong by the time we get here. */
9287 gcc_assert (i + j < nelt);
9288 if (d->perm[i + j] != i + diff - j)
9289 return false;
9292 /* Success! */
9293 if (d->testing_p)
9294 return true;
9296 emit_insn (gen (d->target, d->op0));
9297 return true;
9300 static bool
9301 aarch64_evpc_dup (struct expand_vec_perm_d *d)
9303 rtx (*gen) (rtx, rtx, rtx);
9304 rtx out = d->target;
9305 rtx in0;
9306 enum machine_mode vmode = d->vmode;
9307 unsigned int i, elt, nelt = d->nelt;
9308 rtx lane;
9310 elt = d->perm[0];
9311 for (i = 1; i < nelt; i++)
9313 if (elt != d->perm[i])
9314 return false;
9317 /* The generic preparation in aarch64_expand_vec_perm_const_1
9318 swaps the operand order and the permute indices if it finds
9319 d->perm[0] to be in the second operand. Thus, we can always
9320 use d->op0 and need not do any extra arithmetic to get the
9321 correct lane number. */
9322 in0 = d->op0;
9323 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
9325 switch (vmode)
9327 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9328 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9329 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9330 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9331 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9332 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9333 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9334 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9335 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9336 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9337 default:
9338 return false;
9341 emit_insn (gen (out, in0, lane));
9342 return true;
9345 static bool
9346 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9348 rtx rperm[MAX_VECT_LEN], sel;
9349 enum machine_mode vmode = d->vmode;
9350 unsigned int i, nelt = d->nelt;
9352 if (d->testing_p)
9353 return true;
9355 /* Generic code will try constant permutation twice. Once with the
9356 original mode and again with the elements lowered to QImode.
9357 So wait and don't do the selector expansion ourselves. */
9358 if (vmode != V8QImode && vmode != V16QImode)
9359 return false;
9361 for (i = 0; i < nelt; ++i)
9363 int nunits = GET_MODE_NUNITS (vmode);
9365 /* If big-endian and two vectors we end up with a weird mixed-endian
9366 mode on NEON. Reverse the index within each word but not the word
9367 itself. */
9368 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9369 : d->perm[i]);
9371 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9372 sel = force_reg (vmode, sel);
9374 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9375 return true;
9378 static bool
9379 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9381 /* The pattern matching functions above are written to look for a small
9382 number to begin the sequence (0, 1, N/2). If we begin with an index
9383 from the second operand, we can swap the operands. */
9384 if (d->perm[0] >= d->nelt)
9386 unsigned i, nelt = d->nelt;
9387 rtx x;
9389 gcc_assert (nelt == (nelt & -nelt));
9390 for (i = 0; i < nelt; ++i)
9391 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
9393 x = d->op0;
9394 d->op0 = d->op1;
9395 d->op1 = x;
9398 if (TARGET_SIMD)
9400 if (aarch64_evpc_rev (d))
9401 return true;
9402 else if (aarch64_evpc_ext (d))
9403 return true;
9404 else if (aarch64_evpc_dup (d))
9405 return true;
9406 else if (aarch64_evpc_zip (d))
9407 return true;
9408 else if (aarch64_evpc_uzp (d))
9409 return true;
9410 else if (aarch64_evpc_trn (d))
9411 return true;
9412 return aarch64_evpc_tbl (d);
9414 return false;
9417 /* Expand a vec_perm_const pattern. */
9419 bool
9420 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9422 struct expand_vec_perm_d d;
9423 int i, nelt, which;
9425 d.target = target;
9426 d.op0 = op0;
9427 d.op1 = op1;
9429 d.vmode = GET_MODE (target);
9430 gcc_assert (VECTOR_MODE_P (d.vmode));
9431 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9432 d.testing_p = false;
9434 for (i = which = 0; i < nelt; ++i)
9436 rtx e = XVECEXP (sel, 0, i);
9437 int ei = INTVAL (e) & (2 * nelt - 1);
9438 which |= (ei < nelt ? 1 : 2);
9439 d.perm[i] = ei;
9442 switch (which)
9444 default:
9445 gcc_unreachable ();
9447 case 3:
9448 d.one_vector_p = false;
9449 if (!rtx_equal_p (op0, op1))
9450 break;
9452 /* The elements of PERM do not suggest that only the first operand
9453 is used, but both operands are identical. Allow easier matching
9454 of the permutation by folding the permutation into the single
9455 input vector. */
9456 /* Fall Through. */
9457 case 2:
9458 for (i = 0; i < nelt; ++i)
9459 d.perm[i] &= nelt - 1;
9460 d.op0 = op1;
9461 d.one_vector_p = true;
9462 break;
9464 case 1:
9465 d.op1 = op0;
9466 d.one_vector_p = true;
9467 break;
9470 return aarch64_expand_vec_perm_const_1 (&d);
9473 static bool
9474 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
9475 const unsigned char *sel)
9477 struct expand_vec_perm_d d;
9478 unsigned int i, nelt, which;
9479 bool ret;
9481 d.vmode = vmode;
9482 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9483 d.testing_p = true;
9484 memcpy (d.perm, sel, nelt);
9486 /* Calculate whether all elements are in one vector. */
9487 for (i = which = 0; i < nelt; ++i)
9489 unsigned char e = d.perm[i];
9490 gcc_assert (e < 2 * nelt);
9491 which |= (e < nelt ? 1 : 2);
9494 /* If all elements are from the second vector, reindex as if from the
9495 first vector. */
9496 if (which == 2)
9497 for (i = 0; i < nelt; ++i)
9498 d.perm[i] -= nelt;
9500 /* Check whether the mask can be applied to a single vector. */
9501 d.one_vector_p = (which != 3);
9503 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9504 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9505 if (!d.one_vector_p)
9506 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9508 start_sequence ();
9509 ret = aarch64_expand_vec_perm_const_1 (&d);
9510 end_sequence ();
9512 return ret;
9515 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9516 bool
9517 aarch64_cannot_change_mode_class (enum machine_mode from,
9518 enum machine_mode to,
9519 enum reg_class rclass)
9521 /* Full-reg subregs are allowed on general regs or any class if they are
9522 the same size. */
9523 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9524 || !reg_classes_intersect_p (FP_REGS, rclass))
9525 return false;
9527 /* Limited combinations of subregs are safe on FPREGs. Particularly,
9528 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9529 2. Scalar to Scalar for integer modes or same size float modes.
9530 3. Vector to Vector modes.
9531 4. On little-endian only, Vector-Structure to Vector modes. */
9532 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9534 if (aarch64_vector_mode_supported_p (from)
9535 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9536 return false;
9538 if (GET_MODE_NUNITS (from) == 1
9539 && GET_MODE_NUNITS (to) == 1
9540 && (GET_MODE_CLASS (from) == MODE_INT
9541 || from == to))
9542 return false;
9544 if (aarch64_vector_mode_supported_p (from)
9545 && aarch64_vector_mode_supported_p (to))
9546 return false;
9548 /* Within an vector structure straddling multiple vector registers
9549 we are in a mixed-endian representation. As such, we can't
9550 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9551 switch between vectors and vector structures cheaply. */
9552 if (!BYTES_BIG_ENDIAN)
9553 if ((aarch64_vector_mode_supported_p (from)
9554 && aarch64_vect_struct_mode_p (to))
9555 || (aarch64_vector_mode_supported_p (to)
9556 && aarch64_vect_struct_mode_p (from)))
9557 return false;
9560 return true;
9563 /* Implement MODES_TIEABLE_P. */
9565 bool
9566 aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9568 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9569 return true;
9571 /* We specifically want to allow elements of "structure" modes to
9572 be tieable to the structure. This more general condition allows
9573 other rarer situations too. */
9574 if (TARGET_SIMD
9575 && aarch64_vector_mode_p (mode1)
9576 && aarch64_vector_mode_p (mode2))
9577 return true;
9579 return false;
9582 /* Return a new RTX holding the result of moving POINTER forward by
9583 AMOUNT bytes. */
9585 static rtx
9586 aarch64_move_pointer (rtx pointer, int amount)
9588 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
9590 return adjust_automodify_address (pointer, GET_MODE (pointer),
9591 next, amount);
9594 /* Return a new RTX holding the result of moving POINTER forward by the
9595 size of the mode it points to. */
9597 static rtx
9598 aarch64_progress_pointer (rtx pointer)
9600 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
9602 return aarch64_move_pointer (pointer, amount);
9605 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
9606 MODE bytes. */
9608 static void
9609 aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
9610 enum machine_mode mode)
9612 rtx reg = gen_reg_rtx (mode);
9614 /* "Cast" the pointers to the correct mode. */
9615 *src = adjust_address (*src, mode, 0);
9616 *dst = adjust_address (*dst, mode, 0);
9617 /* Emit the memcpy. */
9618 emit_move_insn (reg, *src);
9619 emit_move_insn (*dst, reg);
9620 /* Move the pointers forward. */
9621 *src = aarch64_progress_pointer (*src);
9622 *dst = aarch64_progress_pointer (*dst);
9625 /* Expand movmem, as if from a __builtin_memcpy. Return true if
9626 we succeed, otherwise return false. */
9628 bool
9629 aarch64_expand_movmem (rtx *operands)
9631 unsigned int n;
9632 rtx dst = operands[0];
9633 rtx src = operands[1];
9634 rtx base;
9635 bool speed_p = !optimize_function_for_size_p (cfun);
9637 /* When optimizing for size, give a better estimate of the length of a
9638 memcpy call, but use the default otherwise. */
9639 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
9641 /* We can't do anything smart if the amount to copy is not constant. */
9642 if (!CONST_INT_P (operands[2]))
9643 return false;
9645 n = UINTVAL (operands[2]);
9647 /* Try to keep the number of instructions low. For cases below 16 bytes we
9648 need to make at most two moves. For cases above 16 bytes it will be one
9649 move for each 16 byte chunk, then at most two additional moves. */
9650 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
9651 return false;
9653 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9654 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
9656 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
9657 src = adjust_automodify_address (src, VOIDmode, base, 0);
9659 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
9660 1-byte chunk. */
9661 if (n < 4)
9663 if (n >= 2)
9665 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9666 n -= 2;
9669 if (n == 1)
9670 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9672 return true;
9675 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
9676 4-byte chunk, partially overlapping with the previously copied chunk. */
9677 if (n < 8)
9679 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9680 n -= 4;
9681 if (n > 0)
9683 int move = n - 4;
9685 src = aarch64_move_pointer (src, move);
9686 dst = aarch64_move_pointer (dst, move);
9687 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9689 return true;
9692 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
9693 them, then (if applicable) an 8-byte chunk. */
9694 while (n >= 8)
9696 if (n / 16)
9698 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
9699 n -= 16;
9701 else
9703 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9704 n -= 8;
9708 /* Finish the final bytes of the copy. We can always do this in one
9709 instruction. We either copy the exact amount we need, or partially
9710 overlap with the previous chunk we copied and copy 8-bytes. */
9711 if (n == 0)
9712 return true;
9713 else if (n == 1)
9714 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9715 else if (n == 2)
9716 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9717 else if (n == 4)
9718 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9719 else
9721 if (n == 3)
9723 src = aarch64_move_pointer (src, -1);
9724 dst = aarch64_move_pointer (dst, -1);
9725 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9727 else
9729 int move = n - 8;
9731 src = aarch64_move_pointer (src, move);
9732 dst = aarch64_move_pointer (dst, move);
9733 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9737 return true;
9740 #undef TARGET_ADDRESS_COST
9741 #define TARGET_ADDRESS_COST aarch64_address_cost
9743 /* This hook will determines whether unnamed bitfields affect the alignment
9744 of the containing structure. The hook returns true if the structure
9745 should inherit the alignment requirements of an unnamed bitfield's
9746 type. */
9747 #undef TARGET_ALIGN_ANON_BITFIELD
9748 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9750 #undef TARGET_ASM_ALIGNED_DI_OP
9751 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9753 #undef TARGET_ASM_ALIGNED_HI_OP
9754 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9756 #undef TARGET_ASM_ALIGNED_SI_OP
9757 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9759 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9760 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9761 hook_bool_const_tree_hwi_hwi_const_tree_true
9763 #undef TARGET_ASM_FILE_START
9764 #define TARGET_ASM_FILE_START aarch64_start_file
9766 #undef TARGET_ASM_OUTPUT_MI_THUNK
9767 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9769 #undef TARGET_ASM_SELECT_RTX_SECTION
9770 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9772 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9773 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9775 #undef TARGET_BUILD_BUILTIN_VA_LIST
9776 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9778 #undef TARGET_CALLEE_COPIES
9779 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
9781 #undef TARGET_CAN_ELIMINATE
9782 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
9784 #undef TARGET_CANNOT_FORCE_CONST_MEM
9785 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9787 #undef TARGET_CONDITIONAL_REGISTER_USAGE
9788 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9790 /* Only the least significant bit is used for initialization guard
9791 variables. */
9792 #undef TARGET_CXX_GUARD_MASK_BIT
9793 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9795 #undef TARGET_C_MODE_FOR_SUFFIX
9796 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9798 #ifdef TARGET_BIG_ENDIAN_DEFAULT
9799 #undef TARGET_DEFAULT_TARGET_FLAGS
9800 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9801 #endif
9803 #undef TARGET_CLASS_MAX_NREGS
9804 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9806 #undef TARGET_BUILTIN_DECL
9807 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
9809 #undef TARGET_EXPAND_BUILTIN
9810 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9812 #undef TARGET_EXPAND_BUILTIN_VA_START
9813 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9815 #undef TARGET_FOLD_BUILTIN
9816 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9818 #undef TARGET_FUNCTION_ARG
9819 #define TARGET_FUNCTION_ARG aarch64_function_arg
9821 #undef TARGET_FUNCTION_ARG_ADVANCE
9822 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9824 #undef TARGET_FUNCTION_ARG_BOUNDARY
9825 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9827 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9828 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9830 #undef TARGET_FUNCTION_VALUE
9831 #define TARGET_FUNCTION_VALUE aarch64_function_value
9833 #undef TARGET_FUNCTION_VALUE_REGNO_P
9834 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9836 #undef TARGET_FRAME_POINTER_REQUIRED
9837 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9839 #undef TARGET_GIMPLE_FOLD_BUILTIN
9840 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9842 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9843 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9845 #undef TARGET_INIT_BUILTINS
9846 #define TARGET_INIT_BUILTINS aarch64_init_builtins
9848 #undef TARGET_LEGITIMATE_ADDRESS_P
9849 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9851 #undef TARGET_LEGITIMATE_CONSTANT_P
9852 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9854 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9855 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9857 #undef TARGET_LRA_P
9858 #define TARGET_LRA_P aarch64_lra_p
9860 #undef TARGET_MANGLE_TYPE
9861 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9863 #undef TARGET_MEMORY_MOVE_COST
9864 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9866 #undef TARGET_MUST_PASS_IN_STACK
9867 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9869 /* This target hook should return true if accesses to volatile bitfields
9870 should use the narrowest mode possible. It should return false if these
9871 accesses should use the bitfield container type. */
9872 #undef TARGET_NARROW_VOLATILE_BITFIELD
9873 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9875 #undef TARGET_OPTION_OVERRIDE
9876 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9878 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9879 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9880 aarch64_override_options_after_change
9882 #undef TARGET_PASS_BY_REFERENCE
9883 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9885 #undef TARGET_PREFERRED_RELOAD_CLASS
9886 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9888 #undef TARGET_SECONDARY_RELOAD
9889 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9891 #undef TARGET_SHIFT_TRUNCATION_MASK
9892 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9894 #undef TARGET_SETUP_INCOMING_VARARGS
9895 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9897 #undef TARGET_STRUCT_VALUE_RTX
9898 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9900 #undef TARGET_REGISTER_MOVE_COST
9901 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9903 #undef TARGET_RETURN_IN_MEMORY
9904 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9906 #undef TARGET_RETURN_IN_MSB
9907 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9909 #undef TARGET_RTX_COSTS
9910 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
9912 #undef TARGET_SCHED_ISSUE_RATE
9913 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9915 #undef TARGET_TRAMPOLINE_INIT
9916 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9918 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9919 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9921 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9922 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9924 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9925 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9927 #undef TARGET_VECTORIZE_ADD_STMT_COST
9928 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9930 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9931 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9932 aarch64_builtin_vectorization_cost
9934 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9935 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9937 #undef TARGET_VECTORIZE_BUILTINS
9938 #define TARGET_VECTORIZE_BUILTINS
9940 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9941 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9942 aarch64_builtin_vectorized_function
9944 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9945 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9946 aarch64_autovectorize_vector_sizes
9948 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
9949 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
9950 aarch64_atomic_assign_expand_fenv
9952 /* Section anchor support. */
9954 #undef TARGET_MIN_ANCHOR_OFFSET
9955 #define TARGET_MIN_ANCHOR_OFFSET -256
9957 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9958 byte offset; we can do much more for larger data types, but have no way
9959 to determine the size of the access. We assume accesses are aligned. */
9960 #undef TARGET_MAX_ANCHOR_OFFSET
9961 #define TARGET_MAX_ANCHOR_OFFSET 4095
9963 #undef TARGET_VECTOR_ALIGNMENT
9964 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9966 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9967 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9968 aarch64_simd_vector_alignment_reachable
9970 /* vec_perm support. */
9972 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9973 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9974 aarch64_vectorize_vec_perm_const_ok
9977 #undef TARGET_FIXED_CONDITION_CODE_REGS
9978 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9980 #undef TARGET_FLAGS_REGNUM
9981 #define TARGET_FLAGS_REGNUM CC_REGNUM
9983 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
9984 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
9986 struct gcc_target targetm = TARGET_INITIALIZER;
9988 #include "gt-aarch64.h"