gcc/config/aarch64/aarch64.c
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
67 /* Defined for convenience. */
68 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
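/* For example, with the default LP64 ABI POINTER_SIZE is 64, so
   POINTER_BYTES is 64 / 8 = 8; under ILP32 it is 32 / 8 = 4.  */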
70 /* Classifies an address.
72 ADDRESS_REG_IMM
73 A simple base register plus immediate offset.
75 ADDRESS_REG_WB
76 A base register indexed by immediate offset with writeback.
78 ADDRESS_REG_REG
79 A base register indexed by (optionally scaled) register.
81 ADDRESS_REG_UXTW
82 A base register indexed by (optionally scaled) zero-extended register.
84 ADDRESS_REG_SXTW
85 A base register indexed by (optionally scaled) sign-extended register.
87 ADDRESS_LO_SUM
88 A LO_SUM rtx with a base register and "LO12" symbol relocation.
90 ADDRESS_SYMBOLIC
91 A constant symbolic address, in pc-relative literal pool. */
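/* Roughly, these classes correspond to the following assembly forms
   (illustrative examples, shown as 64-bit loads):

     ADDRESS_REG_IMM    ldr x0, [x1, #16]
     ADDRESS_REG_WB     ldr x0, [x1, #16]!    or    ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW   ldr x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM     ldr x0, [x1, #:lo12:sym]
     ADDRESS_SYMBOLIC   ldr x0, .Lliteral_pool_entry  */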
93 enum aarch64_address_type {
94 ADDRESS_REG_IMM,
95 ADDRESS_REG_WB,
96 ADDRESS_REG_REG,
97 ADDRESS_REG_UXTW,
98 ADDRESS_REG_SXTW,
99 ADDRESS_LO_SUM,
100 ADDRESS_SYMBOLIC
103 struct aarch64_address_info {
104 enum aarch64_address_type type;
105 rtx base;
106 rtx offset;
107 int shift;
108 enum aarch64_symbol_type symbol_type;
111 struct simd_immediate_info
113 rtx value;
114 int shift;
115 int element_width;
116 bool mvn;
117 bool msl;
120 /* The current code model. */
121 enum aarch64_code_model aarch64_cmodel;
123 #ifdef HAVE_AS_TLS
124 #undef TARGET_HAVE_TLS
125 #define TARGET_HAVE_TLS 1
126 #endif
128 static bool aarch64_lra_p (void);
129 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
130 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
131 const_tree,
132 enum machine_mode *, int *,
133 bool *);
134 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
135 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_override_options_after_change (void);
137 static bool aarch64_vector_mode_supported_p (enum machine_mode);
138 static unsigned bit_count (unsigned HOST_WIDE_INT);
139 static bool aarch64_const_vec_all_same_int_p (rtx,
140 HOST_WIDE_INT, HOST_WIDE_INT);
142 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
143 const unsigned char *sel);
145 /* The processor for which instructions should be scheduled. */
146 enum aarch64_processor aarch64_tune = cortexa53;
148 /* The current tuning set. */
149 const struct tune_params *aarch64_tune_params;
151 /* Mask to specify which instructions we are allowed to generate. */
152 unsigned long aarch64_isa_flags = 0;
154 /* Mask to specify which instruction scheduling options should be used. */
155 unsigned long aarch64_tune_flags = 0;
157 /* Tuning parameters. */
159 #if HAVE_DESIGNATED_INITIALIZERS
160 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
161 #else
162 #define NAMED_PARAM(NAME, VAL) (VAL)
163 #endif
165 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
166 __extension__
167 #endif
169 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
170 __extension__
171 #endif
172 static const struct cpu_addrcost_table generic_addrcost_table =
174 NAMED_PARAM (pre_modify, 0),
175 NAMED_PARAM (post_modify, 0),
176 NAMED_PARAM (register_offset, 0),
177 NAMED_PARAM (register_extend, 0),
178 NAMED_PARAM (imm_offset, 0)
181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182 __extension__
183 #endif
184 static const struct cpu_regmove_cost generic_regmove_cost =
186 NAMED_PARAM (GP2GP, 1),
187 NAMED_PARAM (GP2FP, 2),
188 NAMED_PARAM (FP2GP, 2),
189 /* We currently do not provide direct support for TFmode Q->Q move.
190 Therefore we need to raise the cost above 2 in order to have
191 reload handle the situation. */
192 NAMED_PARAM (FP2FP, 4)
195 /* Generic costs for vector insn classes. */
196 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
197 __extension__
198 #endif
199 static const struct cpu_vector_cost generic_vector_cost =
201 NAMED_PARAM (scalar_stmt_cost, 1),
202 NAMED_PARAM (scalar_load_cost, 1),
203 NAMED_PARAM (scalar_store_cost, 1),
204 NAMED_PARAM (vec_stmt_cost, 1),
205 NAMED_PARAM (vec_to_scalar_cost, 1),
206 NAMED_PARAM (scalar_to_vec_cost, 1),
207 NAMED_PARAM (vec_align_load_cost, 1),
208 NAMED_PARAM (vec_unalign_load_cost, 1),
209 NAMED_PARAM (vec_unalign_store_cost, 1),
210 NAMED_PARAM (vec_store_cost, 1),
211 NAMED_PARAM (cond_taken_branch_cost, 3),
212 NAMED_PARAM (cond_not_taken_branch_cost, 1)
215 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216 __extension__
217 #endif
218 static const struct tune_params generic_tunings =
220 &cortexa57_extra_costs,
221 &generic_addrcost_table,
222 &generic_regmove_cost,
223 &generic_vector_cost,
224 NAMED_PARAM (memmov_cost, 4),
225 NAMED_PARAM (issue_rate, 2)
228 static const struct tune_params cortexa53_tunings =
230 &cortexa53_extra_costs,
231 &generic_addrcost_table,
232 &generic_regmove_cost,
233 &generic_vector_cost,
234 NAMED_PARAM (memmov_cost, 4),
235 NAMED_PARAM (issue_rate, 2)
238 static const struct tune_params cortexa57_tunings =
240 &cortexa57_extra_costs,
241 &generic_addrcost_table,
242 &generic_regmove_cost,
243 &generic_vector_cost,
244 NAMED_PARAM (memmov_cost, 4),
245 NAMED_PARAM (issue_rate, 3)
248 /* A processor implementing AArch64. */
249 struct processor
251 const char *const name;
252 enum aarch64_processor core;
253 const char *arch;
254 const unsigned long flags;
255 const struct tune_params *const tune;
258 /* Processor cores implementing AArch64. */
259 static const struct processor all_cores[] =
261 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
262 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
263 #include "aarch64-cores.def"
264 #undef AARCH64_CORE
265 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
266 {NULL, aarch64_none, NULL, 0, NULL}
269 /* Architectures implementing AArch64. */
270 static const struct processor all_architectures[] =
272 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
273 {NAME, CORE, #ARCH, FLAGS, NULL},
274 #include "aarch64-arches.def"
275 #undef AARCH64_ARCH
276 {NULL, aarch64_none, NULL, 0, NULL}
279 /* Target specification.  These are populated as command-line arguments
280 are processed, or NULL if not specified. */
281 static const struct processor *selected_arch;
282 static const struct processor *selected_cpu;
283 static const struct processor *selected_tune;
285 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
287 /* An ISA extension in the co-processor and main instruction set space. */
288 struct aarch64_option_extension
290 const char *const name;
291 const unsigned long flags_on;
292 const unsigned long flags_off;
295 /* ISA extensions in AArch64. */
296 static const struct aarch64_option_extension all_extensions[] =
298 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
299 {NAME, FLAGS_ON, FLAGS_OFF},
300 #include "aarch64-option-extensions.def"
301 #undef AARCH64_OPT_EXTENSION
302 {NULL, 0, 0}
305 /* Used to track the size of an address when generating a pre/post
306 increment address. */
307 static enum machine_mode aarch64_memory_reference_mode;
309 /* Used to force GTY into this file. */
310 static GTY(()) int gty_dummy;
312 /* A table of valid AArch64 "bitmask immediate" values for
313 logical instructions. */
315 #define AARCH64_NUM_BITMASKS 5334
316 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
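/* A "bitmask immediate" is the class of constant accepted by the AND, ORR
   and EOR immediate forms: a power-of-two sized element (2, 4, 8, 16, 32
   or 64 bits) containing a single rotated run of contiguous set bits,
   replicated across the register.  For example 0x5555555555555555,
   0x00ff00ff00ff00ff and 0x0000ffff0000ffff are all valid; there are
   5334 distinct 64-bit values in total, hence the table size above.  */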
318 typedef enum aarch64_cond_code
320 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
321 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
322 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
324 aarch64_cc;
326 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
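/* The enumeration above is ordered so that each condition code and its
   inverse differ only in the low bit, which is what this macro exploits.
   For example AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) == AARCH64_NE
   (0 ^ 1) and AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) == AARCH64_LT
   (10 ^ 1 == 11).  */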
328 /* The condition codes of the processor, and the inverse function. */
329 static const char * const aarch64_condition_codes[] =
331 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
332 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
335 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
336 unsigned
337 aarch64_dbx_register_number (unsigned regno)
339 if (GP_REGNUM_P (regno))
340 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
341 else if (regno == SP_REGNUM)
342 return AARCH64_DWARF_SP;
343 else if (FP_REGNUM_P (regno))
344 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
346 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
347 equivalent DWARF register. */
348 return DWARF_FRAME_REGISTERS;
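/* Concretely, this follows the AArch64 DWARF numbering: x0-x30 map to
   0-30, the stack pointer to 31 and v0-v31 to 64-95; anything else
   (e.g. the condition flags) has no DWARF equivalent.  */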
351 /* Return TRUE if MODE is any of the large INT modes. */
352 static bool
353 aarch64_vect_struct_mode_p (enum machine_mode mode)
355 return mode == OImode || mode == CImode || mode == XImode;
358 /* Return TRUE if MODE is any of the vector modes. */
359 static bool
360 aarch64_vector_mode_p (enum machine_mode mode)
362 return aarch64_vector_mode_supported_p (mode)
363 || aarch64_vect_struct_mode_p (mode);
366 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
367 static bool
368 aarch64_array_mode_supported_p (enum machine_mode mode,
369 unsigned HOST_WIDE_INT nelems)
371 if (TARGET_SIMD
372 && AARCH64_VALID_SIMD_QREG_MODE (mode)
373 && (nelems >= 2 && nelems <= 4))
374 return true;
376 return false;
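/* For example, this allows a 2, 3 or 4 element array of 128-bit vectors
   (V4SImode, V2DImode, etc.), the shape used by the ld2/ld3/ld4
   structure loads, to be represented in a single scalar integer mode
   rather than BLKmode.  */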
379 /* Implement HARD_REGNO_NREGS. */
382 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
384 switch (aarch64_regno_regclass (regno))
386 case FP_REGS:
387 case FP_LO_REGS:
388 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
389 default:
390 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
392 gcc_unreachable ();
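/* For instance, with UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16, a
   16-byte TImode value needs (16 + 7) / 8 == 2 general registers but
   only one FP/SIMD register, while a 32-byte OImode SIMD structure
   needs (32 + 15) / 16 == 2 FP/SIMD registers.  */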
395 /* Implement HARD_REGNO_MODE_OK. */
398 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
400 if (GET_MODE_CLASS (mode) == MODE_CC)
401 return regno == CC_REGNUM;
403 if (regno == SP_REGNUM)
404 /* The purpose of comparing with ptr_mode is to support the
405 global register variable associated with the stack pointer
406 register via the syntax of asm ("wsp") in ILP32. */
407 return mode == Pmode || mode == ptr_mode;
409 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
410 return mode == Pmode;
412 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
413 return 1;
415 if (FP_REGNUM_P (regno))
417 if (aarch64_vect_struct_mode_p (mode))
418 return
419 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
420 else
421 return 1;
424 return 0;
427 /* Return true if calls to DECL should be treated as
428 long-calls (i.e. called via a register). */
429 static bool
430 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
432 return false;
435 /* Return true if calls to symbol-ref SYM should be treated as
436 long-calls (i.e. called via a register). */
437 bool
438 aarch64_is_long_call_p (rtx sym)
440 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
443 /* Return true if the offsets to a zero/sign-extract operation
444 represent an expression that matches an extend operation. The
445 operands represent the parameters from
447 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
448 bool
449 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
450 rtx extract_imm)
452 HOST_WIDE_INT mult_val, extract_val;
454 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
455 return false;
457 mult_val = INTVAL (mult_imm);
458 extract_val = INTVAL (extract_imm);
460 if (extract_val > 8
461 && extract_val < GET_MODE_BITSIZE (mode)
462 && exact_log2 (extract_val & ~7) > 0
463 && (extract_val & 7) <= 4
464 && mult_val == (1 << (extract_val & 7)))
465 return true;
467 return false;
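/* As a worked example, in DImode a (zero_extract (mult reg 4) 34 0)
   satisfies the checks above: 34 > 8, 34 < 64, 34 & ~7 == 32 is a power
   of two, 34 & 7 == 2 <= 4 and the multiplier is 1 << 2 == 4.  It
   therefore describes a 32-bit register zero-extended and shifted left
   by two, the shape of extended-register operands such as
   "add x0, x1, w2, uxtw #2".  */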
470 /* Emit an insn that's a simple single-set. Both the operands must be
471 known to be valid. */
472 inline static rtx
473 emit_set_insn (rtx x, rtx y)
475 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
478 /* X and Y are two things to compare using CODE. Emit the compare insn and
479 return the rtx for register 0 in the proper mode. */
481 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
483 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
484 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
486 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
487 return cc_reg;
490 /* Build the SYMBOL_REF for __tls_get_addr. */
492 static GTY(()) rtx tls_get_addr_libfunc;
495 aarch64_tls_get_addr (void)
497 if (!tls_get_addr_libfunc)
498 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
499 return tls_get_addr_libfunc;
502 /* Return the TLS model to use for ADDR. */
504 static enum tls_model
505 tls_symbolic_operand_type (rtx addr)
507 enum tls_model tls_kind = TLS_MODEL_NONE;
508 rtx sym, addend;
510 if (GET_CODE (addr) == CONST)
512 split_const (addr, &sym, &addend);
513 if (GET_CODE (sym) == SYMBOL_REF)
514 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
516 else if (GET_CODE (addr) == SYMBOL_REF)
517 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
519 return tls_kind;
522 /* We'll allow lo_sums in our legitimate addresses so that combine
523 can take care of combining addresses where necessary, but for
524 generation purposes we'll generate the address as:
526 RTL Absolute
527 tmp = hi (symbol_ref); adrp x1, foo
528 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
531 PIC TLS
532 adrp x1, :got:foo adrp tmp, :tlsgd:foo
533 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
534 bl __tls_get_addr
537 Load TLS symbol, depending on TLS mechanism and TLS access model.
539 Global Dynamic - Traditional TLS:
540 adrp tmp, :tlsgd:imm
541 add dest, tmp, #:tlsgd_lo12:imm
542 bl __tls_get_addr
544 Global Dynamic - TLS Descriptors:
545 adrp dest, :tlsdesc:imm
546 ldr tmp, [dest, #:tlsdesc_lo12:imm]
547 add dest, dest, #:tlsdesc_lo12:imm
548 blr tmp
549 mrs tp, tpidr_el0
550 add dest, dest, tp
552 Initial Exec:
553 mrs tp, tpidr_el0
554 adrp tmp, :gottprel:imm
555 ldr dest, [tmp, #:gottprel_lo12:imm]
556 add dest, dest, tp
558 Local Exec:
559 mrs tp, tpidr_el0
560 add t0, tp, #:tprel_hi12:imm
561 add t0, #:tprel_lo12_nc:imm
564 static void
565 aarch64_load_symref_appropriately (rtx dest, rtx imm,
566 enum aarch64_symbol_type type)
568 switch (type)
570 case SYMBOL_SMALL_ABSOLUTE:
572 /* In ILP32, the mode of dest can be either SImode or DImode. */
573 rtx tmp_reg = dest;
574 enum machine_mode mode = GET_MODE (dest);
576 gcc_assert (mode == Pmode || mode == ptr_mode);
578 if (can_create_pseudo_p ())
579 tmp_reg = gen_reg_rtx (mode);
581 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
582 emit_insn (gen_add_losym (dest, tmp_reg, imm));
583 return;
586 case SYMBOL_TINY_ABSOLUTE:
587 emit_insn (gen_rtx_SET (Pmode, dest, imm));
588 return;
590 case SYMBOL_SMALL_GOT:
592 /* In ILP32, the mode of dest can be either SImode or DImode,
593 while the got entry is always of SImode size. The mode of
594 dest depends on how dest is used: if dest is assigned to a
595 pointer (e.g. in the memory), it has SImode; it may have
596 DImode if dest is dereferenced to access the memory.
597 This is why we have to handle three different ldr_got_small
598 patterns here (two patterns for ILP32). */
599 rtx tmp_reg = dest;
600 enum machine_mode mode = GET_MODE (dest);
602 if (can_create_pseudo_p ())
603 tmp_reg = gen_reg_rtx (mode);
605 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
606 if (mode == ptr_mode)
608 if (mode == DImode)
609 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
610 else
611 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
613 else
615 gcc_assert (mode == Pmode);
616 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
619 return;
622 case SYMBOL_SMALL_TLSGD:
624 rtx insns;
625 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
627 start_sequence ();
628 emit_call_insn (gen_tlsgd_small (result, imm));
629 insns = get_insns ();
630 end_sequence ();
632 RTL_CONST_CALL_P (insns) = 1;
633 emit_libcall_block (insns, dest, result, imm);
634 return;
637 case SYMBOL_SMALL_TLSDESC:
639 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
640 rtx tp;
642 emit_insn (gen_tlsdesc_small (imm));
643 tp = aarch64_load_tp (NULL);
644 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
645 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
646 return;
649 case SYMBOL_SMALL_GOTTPREL:
651 rtx tmp_reg = gen_reg_rtx (Pmode);
652 rtx tp = aarch64_load_tp (NULL);
653 emit_insn (gen_tlsie_small (tmp_reg, imm));
654 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
655 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
656 return;
659 case SYMBOL_SMALL_TPREL:
661 rtx tp = aarch64_load_tp (NULL);
662 emit_insn (gen_tlsle_small (dest, tp, imm));
663 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
664 return;
667 case SYMBOL_TINY_GOT:
668 emit_insn (gen_ldr_got_tiny (dest, imm));
669 return;
671 default:
672 gcc_unreachable ();
676 /* Emit a move from SRC to DEST. Assume that the move expanders can
677 handle all moves if !can_create_pseudo_p (). The distinction is
678 important because, unlike emit_move_insn, the move expanders know
679 how to force Pmode objects into the constant pool even when the
680 constant pool address is not itself legitimate. */
681 static rtx
682 aarch64_emit_move (rtx dest, rtx src)
684 return (can_create_pseudo_p ()
685 ? emit_move_insn (dest, src)
686 : emit_move_insn_1 (dest, src));
689 /* Split a 128-bit move operation into two 64-bit move operations,
690 taking care to handle partial overlap of register to register
691 copies. Special cases are needed when moving between GP regs and
692 FP regs. SRC can be a register, constant or memory; DST a register
693 or memory. If either operand is memory it must not have any side
694 effects. */
695 void
696 aarch64_split_128bit_move (rtx dst, rtx src)
698 rtx dst_lo, dst_hi;
699 rtx src_lo, src_hi;
701 enum machine_mode mode = GET_MODE (dst);
703 gcc_assert (mode == TImode || mode == TFmode);
704 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
705 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
707 if (REG_P (dst) && REG_P (src))
709 int src_regno = REGNO (src);
710 int dst_regno = REGNO (dst);
712 /* Handle FP <-> GP regs. */
713 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
715 src_lo = gen_lowpart (word_mode, src);
716 src_hi = gen_highpart (word_mode, src);
718 if (mode == TImode)
720 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
721 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
723 else
725 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
726 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
728 return;
730 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
732 dst_lo = gen_lowpart (word_mode, dst);
733 dst_hi = gen_highpart (word_mode, dst);
735 if (mode == TImode)
737 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
738 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
740 else
742 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
743 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
745 return;
749 dst_lo = gen_lowpart (word_mode, dst);
750 dst_hi = gen_highpart (word_mode, dst);
751 src_lo = gen_lowpart (word_mode, src);
752 src_hi = gen_highpart_mode (word_mode, mode, src);
754 /* At most one pairing may overlap. */
755 if (reg_overlap_mentioned_p (dst_lo, src_hi))
757 aarch64_emit_move (dst_hi, src_hi);
758 aarch64_emit_move (dst_lo, src_lo);
760 else
762 aarch64_emit_move (dst_lo, src_lo);
763 aarch64_emit_move (dst_hi, src_hi);
767 bool
768 aarch64_split_128bit_move_p (rtx dst, rtx src)
770 return (! REG_P (src)
771 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
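/* In other words, only a copy where both source and destination are
   FP/SIMD registers can stay as a single 128-bit register move; every
   other TImode/TFmode move is split into two 64-bit operations.  */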
774 /* Split a complex SIMD combine. */
776 void
777 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
779 enum machine_mode src_mode = GET_MODE (src1);
780 enum machine_mode dst_mode = GET_MODE (dst);
782 gcc_assert (VECTOR_MODE_P (dst_mode));
784 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
786 rtx (*gen) (rtx, rtx, rtx);
788 switch (src_mode)
790 case V8QImode:
791 gen = gen_aarch64_simd_combinev8qi;
792 break;
793 case V4HImode:
794 gen = gen_aarch64_simd_combinev4hi;
795 break;
796 case V2SImode:
797 gen = gen_aarch64_simd_combinev2si;
798 break;
799 case V2SFmode:
800 gen = gen_aarch64_simd_combinev2sf;
801 break;
802 case DImode:
803 gen = gen_aarch64_simd_combinedi;
804 break;
805 case DFmode:
806 gen = gen_aarch64_simd_combinedf;
807 break;
808 default:
809 gcc_unreachable ();
812 emit_insn (gen (dst, src1, src2));
813 return;
817 /* Split a complex SIMD move. */
819 void
820 aarch64_split_simd_move (rtx dst, rtx src)
822 enum machine_mode src_mode = GET_MODE (src);
823 enum machine_mode dst_mode = GET_MODE (dst);
825 gcc_assert (VECTOR_MODE_P (dst_mode));
827 if (REG_P (dst) && REG_P (src))
829 rtx (*gen) (rtx, rtx);
831 gcc_assert (VECTOR_MODE_P (src_mode));
833 switch (src_mode)
835 case V16QImode:
836 gen = gen_aarch64_split_simd_movv16qi;
837 break;
838 case V8HImode:
839 gen = gen_aarch64_split_simd_movv8hi;
840 break;
841 case V4SImode:
842 gen = gen_aarch64_split_simd_movv4si;
843 break;
844 case V2DImode:
845 gen = gen_aarch64_split_simd_movv2di;
846 break;
847 case V4SFmode:
848 gen = gen_aarch64_split_simd_movv4sf;
849 break;
850 case V2DFmode:
851 gen = gen_aarch64_split_simd_movv2df;
852 break;
853 default:
854 gcc_unreachable ();
857 emit_insn (gen (dst, src));
858 return;
862 static rtx
863 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
865 if (can_create_pseudo_p ())
866 return force_reg (mode, value);
867 else
869 x = aarch64_emit_move (x, value);
870 return x;
875 static rtx
876 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
878 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
880 rtx high;
881 /* Load the full offset into a register. This
882 might be improvable in the future. */
883 high = GEN_INT (offset);
884 offset = 0;
885 high = aarch64_force_temporary (mode, temp, high);
886 reg = aarch64_force_temporary (mode, temp,
887 gen_rtx_PLUS (mode, high, reg));
889 return plus_constant (mode, reg, offset);
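/* An add/sub immediate on AArch64 encodes a 12-bit unsigned value,
   optionally shifted left by 12 bits, so for example an offset of
   0x123456 cannot be added directly; the code above loads it into TEMP
   and adds the two registers instead.  */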
892 void
893 aarch64_expand_mov_immediate (rtx dest, rtx imm)
895 enum machine_mode mode = GET_MODE (dest);
896 unsigned HOST_WIDE_INT mask;
897 int i;
898 bool first;
899 unsigned HOST_WIDE_INT val;
900 bool subtargets;
901 rtx subtarget;
902 int one_match, zero_match;
904 gcc_assert (mode == SImode || mode == DImode);
906 /* Check on what type of symbol it is. */
907 if (GET_CODE (imm) == SYMBOL_REF
908 || GET_CODE (imm) == LABEL_REF
909 || GET_CODE (imm) == CONST)
911 rtx mem, base, offset;
912 enum aarch64_symbol_type sty;
914 /* If we have (const (plus symbol offset)), separate out the offset
915 before we start classifying the symbol. */
916 split_const (imm, &base, &offset);
918 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
919 switch (sty)
921 case SYMBOL_FORCE_TO_MEM:
922 if (offset != const0_rtx
923 && targetm.cannot_force_const_mem (mode, imm))
925 gcc_assert (can_create_pseudo_p ());
926 base = aarch64_force_temporary (mode, dest, base);
927 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
928 aarch64_emit_move (dest, base);
929 return;
931 mem = force_const_mem (ptr_mode, imm);
932 gcc_assert (mem);
933 if (mode != ptr_mode)
934 mem = gen_rtx_ZERO_EXTEND (mode, mem);
935 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
936 return;
938 case SYMBOL_SMALL_TLSGD:
939 case SYMBOL_SMALL_TLSDESC:
940 case SYMBOL_SMALL_GOTTPREL:
941 case SYMBOL_SMALL_GOT:
942 case SYMBOL_TINY_GOT:
943 if (offset != const0_rtx)
945 gcc_assert(can_create_pseudo_p ());
946 base = aarch64_force_temporary (mode, dest, base);
947 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
948 aarch64_emit_move (dest, base);
949 return;
951 /* FALLTHRU */
953 case SYMBOL_SMALL_TPREL:
954 case SYMBOL_SMALL_ABSOLUTE:
955 case SYMBOL_TINY_ABSOLUTE:
956 aarch64_load_symref_appropriately (dest, imm, sty);
957 return;
959 default:
960 gcc_unreachable ();
964 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
966 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
967 return;
970 if (!CONST_INT_P (imm))
972 if (GET_CODE (imm) == HIGH)
973 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
974 else
976 rtx mem = force_const_mem (mode, imm);
977 gcc_assert (mem);
978 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
981 return;
984 if (mode == SImode)
986 /* We know we can't do this in 1 insn, and we must be able to do it
987 in two; so don't mess around looking for sequences that don't buy
988 us anything. */
989 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
990 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
991 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
992 return;
995 /* Remaining cases are all for DImode. */
997 val = INTVAL (imm);
998 subtargets = optimize && can_create_pseudo_p ();
1000 one_match = 0;
1001 zero_match = 0;
1002 mask = 0xffff;
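/* Count how many of the four 16-bit halfwords of VAL are all zeros and
   how many are all ones.  Two all-ones halfwords mean the constant can
   be built as a single MOVN (leaving three halfwords all ones) followed
   by one MOVK; two all-zero halfwords mean the plain MOVZ/MOVK sequence
   at simple_sequence needs at most two instructions.  */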
1004 for (i = 0; i < 64; i += 16, mask <<= 16)
1006 if ((val & mask) == 0)
1007 zero_match++;
1008 else if ((val & mask) == mask)
1009 one_match++;
1012 if (one_match == 2)
1014 mask = 0xffff;
1015 for (i = 0; i < 64; i += 16, mask <<= 16)
1017 if ((val & mask) != mask)
1019 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1020 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1021 GEN_INT ((val >> i) & 0xffff)));
1022 return;
1025 gcc_unreachable ();
1028 if (zero_match == 2)
1029 goto simple_sequence;
1031 mask = 0x0ffff0000UL;
1032 for (i = 16; i < 64; i += 16, mask <<= 16)
1034 HOST_WIDE_INT comp = mask & ~(mask - 1);
1036 if (aarch64_uimm12_shift (val - (val & mask)))
1038 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1040 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1041 emit_insn (gen_adddi3 (dest, subtarget,
1042 GEN_INT (val - (val & mask))));
1043 return;
1045 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1047 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1049 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1050 GEN_INT ((val + comp) & mask)));
1051 emit_insn (gen_adddi3 (dest, subtarget,
1052 GEN_INT (val - ((val + comp) & mask))));
1053 return;
1055 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1057 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1059 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1060 GEN_INT ((val - comp) | ~mask)));
1061 emit_insn (gen_adddi3 (dest, subtarget,
1062 GEN_INT (val - ((val - comp) | ~mask))));
1063 return;
1065 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1067 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1069 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1070 GEN_INT (val | ~mask)));
1071 emit_insn (gen_adddi3 (dest, subtarget,
1072 GEN_INT (val - (val | ~mask))));
1073 return;
1077 /* See if we can do it by arithmetically combining two
1078 immediates. */
1079 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1081 int j;
1082 mask = 0xffff;
1084 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1085 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1087 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1088 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1089 GEN_INT (aarch64_bitmasks[i])));
1090 emit_insn (gen_adddi3 (dest, subtarget,
1091 GEN_INT (val - aarch64_bitmasks[i])));
1092 return;
1095 for (j = 0; j < 64; j += 16, mask <<= 16)
1097 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1099 emit_insn (gen_rtx_SET (VOIDmode, dest,
1100 GEN_INT (aarch64_bitmasks[i])));
1101 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1102 GEN_INT ((val >> j) & 0xffff)));
1103 return;
1108 /* See if we can do it by logically combining two immediates. */
1109 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1111 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1113 int j;
1115 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1116 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1118 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1119 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1120 GEN_INT (aarch64_bitmasks[i])));
1121 emit_insn (gen_iordi3 (dest, subtarget,
1122 GEN_INT (aarch64_bitmasks[j])));
1123 return;
1126 else if ((val & aarch64_bitmasks[i]) == val)
1128 int j;
1130 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1131 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1134 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1135 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1136 GEN_INT (aarch64_bitmasks[j])));
1137 emit_insn (gen_anddi3 (dest, subtarget,
1138 GEN_INT (aarch64_bitmasks[i])));
1139 return;
1144 simple_sequence:
1145 first = true;
1146 mask = 0xffff;
1147 for (i = 0; i < 64; i += 16, mask <<= 16)
1149 if ((val & mask) != 0)
1151 if (first)
1153 emit_insn (gen_rtx_SET (VOIDmode, dest,
1154 GEN_INT (val & mask)));
1155 first = false;
1157 else
1158 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1159 GEN_INT ((val >> i) & 0xffff)));
1164 static bool
1165 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1167 /* Indirect calls are not currently supported. */
1168 if (decl == NULL)
1169 return false;
1171 /* Cannot tail-call to long-calls, since these are outside of the
1172 range of a branch instruction (we could handle this if we added
1173 support for indirect tail-calls. */
1174 if (aarch64_decl_is_long_call_p (decl))
1175 return false;
1177 return true;
1180 /* Implement TARGET_PASS_BY_REFERENCE. */
1182 static bool
1183 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1184 enum machine_mode mode,
1185 const_tree type,
1186 bool named ATTRIBUTE_UNUSED)
1188 HOST_WIDE_INT size;
1189 enum machine_mode dummymode;
1190 int nregs;
1192 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1193 size = (mode == BLKmode && type)
1194 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1196 /* Aggregates are passed by reference based on their size. */
1197 if (type && AGGREGATE_TYPE_P (type))
1199 size = int_size_in_bytes (type);
1202 /* Variable sized arguments are always returned by reference. */
1203 if (size < 0)
1204 return true;
1206 /* Can this be a candidate to be passed in fp/simd register(s)? */
1207 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1208 &dummymode, &nregs,
1209 NULL))
1210 return false;
1212 /* Arguments which are variable sized or larger than 2 registers are
1213 passed by reference unless they are a homogeneous floating-point
1214 aggregate. */
1215 return size > 2 * UNITS_PER_WORD;
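/* For example, a plain structure of three pointers (24 bytes) is passed
   by reference, whereas a homogeneous aggregate of four doubles,
   although 32 bytes, is an fp/simd candidate and is therefore passed by
   value (in v0-v3 when registers are available).  */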
1218 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1219 static bool
1220 aarch64_return_in_msb (const_tree valtype)
1222 enum machine_mode dummy_mode;
1223 int dummy_int;
1225 /* Never happens in little-endian mode. */
1226 if (!BYTES_BIG_ENDIAN)
1227 return false;
1229 /* Only composite types smaller than or equal to 16 bytes can
1230 be potentially returned in registers. */
1231 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1232 || int_size_in_bytes (valtype) <= 0
1233 || int_size_in_bytes (valtype) > 16)
1234 return false;
1236 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1237 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1238 is always passed/returned in the least significant bits of fp/simd
1239 register(s). */
1240 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1241 &dummy_mode, &dummy_int, NULL))
1242 return false;
1244 return true;
1247 /* Implement TARGET_FUNCTION_VALUE.
1248 Define how to find the value returned by a function. */
1250 static rtx
1251 aarch64_function_value (const_tree type, const_tree func,
1252 bool outgoing ATTRIBUTE_UNUSED)
1254 enum machine_mode mode;
1255 int unsignedp;
1256 int count;
1257 enum machine_mode ag_mode;
1259 mode = TYPE_MODE (type);
1260 if (INTEGRAL_TYPE_P (type))
1261 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1263 if (aarch64_return_in_msb (type))
1265 HOST_WIDE_INT size = int_size_in_bytes (type);
1267 if (size % UNITS_PER_WORD != 0)
1269 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1270 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1274 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1275 &ag_mode, &count, NULL))
1277 if (!aarch64_composite_type_p (type, mode))
1279 gcc_assert (count == 1 && mode == ag_mode);
1280 return gen_rtx_REG (mode, V0_REGNUM);
1282 else
1284 int i;
1285 rtx par;
1287 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1288 for (i = 0; i < count; i++)
1290 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1291 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1292 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1293 XVECEXP (par, 0, i) = tmp;
1295 return par;
1298 else
1299 return gen_rtx_REG (mode, R0_REGNUM);
1302 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1303 Return true if REGNO is the number of a hard register in which the values
1304 of called function may come back. */
1306 static bool
1307 aarch64_function_value_regno_p (const unsigned int regno)
1309 /* Maximum of 16 bytes can be returned in the general registers. Examples
1310 of 16-byte return values are: 128-bit integers and 16-byte small
1311 structures (excluding homogeneous floating-point aggregates). */
1312 if (regno == R0_REGNUM || regno == R1_REGNUM)
1313 return true;
1315 /* Up to four fp/simd registers can return a function value, e.g. a
1316 homogeneous floating-point aggregate having four members. */
1317 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1318 return !TARGET_GENERAL_REGS_ONLY;
1320 return false;
1323 /* Implement TARGET_RETURN_IN_MEMORY.
1325 If the type T of the result of a function is such that
1326 void func (T arg)
1327 would require that arg be passed as a value in a register (or set of
1328 registers) according to the parameter passing rules, then the result
1329 is returned in the same registers as would be used for such an
1330 argument. */
1332 static bool
1333 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1335 HOST_WIDE_INT size;
1336 enum machine_mode ag_mode;
1337 int count;
1339 if (!AGGREGATE_TYPE_P (type)
1340 && TREE_CODE (type) != COMPLEX_TYPE
1341 && TREE_CODE (type) != VECTOR_TYPE)
1342 /* Simple scalar types are always returned in registers. */
1343 return false;
1345 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1346 type,
1347 &ag_mode,
1348 &count,
1349 NULL))
1350 return false;
1352 /* Types larger than 2 registers are returned in memory. */
1353 size = int_size_in_bytes (type);
1354 return (size < 0 || size > 2 * UNITS_PER_WORD);
1357 static bool
1358 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1359 const_tree type, int *nregs)
1361 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1362 return aarch64_vfp_is_call_or_return_candidate (mode,
1363 type,
1364 &pcum->aapcs_vfp_rmode,
1365 nregs,
1366 NULL);
1369 /* Given MODE and TYPE of a function argument, return the alignment in
1370 bits. The idea is to suppress any stronger alignment requested by
1371 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1372 This is a helper function for local use only. */
1374 static unsigned int
1375 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1377 unsigned int alignment;
1379 if (type)
1381 if (!integer_zerop (TYPE_SIZE (type)))
1383 if (TYPE_MODE (type) == mode)
1384 alignment = TYPE_ALIGN (type);
1385 else
1386 alignment = GET_MODE_ALIGNMENT (mode);
1388 else
1389 alignment = 0;
1391 else
1392 alignment = GET_MODE_ALIGNMENT (mode);
1394 return alignment;
1397 /* Layout a function argument according to the AAPCS64 rules. The rule
1398 numbers refer to the rule numbers in the AAPCS64. */
1400 static void
1401 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1402 const_tree type,
1403 bool named ATTRIBUTE_UNUSED)
1405 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1406 int ncrn, nvrn, nregs;
1407 bool allocate_ncrn, allocate_nvrn;
1409 /* We need to do this once per argument. */
1410 if (pcum->aapcs_arg_processed)
1411 return;
1413 pcum->aapcs_arg_processed = true;
1415 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1416 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1417 mode,
1418 type,
1419 &nregs);
1421 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1422 The following code thus handles passing by SIMD/FP registers first. */
1424 nvrn = pcum->aapcs_nvrn;
1426 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1427 and homogeneous short-vector aggregates (HVA). */
1428 if (allocate_nvrn)
1430 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1432 pcum->aapcs_nextnvrn = nvrn + nregs;
1433 if (!aarch64_composite_type_p (type, mode))
1435 gcc_assert (nregs == 1);
1436 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1438 else
1440 rtx par;
1441 int i;
1442 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1443 for (i = 0; i < nregs; i++)
1445 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1446 V0_REGNUM + nvrn + i);
1447 tmp = gen_rtx_EXPR_LIST
1448 (VOIDmode, tmp,
1449 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1450 XVECEXP (par, 0, i) = tmp;
1452 pcum->aapcs_reg = par;
1454 return;
1456 else
1458 /* C.3 NSRN is set to 8. */
1459 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1460 goto on_stack;
1464 ncrn = pcum->aapcs_ncrn;
1465 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1466 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1469 /* C6 - C9, though the sign and zero extension semantics are
1470 handled elsewhere. This is the case where the argument fits
1471 entirely in general registers. */
1472 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1474 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1476 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1478 /* C.8 if the argument has an alignment of 16 then the NGRN is
1479 rounded up to the next even number. */
1480 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1482 ++ncrn;
1483 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1485 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1486 A reg is still generated for it, but the caller should be smart
1487 enough not to use it. */
1488 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1490 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1492 else
1494 rtx par;
1495 int i;
1497 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1498 for (i = 0; i < nregs; i++)
1500 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1501 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1502 GEN_INT (i * UNITS_PER_WORD));
1503 XVECEXP (par, 0, i) = tmp;
1505 pcum->aapcs_reg = par;
1508 pcum->aapcs_nextncrn = ncrn + nregs;
1509 return;
1512 /* C.11 */
1513 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1515 /* The argument is passed on stack; record the needed number of words for
1516 this argument (we can re-use NREGS) and align the total size if
1517 necessary. */
1518 on_stack:
1519 pcum->aapcs_stack_words = nregs;
1520 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1521 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1522 16 / UNITS_PER_WORD) + 1;
1523 return;
1526 /* Implement TARGET_FUNCTION_ARG. */
1528 static rtx
1529 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1530 const_tree type, bool named)
1532 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1533 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1535 if (mode == VOIDmode)
1536 return NULL_RTX;
1538 aarch64_layout_arg (pcum_v, mode, type, named);
1539 return pcum->aapcs_reg;
1542 void
1543 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1544 const_tree fntype ATTRIBUTE_UNUSED,
1545 rtx libname ATTRIBUTE_UNUSED,
1546 const_tree fndecl ATTRIBUTE_UNUSED,
1547 unsigned n_named ATTRIBUTE_UNUSED)
1549 pcum->aapcs_ncrn = 0;
1550 pcum->aapcs_nvrn = 0;
1551 pcum->aapcs_nextncrn = 0;
1552 pcum->aapcs_nextnvrn = 0;
1553 pcum->pcs_variant = ARM_PCS_AAPCS64;
1554 pcum->aapcs_reg = NULL_RTX;
1555 pcum->aapcs_arg_processed = false;
1556 pcum->aapcs_stack_words = 0;
1557 pcum->aapcs_stack_size = 0;
1559 return;
1562 static void
1563 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1564 enum machine_mode mode,
1565 const_tree type,
1566 bool named)
1568 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1569 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1571 aarch64_layout_arg (pcum_v, mode, type, named);
1572 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1573 != (pcum->aapcs_stack_words != 0));
1574 pcum->aapcs_arg_processed = false;
1575 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1576 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1577 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1578 pcum->aapcs_stack_words = 0;
1579 pcum->aapcs_reg = NULL_RTX;
1583 bool
1584 aarch64_function_arg_regno_p (unsigned regno)
1586 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1587 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1590 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1591 PARM_BOUNDARY bits of alignment, but will be given anything up
1592 to STACK_BOUNDARY bits if the type requires it. This makes sure
1593 that both before and after the layout of each argument, the Next
1594 Stacked Argument Address (NSAA) will have a minimum alignment of
1595 8 bytes. */
1597 static unsigned int
1598 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1600 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1602 if (alignment < PARM_BOUNDARY)
1603 alignment = PARM_BOUNDARY;
1604 if (alignment > STACK_BOUNDARY)
1605 alignment = STACK_BOUNDARY;
1606 return alignment;
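/* With PARM_BOUNDARY of 64 and STACK_BOUNDARY of 128 this means, for
   example, that a lone char argument still gets a 64-bit aligned slot
   while a 16-byte aligned aggregate is capped at 128-bit alignment.  */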
1609 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1611 Return true if an argument passed on the stack should be padded upwards,
1612 i.e. if the least-significant byte of the stack slot has useful data.
1614 Small aggregate types are placed in the lowest memory address.
1616 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1618 bool
1619 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1621 /* On little-endian targets, the least significant byte of every stack
1622 argument is passed at the lowest byte address of the stack slot. */
1623 if (!BYTES_BIG_ENDIAN)
1624 return true;
1626 /* Otherwise, integral, floating-point and pointer types are padded downward:
1627 the least significant byte of a stack argument is passed at the highest
1628 byte address of the stack slot. */
1629 if (type
1630 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1631 || POINTER_TYPE_P (type))
1632 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1633 return false;
1635 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1636 return true;
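/* For example, on a big-endian target a 3-byte structure occupies the
   three lowest-addressed bytes of its stack slot (padded upward),
   whereas an int argument occupies the highest-addressed four bytes of
   its 8-byte slot (padded downward).  */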
1639 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1641 It specifies padding for the last (may also be the only)
1642 element of a block move between registers and memory. Assuming
1643 the block is in memory, padding upward means that the last element
1644 is padded after its most significant byte, while with downward
1645 padding the last element is padded at its least significant byte
1646 side.
1648 Small aggregates and small complex types are always padded
1649 upwards.
1651 We don't need to worry about homogeneous floating-point or
1652 short-vector aggregates; their move is not affected by the
1653 padding direction determined here. Regardless of endianness,
1654 each element of such an aggregate is put in the least
1655 significant bits of a fp/simd register.
1657 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1658 register has useful data, and return the opposite if the most
1659 significant byte does. */
1661 bool
1662 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1663 bool first ATTRIBUTE_UNUSED)
1666 /* Small composite types are always padded upward. */
1667 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1669 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1670 : GET_MODE_SIZE (mode));
1671 if (size < 2 * UNITS_PER_WORD)
1672 return true;
1675 /* Otherwise, use the default padding. */
1676 return !BYTES_BIG_ENDIAN;
1679 static enum machine_mode
1680 aarch64_libgcc_cmp_return_mode (void)
1682 return SImode;
1685 static bool
1686 aarch64_frame_pointer_required (void)
1688 /* If the function contains dynamic stack allocations, we need to
1689 use the frame pointer to access the static parts of the frame. */
1690 if (cfun->calls_alloca)
1691 return true;
1693 /* In aarch64_override_options_after_change
1694 flag_omit_leaf_frame_pointer turns off the frame pointer by
1695 default. Turn it back on now if we've not got a leaf
1696 function. */
1697 if (flag_omit_leaf_frame_pointer
1698 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1699 return true;
1701 return false;
1704 /* Mark the registers that need to be saved by the callee and calculate
1705 the size of the callee-saved registers area and frame record (both FP
1706 and LR may be omitted). */
1707 static void
1708 aarch64_layout_frame (void)
1710 HOST_WIDE_INT offset = 0;
1711 int regno;
1713 if (reload_completed && cfun->machine->frame.laid_out)
1714 return;
1716 cfun->machine->frame.fp_lr_offset = 0;
1718 /* First mark all the registers that really need to be saved... */
1719 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1720 cfun->machine->frame.reg_offset[regno] = -1;
1722 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1723 cfun->machine->frame.reg_offset[regno] = -1;
1725 /* ... that includes the eh data registers (if needed)... */
1726 if (crtl->calls_eh_return)
1727 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1728 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1730 /* ... and any callee saved register that dataflow says is live. */
1731 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1732 if (df_regs_ever_live_p (regno)
1733 && !call_used_regs[regno])
1734 cfun->machine->frame.reg_offset[regno] = 0;
1736 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1737 if (df_regs_ever_live_p (regno)
1738 && !call_used_regs[regno])
1739 cfun->machine->frame.reg_offset[regno] = 0;
1741 if (frame_pointer_needed)
1743 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1744 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1745 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1748 /* Now assign stack slots for them. */
1749 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1750 if (cfun->machine->frame.reg_offset[regno] != -1)
1752 cfun->machine->frame.reg_offset[regno] = offset;
1753 offset += UNITS_PER_WORD;
1756 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1757 if (cfun->machine->frame.reg_offset[regno] != -1)
1759 cfun->machine->frame.reg_offset[regno] = offset;
1760 offset += UNITS_PER_WORD;
1763 if (frame_pointer_needed)
1765 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1766 offset += UNITS_PER_WORD;
1767 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1770 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1772 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1773 offset += UNITS_PER_WORD;
1774 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1777 cfun->machine->frame.padding0 =
1778 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1779 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1781 cfun->machine->frame.saved_regs_size = offset;
1782 cfun->machine->frame.laid_out = true;
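/* As a worked example (assuming the frame pointer is in use and x19, x20
   and d8 are live across a call): x19 gets offset 0, x20 offset 8, d8
   offset 16, x29 offset 24 and x30 offset 32; the 40-byte total is then
   rounded up to 48 (padding0 == 8), with fp_lr_offset == 16.  */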
1785 /* Make the last instruction frame-related and note that it performs
1786 the operation described by FRAME_PATTERN. */
1788 static void
1789 aarch64_set_frame_expr (rtx frame_pattern)
1791 rtx insn;
1793 insn = get_last_insn ();
1794 RTX_FRAME_RELATED_P (insn) = 1;
1795 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1796 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1797 frame_pattern,
1798 REG_NOTES (insn));
1801 static bool
1802 aarch64_register_saved_on_entry (int regno)
1804 return cfun->machine->frame.reg_offset[regno] != -1;
1808 static void
1809 aarch64_save_or_restore_fprs (int start_offset, int increment,
1810 bool restore, rtx base_rtx)
1813 unsigned regno;
1814 unsigned regno2;
1815 rtx insn;
1816 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1817 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1820 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1822 if (aarch64_register_saved_on_entry (regno))
1824 rtx mem;
1825 mem = gen_mem_ref (DFmode,
1826 plus_constant (Pmode,
1827 base_rtx,
1828 start_offset));
1830 for (regno2 = regno + 1;
1831 regno2 <= V31_REGNUM
1832 && !aarch64_register_saved_on_entry (regno2);
1833 regno2++)
1835 /* Empty loop. */
1837 if (regno2 <= V31_REGNUM &&
1838 aarch64_register_saved_on_entry (regno2))
1840 rtx mem2;
1841 /* Next highest register to be saved. */
1842 mem2 = gen_mem_ref (DFmode,
1843 plus_constant
1844 (Pmode,
1845 base_rtx,
1846 start_offset + increment));
1847 if (restore == false)
1849 insn = emit_insn
1850 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1851 mem2, gen_rtx_REG (DFmode, regno2)));
1854 else
1856 insn = emit_insn
1857 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1858 gen_rtx_REG (DFmode, regno2), mem2));
1860 add_reg_note (insn, REG_CFA_RESTORE,
1861 gen_rtx_REG (DFmode, regno));
1862 add_reg_note (insn, REG_CFA_RESTORE,
1863 gen_rtx_REG (DFmode, regno2));
1866 /* The first part of a frame-related parallel insn
1867 is always assumed to be relevant to the frame
1868 calculations; subsequent parts are only
1869 frame-related if explicitly marked. */
1870 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1871 regno = regno2;
1872 start_offset += increment * 2;
1874 else
1876 if (restore == false)
1877 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1878 else
1880 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1881 add_reg_note (insn, REG_CFA_RESTORE,
1882 gen_rtx_REG (DImode, regno));
1884 start_offset += increment;
1886 RTX_FRAME_RELATED_P (insn) = 1;
1893 /* OFFSET is the offset from the stack pointer at which the saves and
1894 restores have to happen. */
1895 static void
1896 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1897 bool restore)
1899 rtx insn;
1900 rtx base_rtx = stack_pointer_rtx;
1901 HOST_WIDE_INT start_offset = offset;
1902 HOST_WIDE_INT increment = UNITS_PER_WORD;
1903 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1904 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1905 unsigned regno;
1906 unsigned regno2;
1908 for (regno = R0_REGNUM; regno <= limit; regno++)
1910 if (aarch64_register_saved_on_entry (regno))
1912 rtx mem;
1913 mem = gen_mem_ref (Pmode,
1914 plus_constant (Pmode,
1915 base_rtx,
1916 start_offset));
1918 for (regno2 = regno + 1;
1919 regno2 <= limit
1920 && !aarch64_register_saved_on_entry (regno2);
1921 regno2++)
1923 /* Empty loop. */
1925 if (regno2 <= limit &&
1926 aarch64_register_saved_on_entry (regno2))
1928 rtx mem2;
1929 /* Next highest register to be saved. */
1930 mem2 = gen_mem_ref (Pmode,
1931 plus_constant
1932 (Pmode,
1933 base_rtx,
1934 start_offset + increment));
1935 if (restore == false)
1937 insn = emit_insn
1938 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1939 mem2, gen_rtx_REG (DImode, regno2)));
1942 else
1944 insn = emit_insn
1945 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1946 gen_rtx_REG (DImode, regno2), mem2));
1948 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1949 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1952 /* The first part of a frame-related parallel insn
1953 is always assumed to be relevant to the frame
1954 calculations; subsequent parts are only
1955 frame-related if explicitly marked. */
1956 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1957 1)) = 1;
1958 regno = regno2;
1959 start_offset += increment * 2;
1961 else
1963 if (restore == false)
1964 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1965 else
1967 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1968 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1970 start_offset += increment;
1972 RTX_FRAME_RELATED_P (insn) = 1;
1976 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1980 /* AArch64 stack frames generated by this compiler look like:
1982 +-------------------------------+
1984 | incoming stack arguments |
1986 +-------------------------------+ <-- arg_pointer_rtx
1988 | callee-allocated save area |
1989 | for register varargs |
1991 +-------------------------------+ <-- frame_pointer_rtx
1993 | local variables |
1995 +-------------------------------+
1996 | padding0 | \
1997 +-------------------------------+ |
1998 | | |
1999 | | |
2000 | callee-saved registers | | frame.saved_regs_size
2001 | | |
2002 +-------------------------------+ |
2003 | LR' | |
2004 +-------------------------------+ |
2005 | FP' | /
2006 P +-------------------------------+ <-- hard_frame_pointer_rtx
2007 | dynamic allocation |
2008 +-------------------------------+
2010 | outgoing stack arguments |
2012 +-------------------------------+ <-- stack_pointer_rtx
2014 Dynamic stack allocations such as alloca insert data at point P.
2015 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2016 hard_frame_pointer_rtx unchanged. */
2018 /* Generate the prologue instructions for entry into a function.
2019 Establish the stack frame by decreasing the stack pointer with a
2020 properly calculated size and, if necessary, create a frame record
2021 filled with the values of LR and previous frame pointer. The
2022 current FP is also set up if it is in use. */
2024 void
2025 aarch64_expand_prologue (void)
2027 /* sub sp, sp, #<frame_size>
2028 stp {fp, lr}, [sp, #<frame_size> - 16]
2029 add fp, sp, #<frame_size> - hardfp_offset
2030 stp {cs_reg}, [fp, #-16] etc.
2032 sub sp, sp, <final_adjustment_if_any>
2034 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2035 HOST_WIDE_INT frame_size, offset;
2036 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2037 rtx insn;
2039 aarch64_layout_frame ();
2040 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2041 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2042 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2043 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2044 + crtl->outgoing_args_size);
2045 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2046 STACK_BOUNDARY / BITS_PER_UNIT);
2048 if (flag_stack_usage_info)
2049 current_function_static_stack_size = frame_size;
2051 fp_offset = (offset
2052 - original_frame_size
2053 - cfun->machine->frame.saved_regs_size);
2055 /* Store-pair and load-pair instructions have an offset range of only -512 to 504. */
2056 if (offset >= 512)
2058 /* When the frame is large, the stack pointer is first decreased to
2059 skip over the callee-allocated save area for
2060 register varargs, the local variable area and/or the callee-saved
2061 register area. This allows the pre-indexed write-back
2062 store pair instructions to be used for setting up the stack frame
2063 efficiently. */
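/* Illustrative note (added for exposition, not from the original source):
   for 64-bit registers the STP/LDP immediate is a signed 7-bit value
   scaled by the 8-byte access size, i.e. byte offsets -512, -504, ...,
   496, 504.  Any frame offset of 512 or more therefore has to take the
   splitting path described above.  */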
2064 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2065 if (offset >= 512)
2066 offset = cfun->machine->frame.saved_regs_size;
2068 frame_size -= (offset + crtl->outgoing_args_size);
2069 fp_offset = 0;
2071 if (frame_size >= 0x1000000)
2073 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2074 emit_move_insn (op0, GEN_INT (-frame_size));
2075 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2076 aarch64_set_frame_expr (gen_rtx_SET
2077 (Pmode, stack_pointer_rtx,
2078 plus_constant (Pmode,
2079 stack_pointer_rtx,
2080 -frame_size)));
2082 else if (frame_size > 0)
2084 if ((frame_size & 0xfff) != frame_size)
2086 insn = emit_insn (gen_add2_insn
2087 (stack_pointer_rtx,
2088 GEN_INT (-(frame_size
2089 & ~(HOST_WIDE_INT)0xfff))));
2090 RTX_FRAME_RELATED_P (insn) = 1;
2092 if ((frame_size & 0xfff) != 0)
2094 insn = emit_insn (gen_add2_insn
2095 (stack_pointer_rtx,
2096 GEN_INT (-(frame_size
2097 & (HOST_WIDE_INT)0xfff))));
2098 RTX_FRAME_RELATED_P (insn) = 1;
2102 else
2103 frame_size = -1;
2105 if (offset > 0)
2107 /* If the frame pointer is needed, save the frame pointer and LR
2108 first. Make the frame pointer point to the location of the
2109 old frame pointer on the stack. */
2110 if (frame_pointer_needed)
2112 rtx mem_fp, mem_lr;
2114 if (fp_offset)
2116 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2117 GEN_INT (-offset)));
2118 RTX_FRAME_RELATED_P (insn) = 1;
2119 aarch64_set_frame_expr (gen_rtx_SET
2120 (Pmode, stack_pointer_rtx,
2121 gen_rtx_MINUS (Pmode,
2122 stack_pointer_rtx,
2123 GEN_INT (offset))));
2124 mem_fp = gen_frame_mem (DImode,
2125 plus_constant (Pmode,
2126 stack_pointer_rtx,
2127 fp_offset));
2128 mem_lr = gen_frame_mem (DImode,
2129 plus_constant (Pmode,
2130 stack_pointer_rtx,
2131 fp_offset
2132 + UNITS_PER_WORD));
2133 insn = emit_insn (gen_store_pairdi (mem_fp,
2134 hard_frame_pointer_rtx,
2135 mem_lr,
2136 gen_rtx_REG (DImode,
2137 LR_REGNUM)));
2139 else
2141 insn = emit_insn (gen_storewb_pairdi_di
2142 (stack_pointer_rtx, stack_pointer_rtx,
2143 hard_frame_pointer_rtx,
2144 gen_rtx_REG (DImode, LR_REGNUM),
2145 GEN_INT (-offset),
2146 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2147 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2150 /* The first part of a frame-related parallel insn is always
2151 assumed to be relevant to the frame calculations;
2152 subsequent parts are only frame-related if explicitly
2153 marked. */
2154 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2155 RTX_FRAME_RELATED_P (insn) = 1;
2157 /* Set up frame pointer to point to the location of the
2158 previous frame pointer on the stack. */
2159 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2160 stack_pointer_rtx,
2161 GEN_INT (fp_offset)));
2162 aarch64_set_frame_expr (gen_rtx_SET
2163 (Pmode, hard_frame_pointer_rtx,
2164 plus_constant (Pmode,
2165 stack_pointer_rtx,
2166 fp_offset)));
2167 RTX_FRAME_RELATED_P (insn) = 1;
2168 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2169 hard_frame_pointer_rtx));
2171 else
2173 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2174 GEN_INT (-offset)));
2175 RTX_FRAME_RELATED_P (insn) = 1;
2178 aarch64_save_or_restore_callee_save_registers
2179 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2182 /* When offset >= 512,
2183 sub sp, sp, #<outgoing_args_size> */
2184 if (frame_size > -1)
2186 if (crtl->outgoing_args_size > 0)
2188 insn = emit_insn (gen_add2_insn
2189 (stack_pointer_rtx,
2190 GEN_INT (- crtl->outgoing_args_size)));
2191 RTX_FRAME_RELATED_P (insn) = 1;
2196 /* Generate the epilogue instructions for returning from a function. */
2197 void
2198 aarch64_expand_epilogue (bool for_sibcall)
2200 HOST_WIDE_INT original_frame_size, frame_size, offset;
2201 HOST_WIDE_INT fp_offset;
2202 rtx insn;
2203 rtx cfa_reg;
2205 aarch64_layout_frame ();
2206 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2207 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2208 + crtl->outgoing_args_size);
2209 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2210 STACK_BOUNDARY / BITS_PER_UNIT);
2212 fp_offset = (offset
2213 - original_frame_size
2214 - cfun->machine->frame.saved_regs_size);
2216 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2218 /* Store pairs and load pairs have an offset range of only -512 to 504. */
2219 if (offset >= 512)
2221 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2222 if (offset >= 512)
2223 offset = cfun->machine->frame.saved_regs_size;
2225 frame_size -= (offset + crtl->outgoing_args_size);
2226 fp_offset = 0;
2227 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2229 insn = emit_insn (gen_add2_insn
2230 (stack_pointer_rtx,
2231 GEN_INT (crtl->outgoing_args_size)));
2232 RTX_FRAME_RELATED_P (insn) = 1;
2235 else
2236 frame_size = -1;
2238 /* If there were outgoing arguments or we've done dynamic stack
2239 allocation, then restore the stack pointer from the frame
2240 pointer. This is at most one insn and more efficient than using
2241 GCC's internal mechanism. */
2242 if (frame_pointer_needed
2243 && (crtl->outgoing_args_size || cfun->calls_alloca))
2245 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2246 hard_frame_pointer_rtx,
2247 GEN_INT (- fp_offset)));
2248 RTX_FRAME_RELATED_P (insn) = 1;
2249 /* As SP is set to (FP - fp_offset), according to the rules in
2250 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2251 from the value of SP from now on. */
2252 cfa_reg = stack_pointer_rtx;
2255 aarch64_save_or_restore_callee_save_registers
2256 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2258 /* Restore the frame pointer and lr if the frame pointer is needed. */
2259 if (offset > 0)
2261 if (frame_pointer_needed)
2263 rtx mem_fp, mem_lr;
2265 if (fp_offset)
2267 mem_fp = gen_frame_mem (DImode,
2268 plus_constant (Pmode,
2269 stack_pointer_rtx,
2270 fp_offset));
2271 mem_lr = gen_frame_mem (DImode,
2272 plus_constant (Pmode,
2273 stack_pointer_rtx,
2274 fp_offset
2275 + UNITS_PER_WORD));
2276 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2277 mem_fp,
2278 gen_rtx_REG (DImode,
2279 LR_REGNUM),
2280 mem_lr));
2282 else
2284 insn = emit_insn (gen_loadwb_pairdi_di
2285 (stack_pointer_rtx,
2286 stack_pointer_rtx,
2287 hard_frame_pointer_rtx,
2288 gen_rtx_REG (DImode, LR_REGNUM),
2289 GEN_INT (offset),
2290 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2291 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2292 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2293 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2294 plus_constant (Pmode, cfa_reg,
2295 offset))));
2298 /* The first part of a frame-related parallel insn
2299 is always assumed to be relevant to the frame
2300 calculations; subsequent parts are only
2301 frame-related if explicitly marked. */
2302 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2303 RTX_FRAME_RELATED_P (insn) = 1;
2304 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2305 add_reg_note (insn, REG_CFA_RESTORE,
2306 gen_rtx_REG (DImode, LR_REGNUM));
2308 if (fp_offset)
2310 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2311 GEN_INT (offset)));
2312 RTX_FRAME_RELATED_P (insn) = 1;
2315 else
2317 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2318 GEN_INT (offset)));
2319 RTX_FRAME_RELATED_P (insn) = 1;
2323 /* Stack adjustment for exception handler. */
2324 if (crtl->calls_eh_return)
2326 /* We need to unwind the stack by the offset computed by
2327 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2328 based on SP. Ideally we would update the SP and define the
2329 CFA along the lines of:
2331 SP = SP + EH_RETURN_STACKADJ_RTX
2332 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2334 However the dwarf emitter only understands a constant
2335 register offset.
2337 The solution chosen here is to use the otherwise unused IP0
2338 as a temporary register to hold the current SP value. The
2339 CFA is described using IP0 then SP is modified. */
2341 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2343 insn = emit_move_insn (ip0, stack_pointer_rtx);
2344 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2345 RTX_FRAME_RELATED_P (insn) = 1;
2347 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2349 /* Ensure the assignment to IP0 does not get optimized away. */
2350 emit_use (ip0);
2353 if (frame_size > -1)
2355 if (frame_size >= 0x1000000)
2357 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2358 emit_move_insn (op0, GEN_INT (frame_size));
2359 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2360 aarch64_set_frame_expr (gen_rtx_SET
2361 (Pmode, stack_pointer_rtx,
2362 plus_constant (Pmode,
2363 stack_pointer_rtx,
2364 frame_size)));
2366 else if (frame_size > 0)
2368 if ((frame_size & 0xfff) != 0)
2370 insn = emit_insn (gen_add2_insn
2371 (stack_pointer_rtx,
2372 GEN_INT ((frame_size
2373 & (HOST_WIDE_INT) 0xfff))));
2374 RTX_FRAME_RELATED_P (insn) = 1;
2376 if ((frame_size & 0xfff) != frame_size)
2378 insn = emit_insn (gen_add2_insn
2379 (stack_pointer_rtx,
2380 GEN_INT ((frame_size
2381 & ~ (HOST_WIDE_INT) 0xfff))));
2382 RTX_FRAME_RELATED_P (insn) = 1;
2386 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2387 plus_constant (Pmode,
2388 stack_pointer_rtx,
2389 offset)));
2392 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2393 if (!for_sibcall)
2394 emit_jump_insn (ret_rtx);
2397 /* Return the place to copy the exception unwinding return address to.
2398 This will probably be a stack slot, but could (in theory) be the
2399 return register. */
2401 aarch64_final_eh_return_addr (void)
2403 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2404 aarch64_layout_frame ();
2405 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2406 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2407 + crtl->outgoing_args_size);
2408 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2409 STACK_BOUNDARY / BITS_PER_UNIT);
2410 fp_offset = offset
2411 - original_frame_size
2412 - cfun->machine->frame.saved_regs_size;
2414 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2415 return gen_rtx_REG (DImode, LR_REGNUM);
2417 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2418 result in a store to save LR introduced by builtin_eh_return () being
2419 incorrectly deleted because the alias is not detected.
2420 So in the calculation of the address to copy the exception unwinding
2421 return address to, we note 2 cases.
2422 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2423 we return a SP-relative location since all the addresses are SP-relative
2424 in this case. This prevents the store from being optimized away.
2425 If the fp_offset is not 0, then the addresses will be FP-relative and
2426 therefore we return a FP-relative location. */
2428 if (frame_pointer_needed)
2430 if (fp_offset)
2431 return gen_frame_mem (DImode,
2432 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2433 else
2434 return gen_frame_mem (DImode,
2435 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2438 /* If FP is not needed, we calculate the location of LR, which would be
2439 at the top of the saved registers block. */
2441 return gen_frame_mem (DImode,
2442 plus_constant (Pmode,
2443 stack_pointer_rtx,
2444 fp_offset
2445 + cfun->machine->frame.saved_regs_size
2446 - 2 * UNITS_PER_WORD));
2449 /* Output code to build up a constant in a register. */
2450 static void
2451 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2453 if (aarch64_bitmask_imm (val, DImode))
2454 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2455 else
2457 int i;
2458 int ncount = 0;
2459 int zcount = 0;
2460 HOST_WIDE_INT valp = val >> 16;
2461 HOST_WIDE_INT valm;
2462 HOST_WIDE_INT tval;
2464 for (i = 16; i < 64; i += 16)
2466 valm = (valp & 0xffff);
2468 if (valm != 0)
2469 ++ zcount;
2471 if (valm != 0xffff)
2472 ++ ncount;
2474 valp >>= 16;
2477 /* zcount contains the number of additional MOVK instructions
2478 required if the constant is built up with an initial MOVZ instruction,
2479 while ncount is the number of MOVK instructions required if starting
2480 with a MOVN instruction. Choose the sequence that yields the fewer
2481 instructions, preferring MOVZ instructions when the two counts
2482 are the same. */
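/* Worked example (editor's illustration, not from the original source):
   for VAL = 0xffffffffffff1234 the three upper 16-bit chunks are all
   0xffff, so ncount = 0 and zcount = 3.  Starting with MOVN therefore
   needs no trailing MOVK at all, whereas starting with MOVZ would need
   three, so the MOVN sequence below is chosen.  */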
2483 if (ncount < zcount)
2485 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2486 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2487 tval = 0xffff;
2489 else
2491 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2492 GEN_INT (val & 0xffff));
2493 tval = 0;
2496 val >>= 16;
2498 for (i = 16; i < 64; i += 16)
2500 if ((val & 0xffff) != tval)
2501 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2502 GEN_INT (i), GEN_INT (val & 0xffff)));
2503 val >>= 16;
2508 static void
2509 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2511 HOST_WIDE_INT mdelta = delta;
2512 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2513 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2515 if (mdelta < 0)
2516 mdelta = -mdelta;
2518 if (mdelta >= 4096 * 4096)
2520 aarch64_build_constant (scratchreg, delta);
2521 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2523 else if (mdelta > 0)
2525 if (mdelta >= 4096)
2527 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2528 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2529 if (delta < 0)
2530 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2531 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2532 else
2533 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2534 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2536 if (mdelta % 4096 != 0)
2538 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2539 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2540 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
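/* Worked example (editor's illustration, not from the original source):
   for DELTA = 5000 the code above emits a move of 5000 / 4096 = 1 into
   the scratch register, adds it shifted left by 12 (+4096), and then
   adds the remaining 5000 % 4096 = 904 as a plain immediate.  Deltas of
   magnitude 4096 * 4096 or more are instead built with
   aarch64_build_constant and added in one go.  */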
2545 /* Output code to add DELTA to the first argument, and then jump
2546 to FUNCTION. Used for C++ multiple inheritance. */
2547 static void
2548 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2549 HOST_WIDE_INT delta,
2550 HOST_WIDE_INT vcall_offset,
2551 tree function)
2553 /* The this pointer is always in x0. Note that this differs from
2554 Arm where the this pointer may be bumped to r1 if r0 is required
2555 to return a pointer to an aggregate. On AArch64 a result value
2556 pointer will be in x8. */
2557 int this_regno = R0_REGNUM;
2558 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2560 reload_completed = 1;
2561 emit_note (NOTE_INSN_PROLOGUE_END);
2563 if (vcall_offset == 0)
2564 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2565 else
2567 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2569 this_rtx = gen_rtx_REG (Pmode, this_regno);
2570 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2571 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2573 addr = this_rtx;
2574 if (delta != 0)
2576 if (delta >= -256 && delta < 256)
2577 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2578 plus_constant (Pmode, this_rtx, delta));
2579 else
2580 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2583 if (Pmode == ptr_mode)
2584 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2585 else
2586 aarch64_emit_move (temp0,
2587 gen_rtx_ZERO_EXTEND (Pmode,
2588 gen_rtx_MEM (ptr_mode, addr)));
2590 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2591 addr = plus_constant (Pmode, temp0, vcall_offset);
2592 else
2594 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2595 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2598 if (Pmode == ptr_mode)
2599 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2600 else
2601 aarch64_emit_move (temp1,
2602 gen_rtx_SIGN_EXTEND (Pmode,
2603 gen_rtx_MEM (ptr_mode, addr)));
2605 emit_insn (gen_add2_insn (this_rtx, temp1));
2608 /* Generate a tail call to the target function. */
2609 if (!TREE_USED (function))
2611 assemble_external (function);
2612 TREE_USED (function) = 1;
2614 funexp = XEXP (DECL_RTL (function), 0);
2615 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2616 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2617 SIBLING_CALL_P (insn) = 1;
2619 insn = get_insns ();
2620 shorten_branches (insn);
2621 final_start_function (insn, file, 1);
2622 final (insn, file, 1);
2623 final_end_function ();
2625 /* Stop pretending to be a post-reload pass. */
2626 reload_completed = 0;
2629 static int
2630 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2632 if (GET_CODE (*x) == SYMBOL_REF)
2633 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2635 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2636 TLS offsets, not real symbol references. */
2637 if (GET_CODE (*x) == UNSPEC
2638 && XINT (*x, 1) == UNSPEC_TLS)
2639 return -1;
2641 return 0;
2644 static bool
2645 aarch64_tls_referenced_p (rtx x)
2647 if (!TARGET_HAVE_TLS)
2648 return false;
2650 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2654 static int
2655 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2657 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2658 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2660 if (*imm1 < *imm2)
2661 return -1;
2662 if (*imm1 > *imm2)
2663 return +1;
2664 return 0;
2668 static void
2669 aarch64_build_bitmask_table (void)
2671 unsigned HOST_WIDE_INT mask, imm;
2672 unsigned int log_e, e, s, r;
2673 unsigned int nimms = 0;
2675 for (log_e = 1; log_e <= 6; log_e++)
2677 e = 1 << log_e;
2678 if (e == 64)
2679 mask = ~(HOST_WIDE_INT) 0;
2680 else
2681 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2682 for (s = 1; s < e; s++)
2684 for (r = 0; r < e; r++)
2686 /* set s consecutive bits to 1 (s < 64) */
2687 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2688 /* rotate right by r */
2689 if (r != 0)
2690 imm = ((imm >> r) | (imm << (e - r))) & mask;
2691 /* replicate the constant depending on SIMD size */
2692 switch (log_e) {
2693 case 1: imm |= (imm << 2);
2694 case 2: imm |= (imm << 4);
2695 case 3: imm |= (imm << 8);
2696 case 4: imm |= (imm << 16);
2697 case 5: imm |= (imm << 32);
2698 case 6:
2699 break;
2700 default:
2701 gcc_unreachable ();
2703 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2704 aarch64_bitmasks[nimms++] = imm;
2709 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2710 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2711 aarch64_bitmasks_cmp);
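/* Illustrative entry (editor's note, not from the original source):
   with element size e = 8, run length s = 3 and rotation r = 1, the run
   0b00000111 rotates to 0b10000011 = 0x83, and the fall-through
   replication above widens it to the table entry 0x8383838383838383.
   The loops generate one entry per (e, s, r) triple, 5334 in all, which
   the qsort call keeps sorted for the bsearch in aarch64_bitmask_imm.  */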
2715 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2716 a left shift of 0 or 12 bits. */
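/* For instance (editor's illustration, not from the original source):
   0x123 and 0xabc000 (= 0xabc << 12) are accepted, while 0x1001 is
   rejected because its set bits straddle the two 12-bit windows.  */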
2717 bool
2718 aarch64_uimm12_shift (HOST_WIDE_INT val)
2720 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2721 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2726 /* Return true if val is an immediate that can be loaded into a
2727 register by a MOVZ instruction. */
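/* For instance (editor's illustration, not from the original source):
   0x12340000 (one 16-bit chunk at bit 16) and, for DImode,
   0xabcd00000000 (chunk at bit 32) are accepted, while 0x12345678
   spans two chunks and is rejected.  */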
2728 static bool
2729 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2731 if (GET_MODE_SIZE (mode) > 4)
2733 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2734 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2735 return 1;
2737 else
2739 /* Ignore sign extension. */
2740 val &= (HOST_WIDE_INT) 0xffffffff;
2742 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2743 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2747 /* Return true if val is a valid bitmask immediate. */
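/* For instance (editor's illustration, not from the original source):
   0x00ff00ff00ff00ff is a valid bitmask immediate (an 8-bit run of ones
   replicated every 16 bits), while 0x1234 is not a rotated, replicated
   run and the bsearch below fails.  For modes narrower than 8 bytes the
   32-bit pattern is replicated first, e.g. SImode 0x0000ffff is looked
   up as 0x0000ffff0000ffff.  */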
2748 bool
2749 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2751 if (GET_MODE_SIZE (mode) < 8)
2753 /* Replicate bit pattern. */
2754 val &= (HOST_WIDE_INT) 0xffffffff;
2755 val |= val << 32;
2757 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2758 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2762 /* Return true if val is an immediate that can be loaded into a
2763 register in a single instruction. */
2764 bool
2765 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2767 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2768 return 1;
2769 return aarch64_bitmask_imm (val, mode);
2772 static bool
2773 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2775 rtx base, offset;
2777 if (GET_CODE (x) == HIGH)
2778 return true;
2780 split_const (x, &base, &offset);
2781 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2783 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2784 != SYMBOL_FORCE_TO_MEM)
2785 return true;
2786 else
2787 /* Avoid generating a 64-bit relocation in ILP32; leave it
2788 to aarch64_expand_mov_immediate to handle it properly. */
2789 return mode != ptr_mode;
2792 return aarch64_tls_referenced_p (x);
2795 /* Return true if register REGNO is a valid index register.
2796 STRICT_P is true if REG_OK_STRICT is in effect. */
2798 bool
2799 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2801 if (!HARD_REGISTER_NUM_P (regno))
2803 if (!strict_p)
2804 return true;
2806 if (!reg_renumber)
2807 return false;
2809 regno = reg_renumber[regno];
2811 return GP_REGNUM_P (regno);
2814 /* Return true if register REGNO is a valid base register.
2815 STRICT_P is true if REG_OK_STRICT is in effect. */
2817 bool
2818 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2820 if (!HARD_REGISTER_NUM_P (regno))
2822 if (!strict_p)
2823 return true;
2825 if (!reg_renumber)
2826 return false;
2828 regno = reg_renumber[regno];
2831 /* The fake registers will be eliminated to either the stack or
2832 hard frame pointer, both of which are usually valid base registers.
2833 Reload deals with the cases where the eliminated form isn't valid. */
2834 return (GP_REGNUM_P (regno)
2835 || regno == SP_REGNUM
2836 || regno == FRAME_POINTER_REGNUM
2837 || regno == ARG_POINTER_REGNUM);
2840 /* Return true if X is a valid base register.
2841 STRICT_P is true if REG_OK_STRICT is in effect. */
2843 static bool
2844 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2846 if (!strict_p && GET_CODE (x) == SUBREG)
2847 x = SUBREG_REG (x);
2849 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2852 /* Return true if address offset is a valid index. If it is, fill in INFO
2853 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
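/* For example (editor's illustration, not from the original source):
   for an SImode access the index
     (mult:DI (sign_extend:DI (reg:SI)) (const_int 4))
   classifies as ADDRESS_REG_SXTW with shift = 2, which is accepted
   because (1 << 2) matches the 4-byte access size and is later printed
   as an operand of the form [base, wN, sxtw 2].  */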
2855 static bool
2856 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2857 enum machine_mode mode, bool strict_p)
2859 enum aarch64_address_type type;
2860 rtx index;
2861 int shift;
2863 /* (reg:P) */
2864 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2865 && GET_MODE (x) == Pmode)
2867 type = ADDRESS_REG_REG;
2868 index = x;
2869 shift = 0;
2871 /* (sign_extend:DI (reg:SI)) */
2872 else if ((GET_CODE (x) == SIGN_EXTEND
2873 || GET_CODE (x) == ZERO_EXTEND)
2874 && GET_MODE (x) == DImode
2875 && GET_MODE (XEXP (x, 0)) == SImode)
2877 type = (GET_CODE (x) == SIGN_EXTEND)
2878 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2879 index = XEXP (x, 0);
2880 shift = 0;
2882 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2883 else if (GET_CODE (x) == MULT
2884 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2885 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2886 && GET_MODE (XEXP (x, 0)) == DImode
2887 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2888 && CONST_INT_P (XEXP (x, 1)))
2890 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2891 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2892 index = XEXP (XEXP (x, 0), 0);
2893 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2895 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2896 else if (GET_CODE (x) == ASHIFT
2897 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2898 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2899 && GET_MODE (XEXP (x, 0)) == DImode
2900 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2901 && CONST_INT_P (XEXP (x, 1)))
2903 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2904 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2905 index = XEXP (XEXP (x, 0), 0);
2906 shift = INTVAL (XEXP (x, 1));
2908 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2909 else if ((GET_CODE (x) == SIGN_EXTRACT
2910 || GET_CODE (x) == ZERO_EXTRACT)
2911 && GET_MODE (x) == DImode
2912 && GET_CODE (XEXP (x, 0)) == MULT
2913 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2914 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2916 type = (GET_CODE (x) == SIGN_EXTRACT)
2917 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2918 index = XEXP (XEXP (x, 0), 0);
2919 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2920 if (INTVAL (XEXP (x, 1)) != 32 + shift
2921 || INTVAL (XEXP (x, 2)) != 0)
2922 shift = -1;
2924 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2925 (const_int 0xffffffff<<shift)) */
2926 else if (GET_CODE (x) == AND
2927 && GET_MODE (x) == DImode
2928 && GET_CODE (XEXP (x, 0)) == MULT
2929 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2930 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2931 && CONST_INT_P (XEXP (x, 1)))
2933 type = ADDRESS_REG_UXTW;
2934 index = XEXP (XEXP (x, 0), 0);
2935 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2936 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2937 shift = -1;
2939 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2940 else if ((GET_CODE (x) == SIGN_EXTRACT
2941 || GET_CODE (x) == ZERO_EXTRACT)
2942 && GET_MODE (x) == DImode
2943 && GET_CODE (XEXP (x, 0)) == ASHIFT
2944 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2945 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2947 type = (GET_CODE (x) == SIGN_EXTRACT)
2948 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2949 index = XEXP (XEXP (x, 0), 0);
2950 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2951 if (INTVAL (XEXP (x, 1)) != 32 + shift
2952 || INTVAL (XEXP (x, 2)) != 0)
2953 shift = -1;
2955 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2956 (const_int 0xffffffff<<shift)) */
2957 else if (GET_CODE (x) == AND
2958 && GET_MODE (x) == DImode
2959 && GET_CODE (XEXP (x, 0)) == ASHIFT
2960 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2961 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2962 && CONST_INT_P (XEXP (x, 1)))
2964 type = ADDRESS_REG_UXTW;
2965 index = XEXP (XEXP (x, 0), 0);
2966 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2967 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2968 shift = -1;
2970 /* (mult:P (reg:P) (const_int scale)) */
2971 else if (GET_CODE (x) == MULT
2972 && GET_MODE (x) == Pmode
2973 && GET_MODE (XEXP (x, 0)) == Pmode
2974 && CONST_INT_P (XEXP (x, 1)))
2976 type = ADDRESS_REG_REG;
2977 index = XEXP (x, 0);
2978 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2980 /* (ashift:P (reg:P) (const_int shift)) */
2981 else if (GET_CODE (x) == ASHIFT
2982 && GET_MODE (x) == Pmode
2983 && GET_MODE (XEXP (x, 0)) == Pmode
2984 && CONST_INT_P (XEXP (x, 1)))
2986 type = ADDRESS_REG_REG;
2987 index = XEXP (x, 0);
2988 shift = INTVAL (XEXP (x, 1));
2990 else
2991 return false;
2993 if (GET_CODE (index) == SUBREG)
2994 index = SUBREG_REG (index);
2996 if ((shift == 0 ||
2997 (shift > 0 && shift <= 3
2998 && (1 << shift) == GET_MODE_SIZE (mode)))
2999 && REG_P (index)
3000 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3002 info->type = type;
3003 info->offset = index;
3004 info->shift = shift;
3005 return true;
3008 return false;
3011 static inline bool
3012 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3014 return (offset >= -64 * GET_MODE_SIZE (mode)
3015 && offset < 64 * GET_MODE_SIZE (mode)
3016 && offset % GET_MODE_SIZE (mode) == 0);
3019 static inline bool
3020 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3021 HOST_WIDE_INT offset)
3023 return offset >= -256 && offset < 256;
3026 static inline bool
3027 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3029 return (offset >= 0
3030 && offset < 4096 * GET_MODE_SIZE (mode)
3031 && offset % GET_MODE_SIZE (mode) == 0);
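/* Illustrative ranges for DImode (editor's note, not from the original
   source): the 7-bit signed scaled form covers multiples of 8 in
   [-512, 504], the 9-bit signed unscaled form covers any offset in
   [-256, 255], and the 12-bit unsigned scaled form covers multiples of
   8 in [0, 32760].  */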
3034 /* Return true if X is a valid address for machine mode MODE. If it is,
3035 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3036 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3038 static bool
3039 aarch64_classify_address (struct aarch64_address_info *info,
3040 rtx x, enum machine_mode mode,
3041 RTX_CODE outer_code, bool strict_p)
3043 enum rtx_code code = GET_CODE (x);
3044 rtx op0, op1;
3045 bool allow_reg_index_p =
3046 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3048 /* Don't support anything other than POST_INC or REG addressing for
3049 AdvSIMD. */
3050 if (aarch64_vector_mode_p (mode)
3051 && (code != POST_INC && code != REG))
3052 return false;
3054 switch (code)
3056 case REG:
3057 case SUBREG:
3058 info->type = ADDRESS_REG_IMM;
3059 info->base = x;
3060 info->offset = const0_rtx;
3061 return aarch64_base_register_rtx_p (x, strict_p);
3063 case PLUS:
3064 op0 = XEXP (x, 0);
3065 op1 = XEXP (x, 1);
3066 if (GET_MODE_SIZE (mode) != 0
3067 && CONST_INT_P (op1)
3068 && aarch64_base_register_rtx_p (op0, strict_p))
3070 HOST_WIDE_INT offset = INTVAL (op1);
3072 info->type = ADDRESS_REG_IMM;
3073 info->base = op0;
3074 info->offset = op1;
3076 /* TImode and TFmode values are allowed both in pairs of X
3077 registers and in individual Q registers. The available
3078 address modes are:
3079 X,X: 7-bit signed scaled offset
3080 Q: 9-bit signed offset
3081 We conservatively require an offset representable in both modes.
3083 if (mode == TImode || mode == TFmode)
3084 return (offset_7bit_signed_scaled_p (mode, offset)
3085 && offset_9bit_signed_unscaled_p (mode, offset));
3087 if (outer_code == PARALLEL)
3088 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3089 && offset_7bit_signed_scaled_p (mode, offset));
3090 else
3091 return (offset_9bit_signed_unscaled_p (mode, offset)
3092 || offset_12bit_unsigned_scaled_p (mode, offset));
3095 if (allow_reg_index_p)
3097 /* Look for base + (scaled/extended) index register. */
3098 if (aarch64_base_register_rtx_p (op0, strict_p)
3099 && aarch64_classify_index (info, op1, mode, strict_p))
3101 info->base = op0;
3102 return true;
3104 if (aarch64_base_register_rtx_p (op1, strict_p)
3105 && aarch64_classify_index (info, op0, mode, strict_p))
3107 info->base = op1;
3108 return true;
3112 return false;
3114 case POST_INC:
3115 case POST_DEC:
3116 case PRE_INC:
3117 case PRE_DEC:
3118 info->type = ADDRESS_REG_WB;
3119 info->base = XEXP (x, 0);
3120 info->offset = NULL_RTX;
3121 return aarch64_base_register_rtx_p (info->base, strict_p);
3123 case POST_MODIFY:
3124 case PRE_MODIFY:
3125 info->type = ADDRESS_REG_WB;
3126 info->base = XEXP (x, 0);
3127 if (GET_CODE (XEXP (x, 1)) == PLUS
3128 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3129 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3130 && aarch64_base_register_rtx_p (info->base, strict_p))
3132 HOST_WIDE_INT offset;
3133 info->offset = XEXP (XEXP (x, 1), 1);
3134 offset = INTVAL (info->offset);
3136 /* TImode and TFmode values are allowed both in pairs of X
3137 registers and in individual Q registers. The available
3138 address modes are:
3139 X,X: 7-bit signed scaled offset
3140 Q: 9-bit signed offset
3141 We conservatively require an offset representable in both modes.
3143 if (mode == TImode || mode == TFmode)
3144 return (offset_7bit_signed_scaled_p (mode, offset)
3145 && offset_9bit_signed_unscaled_p (mode, offset));
3147 if (outer_code == PARALLEL)
3148 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3149 && offset_7bit_signed_scaled_p (mode, offset));
3150 else
3151 return offset_9bit_signed_unscaled_p (mode, offset);
3153 return false;
3155 case CONST:
3156 case SYMBOL_REF:
3157 case LABEL_REF:
3158 /* load literal: pc-relative constant pool entry. Only supported
3159 for SI mode or larger. */
3160 info->type = ADDRESS_SYMBOLIC;
3161 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3163 rtx sym, addend;
3165 split_const (x, &sym, &addend);
3166 return (GET_CODE (sym) == LABEL_REF
3167 || (GET_CODE (sym) == SYMBOL_REF
3168 && CONSTANT_POOL_ADDRESS_P (sym)));
3170 return false;
3172 case LO_SUM:
3173 info->type = ADDRESS_LO_SUM;
3174 info->base = XEXP (x, 0);
3175 info->offset = XEXP (x, 1);
3176 if (allow_reg_index_p
3177 && aarch64_base_register_rtx_p (info->base, strict_p))
3179 rtx sym, offs;
3180 split_const (info->offset, &sym, &offs);
3181 if (GET_CODE (sym) == SYMBOL_REF
3182 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3183 == SYMBOL_SMALL_ABSOLUTE))
3185 /* The symbol and offset must be aligned to the access size. */
3186 unsigned int align;
3187 unsigned int ref_size;
3189 if (CONSTANT_POOL_ADDRESS_P (sym))
3190 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3191 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3193 tree exp = SYMBOL_REF_DECL (sym);
3194 align = TYPE_ALIGN (TREE_TYPE (exp));
3195 align = CONSTANT_ALIGNMENT (exp, align);
3197 else if (SYMBOL_REF_DECL (sym))
3198 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3199 else
3200 align = BITS_PER_UNIT;
3202 ref_size = GET_MODE_SIZE (mode);
3203 if (ref_size == 0)
3204 ref_size = GET_MODE_SIZE (DImode);
3206 return ((INTVAL (offs) & (ref_size - 1)) == 0
3207 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3210 return false;
3212 default:
3213 return false;
3217 bool
3218 aarch64_symbolic_address_p (rtx x)
3220 rtx offset;
3222 split_const (x, &x, &offset);
3223 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3226 /* Classify the base of symbolic expression X, given that X appears in
3227 context CONTEXT. */
3229 enum aarch64_symbol_type
3230 aarch64_classify_symbolic_expression (rtx x,
3231 enum aarch64_symbol_context context)
3233 rtx offset;
3235 split_const (x, &x, &offset);
3236 return aarch64_classify_symbol (x, context);
3240 /* Return TRUE if X is a legitimate address for accessing memory in
3241 mode MODE. */
3242 static bool
3243 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3245 struct aarch64_address_info addr;
3247 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3250 /* Return TRUE if X is a legitimate address for accessing memory in
3251 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3252 pair operation. */
3253 bool
3254 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3255 RTX_CODE outer_code, bool strict_p)
3257 struct aarch64_address_info addr;
3259 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3262 /* Return TRUE if rtx X is immediate constant 0.0 */
3263 bool
3264 aarch64_float_const_zero_rtx_p (rtx x)
3266 REAL_VALUE_TYPE r;
3268 if (GET_MODE (x) == VOIDmode)
3269 return false;
3271 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3272 if (REAL_VALUE_MINUS_ZERO (r))
3273 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3274 return REAL_VALUES_EQUAL (r, dconst0);
3277 /* Return the fixed registers used for condition codes. */
3279 static bool
3280 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3282 *p1 = CC_REGNUM;
3283 *p2 = INVALID_REGNUM;
3284 return true;
3287 enum machine_mode
3288 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3290 /* All floating point compares return CCFP if it is an equality
3291 comparison, and CCFPE otherwise. */
3292 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3294 switch (code)
3296 case EQ:
3297 case NE:
3298 case UNORDERED:
3299 case ORDERED:
3300 case UNLT:
3301 case UNLE:
3302 case UNGT:
3303 case UNGE:
3304 case UNEQ:
3305 case LTGT:
3306 return CCFPmode;
3308 case LT:
3309 case LE:
3310 case GT:
3311 case GE:
3312 return CCFPEmode;
3314 default:
3315 gcc_unreachable ();
3319 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3320 && y == const0_rtx
3321 && (code == EQ || code == NE || code == LT || code == GE)
3322 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3323 || GET_CODE (x) == NEG))
3324 return CC_NZmode;
3326 /* A compare with a shifted operand. Because of canonicalization,
3327 the comparison will have to be swapped when we emit the assembly
3328 code. */
3329 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3330 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3331 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3332 || GET_CODE (x) == LSHIFTRT
3333 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3334 return CC_SWPmode;
3336 /* Similarly for a negated operand, but we can only do this for
3337 equalities. */
3338 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3339 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3340 && (code == EQ || code == NE)
3341 && GET_CODE (x) == NEG)
3342 return CC_Zmode;
3344 /* A compare of a mode narrower than SI mode against zero can be done
3345 by extending the value in the comparison. */
3346 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3347 && y == const0_rtx)
3348 /* Only use sign-extension if we really need it. */
3349 return ((code == GT || code == GE || code == LE || code == LT)
3350 ? CC_SESWPmode : CC_ZESWPmode);
3352 /* For everything else, return CCmode. */
3353 return CCmode;
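/* Illustrative cases (editor's note, not from the original source):
   (compare (ashift:DI x (const_int 3)) y) selects CC_SWPmode, since the
   shifted operand ends up on the left and the emitted condition must be
   swapped, while (compare (plus:SI a b) (const_int 0)) under EQ/NE/LT/GE
   selects CC_NZmode so the flags set by an ADDS/SUBS can be reused.  */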
3356 static unsigned
3357 aarch64_get_condition_code (rtx x)
3359 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3360 enum rtx_code comp_code = GET_CODE (x);
3362 if (GET_MODE_CLASS (mode) != MODE_CC)
3363 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3365 switch (mode)
3367 case CCFPmode:
3368 case CCFPEmode:
3369 switch (comp_code)
3371 case GE: return AARCH64_GE;
3372 case GT: return AARCH64_GT;
3373 case LE: return AARCH64_LS;
3374 case LT: return AARCH64_MI;
3375 case NE: return AARCH64_NE;
3376 case EQ: return AARCH64_EQ;
3377 case ORDERED: return AARCH64_VC;
3378 case UNORDERED: return AARCH64_VS;
3379 case UNLT: return AARCH64_LT;
3380 case UNLE: return AARCH64_LE;
3381 case UNGT: return AARCH64_HI;
3382 case UNGE: return AARCH64_PL;
3383 default: gcc_unreachable ();
3385 break;
3387 case CCmode:
3388 switch (comp_code)
3390 case NE: return AARCH64_NE;
3391 case EQ: return AARCH64_EQ;
3392 case GE: return AARCH64_GE;
3393 case GT: return AARCH64_GT;
3394 case LE: return AARCH64_LE;
3395 case LT: return AARCH64_LT;
3396 case GEU: return AARCH64_CS;
3397 case GTU: return AARCH64_HI;
3398 case LEU: return AARCH64_LS;
3399 case LTU: return AARCH64_CC;
3400 default: gcc_unreachable ();
3402 break;
3404 case CC_SWPmode:
3405 case CC_ZESWPmode:
3406 case CC_SESWPmode:
3407 switch (comp_code)
3409 case NE: return AARCH64_NE;
3410 case EQ: return AARCH64_EQ;
3411 case GE: return AARCH64_LE;
3412 case GT: return AARCH64_LT;
3413 case LE: return AARCH64_GE;
3414 case LT: return AARCH64_GT;
3415 case GEU: return AARCH64_LS;
3416 case GTU: return AARCH64_CC;
3417 case LEU: return AARCH64_CS;
3418 case LTU: return AARCH64_HI;
3419 default: gcc_unreachable ();
3421 break;
3423 case CC_NZmode:
3424 switch (comp_code)
3426 case NE: return AARCH64_NE;
3427 case EQ: return AARCH64_EQ;
3428 case GE: return AARCH64_PL;
3429 case LT: return AARCH64_MI;
3430 default: gcc_unreachable ();
3432 break;
3434 case CC_Zmode:
3435 switch (comp_code)
3437 case NE: return AARCH64_NE;
3438 case EQ: return AARCH64_EQ;
3439 default: gcc_unreachable ();
3441 break;
3443 default:
3444 gcc_unreachable ();
3445 break;
3449 static unsigned
3450 bit_count (unsigned HOST_WIDE_INT value)
3452 unsigned count = 0;
3454 while (value)
3456 count++;
3457 value &= value - 1;
3460 return count;
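/* Editor's note (not from the original source): this is the classic
   value &= value - 1 trick, which clears one set bit per iteration; for
   example 0xb0 = 0b10110000 takes exactly three iterations.  */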
3463 void
3464 aarch64_print_operand (FILE *f, rtx x, char code)
3466 switch (code)
3468 /* An integer or symbol address without a preceding # sign. */
3469 case 'c':
3470 switch (GET_CODE (x))
3472 case CONST_INT:
3473 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3474 break;
3476 case SYMBOL_REF:
3477 output_addr_const (f, x);
3478 break;
3480 case CONST:
3481 if (GET_CODE (XEXP (x, 0)) == PLUS
3482 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3484 output_addr_const (f, x);
3485 break;
3487 /* Fall through. */
3489 default:
3490 output_operand_lossage ("Unsupported operand for code '%c'", code);
3492 break;
3494 case 'e':
3495 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3497 int n;
3499 if (GET_CODE (x) != CONST_INT
3500 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3502 output_operand_lossage ("invalid operand for '%%%c'", code);
3503 return;
3506 switch (n)
3508 case 3:
3509 fputc ('b', f);
3510 break;
3511 case 4:
3512 fputc ('h', f);
3513 break;
3514 case 5:
3515 fputc ('w', f);
3516 break;
3517 default:
3518 output_operand_lossage ("invalid operand for '%%%c'", code);
3519 return;
3522 break;
3524 case 'p':
3526 int n;
3528 /* Print N such that 2^N == X. */
3529 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3531 output_operand_lossage ("invalid operand for '%%%c'", code);
3532 return;
3535 asm_fprintf (f, "%d", n);
3537 break;
3539 case 'P':
3540 /* Print the number of non-zero bits in X (a const_int). */
3541 if (GET_CODE (x) != CONST_INT)
3543 output_operand_lossage ("invalid operand for '%%%c'", code);
3544 return;
3547 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3548 break;
3550 case 'H':
3551 /* Print the higher numbered register of a pair (TImode) of regs. */
3552 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3554 output_operand_lossage ("invalid operand for '%%%c'", code);
3555 return;
3558 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3559 break;
3561 case 'm':
3562 /* Print a condition (eq, ne, etc). */
3564 /* CONST_TRUE_RTX means always -- that's the default. */
3565 if (x == const_true_rtx)
3566 return;
3568 if (!COMPARISON_P (x))
3570 output_operand_lossage ("invalid operand for '%%%c'", code);
3571 return;
3574 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3575 break;
3577 case 'M':
3578 /* Print the inverse of a condition (eq <-> ne, etc). */
3580 /* CONST_TRUE_RTX means never -- that's the default. */
3581 if (x == const_true_rtx)
3583 fputs ("nv", f);
3584 return;
3587 if (!COMPARISON_P (x))
3589 output_operand_lossage ("invalid operand for '%%%c'", code);
3590 return;
3593 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3594 (aarch64_get_condition_code (x))], f);
3595 break;
3597 case 'b':
3598 case 'h':
3599 case 's':
3600 case 'd':
3601 case 'q':
3602 /* Print a scalar FP/SIMD register name. */
3603 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3605 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3606 return;
3608 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3609 break;
3611 case 'S':
3612 case 'T':
3613 case 'U':
3614 case 'V':
3615 /* Print the first FP/SIMD register name in a list. */
3616 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3618 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3619 return;
3621 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3622 break;
3624 case 'X':
3625 /* Print bottom 16 bits of integer constant in hex. */
3626 if (GET_CODE (x) != CONST_INT)
3628 output_operand_lossage ("invalid operand for '%%%c'", code);
3629 return;
3631 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3632 break;
3634 case 'w':
3635 case 'x':
3636 /* Print a general register name or the zero register (32-bit or
3637 64-bit). */
3638 if (x == const0_rtx
3639 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3641 asm_fprintf (f, "%czr", code);
3642 break;
3645 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3647 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3648 break;
3651 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3653 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3654 break;
3657 /* Fall through */
3659 case 0:
3660 /* Print a normal operand; if it's a general register, then we
3661 assume DImode. */
3662 if (x == NULL)
3664 output_operand_lossage ("missing operand");
3665 return;
3668 switch (GET_CODE (x))
3670 case REG:
3671 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3672 break;
3674 case MEM:
3675 aarch64_memory_reference_mode = GET_MODE (x);
3676 output_address (XEXP (x, 0));
3677 break;
3679 case LABEL_REF:
3680 case SYMBOL_REF:
3681 output_addr_const (asm_out_file, x);
3682 break;
3684 case CONST_INT:
3685 asm_fprintf (f, "%wd", INTVAL (x));
3686 break;
3688 case CONST_VECTOR:
3689 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3691 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3692 HOST_WIDE_INT_MIN,
3693 HOST_WIDE_INT_MAX));
3694 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3696 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3698 fputc ('0', f);
3700 else
3701 gcc_unreachable ();
3702 break;
3704 case CONST_DOUBLE:
3705 /* CONST_DOUBLE can represent a double-width integer.
3706 In this case, the mode of x is VOIDmode. */
3707 if (GET_MODE (x) == VOIDmode)
3708 ; /* Do Nothing. */
3709 else if (aarch64_float_const_zero_rtx_p (x))
3711 fputc ('0', f);
3712 break;
3714 else if (aarch64_float_const_representable_p (x))
3716 #define buf_size 20
3717 char float_buf[buf_size] = {'\0'};
3718 REAL_VALUE_TYPE r;
3719 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3720 real_to_decimal_for_mode (float_buf, &r,
3721 buf_size, buf_size,
3722 1, GET_MODE (x));
3723 asm_fprintf (asm_out_file, "%s", float_buf);
3724 break;
3725 #undef buf_size
3727 output_operand_lossage ("invalid constant");
3728 return;
3729 default:
3730 output_operand_lossage ("invalid operand");
3731 return;
3733 break;
3735 case 'A':
3736 if (GET_CODE (x) == HIGH)
3737 x = XEXP (x, 0);
3739 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3741 case SYMBOL_SMALL_GOT:
3742 asm_fprintf (asm_out_file, ":got:");
3743 break;
3745 case SYMBOL_SMALL_TLSGD:
3746 asm_fprintf (asm_out_file, ":tlsgd:");
3747 break;
3749 case SYMBOL_SMALL_TLSDESC:
3750 asm_fprintf (asm_out_file, ":tlsdesc:");
3751 break;
3753 case SYMBOL_SMALL_GOTTPREL:
3754 asm_fprintf (asm_out_file, ":gottprel:");
3755 break;
3757 case SYMBOL_SMALL_TPREL:
3758 asm_fprintf (asm_out_file, ":tprel:");
3759 break;
3761 case SYMBOL_TINY_GOT:
3762 gcc_unreachable ();
3763 break;
3765 default:
3766 break;
3768 output_addr_const (asm_out_file, x);
3769 break;
3771 case 'L':
3772 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3774 case SYMBOL_SMALL_GOT:
3775 asm_fprintf (asm_out_file, ":lo12:");
3776 break;
3778 case SYMBOL_SMALL_TLSGD:
3779 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3780 break;
3782 case SYMBOL_SMALL_TLSDESC:
3783 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3784 break;
3786 case SYMBOL_SMALL_GOTTPREL:
3787 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3788 break;
3790 case SYMBOL_SMALL_TPREL:
3791 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3792 break;
3794 case SYMBOL_TINY_GOT:
3795 asm_fprintf (asm_out_file, ":got:");
3796 break;
3798 default:
3799 break;
3801 output_addr_const (asm_out_file, x);
3802 break;
3804 case 'G':
3806 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3808 case SYMBOL_SMALL_TPREL:
3809 asm_fprintf (asm_out_file, ":tprel_hi12:");
3810 break;
3811 default:
3812 break;
3814 output_addr_const (asm_out_file, x);
3815 break;
3817 default:
3818 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3819 return;
3823 void
3824 aarch64_print_operand_address (FILE *f, rtx x)
3826 struct aarch64_address_info addr;
3828 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3829 MEM, true))
3830 switch (addr.type)
3832 case ADDRESS_REG_IMM:
3833 if (addr.offset == const0_rtx)
3834 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3835 else
3836 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3837 INTVAL (addr.offset));
3838 return;
3840 case ADDRESS_REG_REG:
3841 if (addr.shift == 0)
3842 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3843 reg_names [REGNO (addr.offset)]);
3844 else
3845 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3846 reg_names [REGNO (addr.offset)], addr.shift);
3847 return;
3849 case ADDRESS_REG_UXTW:
3850 if (addr.shift == 0)
3851 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3852 REGNO (addr.offset) - R0_REGNUM);
3853 else
3854 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3855 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3856 return;
3858 case ADDRESS_REG_SXTW:
3859 if (addr.shift == 0)
3860 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3861 REGNO (addr.offset) - R0_REGNUM);
3862 else
3863 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3864 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3865 return;
3867 case ADDRESS_REG_WB:
3868 switch (GET_CODE (x))
3870 case PRE_INC:
3871 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3872 GET_MODE_SIZE (aarch64_memory_reference_mode));
3873 return;
3874 case POST_INC:
3875 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3876 GET_MODE_SIZE (aarch64_memory_reference_mode));
3877 return;
3878 case PRE_DEC:
3879 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3880 GET_MODE_SIZE (aarch64_memory_reference_mode));
3881 return;
3882 case POST_DEC:
3883 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3884 GET_MODE_SIZE (aarch64_memory_reference_mode));
3885 return;
3886 case PRE_MODIFY:
3887 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3888 INTVAL (addr.offset));
3889 return;
3890 case POST_MODIFY:
3891 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3892 INTVAL (addr.offset));
3893 return;
3894 default:
3895 break;
3897 break;
3899 case ADDRESS_LO_SUM:
3900 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3901 output_addr_const (f, addr.offset);
3902 asm_fprintf (f, "]");
3903 return;
3905 case ADDRESS_SYMBOLIC:
3906 break;
3909 output_addr_const (f, x);
3912 bool
3913 aarch64_label_mentioned_p (rtx x)
3915 const char *fmt;
3916 int i;
3918 if (GET_CODE (x) == LABEL_REF)
3919 return true;
3921 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3922 referencing instruction, but they are constant offsets, not
3923 symbols. */
3924 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3925 return false;
3927 fmt = GET_RTX_FORMAT (GET_CODE (x));
3928 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3930 if (fmt[i] == 'E')
3932 int j;
3934 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3935 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3936 return 1;
3938 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3939 return 1;
3942 return 0;
3945 /* Implement REGNO_REG_CLASS. */
3947 enum reg_class
3948 aarch64_regno_regclass (unsigned regno)
3950 if (GP_REGNUM_P (regno))
3951 return CORE_REGS;
3953 if (regno == SP_REGNUM)
3954 return STACK_REG;
3956 if (regno == FRAME_POINTER_REGNUM
3957 || regno == ARG_POINTER_REGNUM)
3958 return POINTER_REGS;
3960 if (FP_REGNUM_P (regno))
3961 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3963 return NO_REGS;
3966 /* Try a machine-dependent way of reloading an illegitimate address
3967 operand. If we find one, push the reload and return the new rtx. */
3970 aarch64_legitimize_reload_address (rtx *x_p,
3971 enum machine_mode mode,
3972 int opnum, int type,
3973 int ind_levels ATTRIBUTE_UNUSED)
3975 rtx x = *x_p;
3977 /* Do not allow mem (plus (reg, const)) if vector mode. */
3978 if (aarch64_vector_mode_p (mode)
3979 && GET_CODE (x) == PLUS
3980 && REG_P (XEXP (x, 0))
3981 && CONST_INT_P (XEXP (x, 1)))
3983 rtx orig_rtx = x;
3984 x = copy_rtx (x);
3985 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3986 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3987 opnum, (enum reload_type) type);
3988 return x;
3991 /* We must recognize output that we have already generated ourselves. */
3992 if (GET_CODE (x) == PLUS
3993 && GET_CODE (XEXP (x, 0)) == PLUS
3994 && REG_P (XEXP (XEXP (x, 0), 0))
3995 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3996 && CONST_INT_P (XEXP (x, 1)))
3998 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3999 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4000 opnum, (enum reload_type) type);
4001 return x;
4004 /* We wish to handle large displacements off a base register by splitting
4005 the addend across an add and the mem insn. This can cut the number of
4006 extra insns needed from 3 to 1. It is only useful for load/store of a
4007 single register with a 12-bit offset field. */
4008 if (GET_CODE (x) == PLUS
4009 && REG_P (XEXP (x, 0))
4010 && CONST_INT_P (XEXP (x, 1))
4011 && HARD_REGISTER_P (XEXP (x, 0))
4012 && mode != TImode
4013 && mode != TFmode
4014 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4016 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4017 HOST_WIDE_INT low = val & 0xfff;
4018 HOST_WIDE_INT high = val - low;
4019 HOST_WIDE_INT offs;
4020 rtx cst;
4021 enum machine_mode xmode = GET_MODE (x);
4023 /* In ILP32, xmode can be either DImode or SImode. */
4024 gcc_assert (xmode == DImode || xmode == SImode);
4026 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4027 BLKmode alignment. */
4028 if (GET_MODE_SIZE (mode) == 0)
4029 return NULL_RTX;
4031 offs = low % GET_MODE_SIZE (mode);
4033 /* Align misaligned offset by adjusting high part to compensate. */
4034 if (offs != 0)
4036 if (aarch64_uimm12_shift (high + offs))
4038 /* Align down. */
4039 low = low - offs;
4040 high = high + offs;
4042 else
4044 /* Align up. */
4045 offs = GET_MODE_SIZE (mode) - offs;
4046 low = low + offs;
4047 high = high + (low & 0x1000) - offs;
4048 low &= 0xfff;
4052 /* Check for overflow. */
4053 if (high + low != val)
4054 return NULL_RTX;
4056 cst = GEN_INT (high);
4057 if (!aarch64_uimm12_shift (high))
4058 cst = force_const_mem (xmode, cst);
4060 /* Reload high part into base reg, leaving the low part
4061 in the mem instruction.
4062 Note that replacing this gen_rtx_PLUS with plus_constant is
4063 wrong in this case because we rely on the
4064 (plus (plus reg c1) c2) structure being preserved so that
4065 XEXP (*p, 0) in push_reload below uses the correct term. */
4066 x = gen_rtx_PLUS (xmode,
4067 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4068 GEN_INT (low));
4070 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4071 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4072 opnum, (enum reload_type) type);
4073 return x;
4076 return NULL_RTX;
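/* Worked example for the large-displacement path above (editor's
   illustration, not from the original source): a DImode access at
   base + 0x13008 splits into high = 0x13000 and low = 0x8; 0x13000 is a
   12-bit immediate shifted by 12, so the reload becomes one ADD of
   0x13000 into a base register plus a load/store with offset 8, instead
   of materialising the full constant separately.  */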
4080 static reg_class_t
4081 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4082 reg_class_t rclass,
4083 enum machine_mode mode,
4084 secondary_reload_info *sri)
4086 /* Without the TARGET_SIMD instructions we cannot move a Q register
4087 to a Q register directly. We need a scratch. */
4088 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4089 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4090 && reg_class_subset_p (rclass, FP_REGS))
4092 if (mode == TFmode)
4093 sri->icode = CODE_FOR_aarch64_reload_movtf;
4094 else if (mode == TImode)
4095 sri->icode = CODE_FOR_aarch64_reload_movti;
4096 return NO_REGS;
4099 /* A TFmode or TImode memory access should be handled via an FP_REG
4100 because AArch64 has richer addressing modes for LDR/STR instructions
4101 than LDP/STP instructions. */
4102 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4103 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4104 return FP_REGS;
4106 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4107 return CORE_REGS;
4109 return NO_REGS;
4112 static bool
4113 aarch64_can_eliminate (const int from, const int to)
4115 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4116 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4118 if (frame_pointer_needed)
4120 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4121 return true;
4122 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4123 return false;
4124 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4125 && !cfun->calls_alloca)
4126 return true;
4127 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4128 return true;
4130 return false;
4133 return true;
4136 HOST_WIDE_INT
4137 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4139 HOST_WIDE_INT frame_size;
4140 HOST_WIDE_INT offset;
4142 aarch64_layout_frame ();
4143 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4144 + crtl->outgoing_args_size
4145 + cfun->machine->saved_varargs_size);
4147 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4148 offset = frame_size;
4150 if (to == HARD_FRAME_POINTER_REGNUM)
4152 if (from == ARG_POINTER_REGNUM)
4153 return offset - crtl->outgoing_args_size;
4155 if (from == FRAME_POINTER_REGNUM)
4156 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4159 if (to == STACK_POINTER_REGNUM)
4161 if (from == FRAME_POINTER_REGNUM)
4163 HOST_WIDE_INT elim = crtl->outgoing_args_size
4164 + cfun->machine->frame.saved_regs_size
4165 + get_frame_size ()
4166 - cfun->machine->frame.fp_lr_offset;
4167 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4168 return elim;
4172 return offset;
4176 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4177 previous frame. */
4180 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4182 if (count != 0)
4183 return const0_rtx;
4184 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4188 static void
4189 aarch64_asm_trampoline_template (FILE *f)
4191 if (TARGET_ILP32)
4193 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4194 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4196 else
4198 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4199 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4201 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4202 assemble_aligned_integer (4, const0_rtx);
4203 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4204 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4207 static void
4208 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4210 rtx fnaddr, mem, a_tramp;
4211 const int tramp_code_sz = 16;
4213 /* Don't need to copy the trailing D-words, we fill those in below. */
4214 emit_block_move (m_tramp, assemble_trampoline_template (),
4215 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4216 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4217 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4218 if (GET_MODE (fnaddr) != ptr_mode)
4219 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4220 emit_move_insn (mem, fnaddr);
4222 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4223 emit_move_insn (mem, chain_value);
4225 /* XXX We should really define a "clear_cache" pattern and use
4226 gen_clear_cache(). */
4227 a_tramp = XEXP (m_tramp, 0);
4228 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4229 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4230 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4231 ptr_mode);
4234 static unsigned char
4235 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4237 switch (regclass)
4239 case CORE_REGS:
4240 case POINTER_REGS:
4241 case GENERAL_REGS:
4242 case ALL_REGS:
4243 case FP_REGS:
4244 case FP_LO_REGS:
4245 return
4246 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4247 (GET_MODE_SIZE (mode) + 7) / 8;
4248 case STACK_REG:
4249 return 1;
4251 case NO_REGS:
4252 return 0;
4254 default:
4255 break;
4257 gcc_unreachable ();
4260 static reg_class_t
4261 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4263 if (regclass == POINTER_REGS)
4264 return GENERAL_REGS;
4266 if (regclass == STACK_REG)
4268 if (REG_P(x)
4269 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4270 return regclass;
4272 return NO_REGS;
4275 /* If it's an integer immediate that MOVI can't handle, then
4276 FP_REGS is not an option, so we return NO_REGS instead. */
4277 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4278 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4279 return NO_REGS;
4281 /* Register elimination can result in a request for
4282 SP+constant->FP_REGS. We cannot support such operations, which
4283 use SP as the source and an FP_REG as the destination, so reject
4284 them outright. */
4285 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4287 rtx lhs = XEXP (x, 0);
4289 /* Look through a possible SUBREG introduced by ILP32. */
4290 if (GET_CODE (lhs) == SUBREG)
4291 lhs = SUBREG_REG (lhs);
4293 gcc_assert (REG_P (lhs));
4294 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4295 POINTER_REGS));
4296 return NO_REGS;
4299 return regclass;
4302 void
4303 aarch64_asm_output_labelref (FILE* f, const char *name)
4305 asm_fprintf (f, "%U%s", name);
4308 static void
4309 aarch64_elf_asm_constructor (rtx symbol, int priority)
4311 if (priority == DEFAULT_INIT_PRIORITY)
4312 default_ctor_section_asm_out_constructor (symbol, priority);
4313 else
4315 section *s;
4316 char buf[18];
4317 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4318 s = get_section (buf, SECTION_WRITE, NULL);
4319 switch_to_section (s);
4320 assemble_align (POINTER_SIZE);
4321 assemble_aligned_integer (POINTER_BYTES, symbol);
4325 static void
4326 aarch64_elf_asm_destructor (rtx symbol, int priority)
4328 if (priority == DEFAULT_INIT_PRIORITY)
4329 default_dtor_section_asm_out_destructor (symbol, priority);
4330 else
4332 section *s;
4333 char buf[18];
4334 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4335 s = get_section (buf, SECTION_WRITE, NULL);
4336 switch_to_section (s);
4337 assemble_align (POINTER_SIZE);
4338 assemble_aligned_integer (POINTER_BYTES, symbol);
4342 const char*
4343 aarch64_output_casesi (rtx *operands)
4345 char buf[100];
4346 char label[100];
4347 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4348 int index;
4349 static const char *const patterns[4][2] =
4352 "ldrb\t%w3, [%0,%w1,uxtw]",
4353 "add\t%3, %4, %w3, sxtb #2"
4356 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4357 "add\t%3, %4, %w3, sxth #2"
4360 "ldr\t%w3, [%0,%w1,uxtw #2]",
4361 "add\t%3, %4, %w3, sxtw #2"
4363 /* We assume that DImode is only generated when not optimizing and
4364 that we don't really need 64-bit address offsets. That would
4365 imply an object file with 8GB of code in a single function! */
4367 "ldr\t%w3, [%0,%w1,uxtw #2]",
4368 "add\t%3, %4, %w3, sxtw #2"
4372 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4374 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4376 gcc_assert (index >= 0 && index <= 3);
4378 /* Need to implement table size reduction, by changing the code below. */
4379 output_asm_insn (patterns[index][0], operands);
4380 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4381 snprintf (buf, sizeof (buf),
4382 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4383 output_asm_insn (buf, operands);
4384 output_asm_insn (patterns[index][1], operands);
4385 output_asm_insn ("br\t%3", operands);
4386 assemble_label (asm_out_file, label);
4387 return "";
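/* As an illustrative sketch (register numbers are hypothetical, not taken
   from the original source), a QImode dispatch table whose base is in x0
   and whose index is in w1, with x3/x4 as the scratch operands, would emit:

	ldrb	w3, [x0,w1,uxtw]
	adr	x4, .Lrtx<N>
	add	x3, x4, w3, sxtb #2
	br	x3
   .Lrtx<N>:

   i.e. the byte loaded from the table is a scaled offset from the label
   emitted immediately after the indirect branch.  */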
4391 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4392 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4393 operator. */
4396 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4398 if (shift >= 0 && shift <= 3)
4400 int size;
4401 for (size = 8; size <= 32; size *= 2)
4403 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4404 if (mask == bits << shift)
4405 return size;
4408 return 0;
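/* A few worked examples of the check above (values chosen purely for
   illustration):

     aarch64_uxt_size (0, 0xff)          == 8    UXTB
     aarch64_uxt_size (1, 0x1fe)         == 8    UXTB, shifted left by 1
     aarch64_uxt_size (0, 0xffff)        == 16   UXTH
     aarch64_uxt_size (2, 0x3fffffffc)   == 32   UXTW, shifted left by 2

   Any other shift/mask combination yields 0.  */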
4411 static bool
4412 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4413 const_rtx x ATTRIBUTE_UNUSED)
4415 /* We can't use blocks for constants when we're using a per-function
4416 constant pool. */
4417 return false;
4420 static section *
4421 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4422 rtx x ATTRIBUTE_UNUSED,
4423 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4425 /* Force all constant pool entries into the current function section. */
4426 return function_section (current_function_decl);
4430 /* Costs. */
4432 /* Helper function for rtx cost calculation. Strip a shift expression
4433 from X. Returns the inner operand if successful, or the original
4434 expression on failure. */
4435 static rtx
4436 aarch64_strip_shift (rtx x)
4438 rtx op = x;
4440 if ((GET_CODE (op) == ASHIFT
4441 || GET_CODE (op) == ASHIFTRT
4442 || GET_CODE (op) == LSHIFTRT)
4443 && CONST_INT_P (XEXP (op, 1)))
4444 return XEXP (op, 0);
4446 if (GET_CODE (op) == MULT
4447 && CONST_INT_P (XEXP (op, 1))
4448 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4449 return XEXP (op, 0);
4451 return x;
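/* For example (a sketch, not from the original source):

     (ashift (reg:DI x) (const_int 3))   -> (reg:DI x)
     (mult (reg:DI x) (const_int 8))     -> (reg:DI x)
     (mult (reg:DI x) (const_int 12))    -> unchanged, 12 is not a power of 2.  */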
4454 /* Helper function for rtx cost calculation. Strip a shift or extend
4455 expression from X. Returns the inner operand if successful, or the
4456 original expression on failure. We deal with a number of possible
4457 canonicalization variations here. */
4458 static rtx
4459 aarch64_strip_shift_or_extend (rtx x)
4461 rtx op = x;
4463 /* Zero and sign extraction of a widened value. */
4464 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4465 && XEXP (op, 2) == const0_rtx
4466 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4467 XEXP (op, 1)))
4468 return XEXP (XEXP (op, 0), 0);
4470 /* It can also be represented (for zero-extend) as an AND with an
4471 immediate. */
4472 if (GET_CODE (op) == AND
4473 && GET_CODE (XEXP (op, 0)) == MULT
4474 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4475 && CONST_INT_P (XEXP (op, 1))
4476 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4477 INTVAL (XEXP (op, 1))) != 0)
4478 return XEXP (XEXP (op, 0), 0);
4480 /* Now handle extended register, as this may also have an optional
4481 left shift by 1..4. */
4482 if (GET_CODE (op) == ASHIFT
4483 && CONST_INT_P (XEXP (op, 1))
4484 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4485 op = XEXP (op, 0);
4487 if (GET_CODE (op) == ZERO_EXTEND
4488 || GET_CODE (op) == SIGN_EXTEND)
4489 op = XEXP (op, 0);
4491 if (op != x)
4492 return op;
4494 return aarch64_strip_shift (x);
4497 /* Calculate the cost of calculating X, storing it in *COST. Result
4498 is true if the total cost of the operation has now been calculated. */
4499 static bool
4500 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4501 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4503 rtx op0, op1;
4504 const struct cpu_cost_table *extra_cost
4505 = aarch64_tune_params->insn_extra_cost;
4507 switch (code)
4509 case SET:
4510 op0 = SET_DEST (x);
4511 op1 = SET_SRC (x);
4513 switch (GET_CODE (op0))
4515 case MEM:
4516 if (speed)
4517 *cost += extra_cost->ldst.store;
4519 if (op1 != const0_rtx)
4520 *cost += rtx_cost (op1, SET, 1, speed);
4521 return true;
4523 case SUBREG:
4524 if (! REG_P (SUBREG_REG (op0)))
4525 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4526 /* Fall through. */
4527 case REG:
4528 /* Cost is just the cost of the RHS of the set. */
4529 *cost += rtx_cost (op1, SET, 1, true);
4530 return true;
4532 case ZERO_EXTRACT: /* Bit-field insertion. */
4533 case SIGN_EXTRACT:
4534 /* Strip any redundant widening of the RHS to meet the width of
4535 the target. */
4536 if (GET_CODE (op1) == SUBREG)
4537 op1 = SUBREG_REG (op1);
4538 if ((GET_CODE (op1) == ZERO_EXTEND
4539 || GET_CODE (op1) == SIGN_EXTEND)
4540 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4541 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4542 >= INTVAL (XEXP (op0, 1))))
4543 op1 = XEXP (op1, 0);
4544 *cost += rtx_cost (op1, SET, 1, speed);
4545 return true;
4547 default:
4548 break;
4550 return false;
4552 case MEM:
4553 if (speed)
4554 *cost += extra_cost->ldst.load;
4556 return true;
4558 case NEG:
4559 op0 = CONST0_RTX (GET_MODE (x));
4560 op1 = XEXP (x, 0);
4561 goto cost_minus;
4563 case COMPARE:
4564 op0 = XEXP (x, 0);
4565 op1 = XEXP (x, 1);
4567 if (op1 == const0_rtx
4568 && GET_CODE (op0) == AND)
4570 x = op0;
4571 goto cost_logic;
4574 /* Comparisons can work if the order is swapped.
4575 Canonicalization puts the more complex operation first, but
4576 we want it in op1. */
4577 if (! (REG_P (op0)
4578 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4580 op0 = XEXP (x, 1);
4581 op1 = XEXP (x, 0);
4583 goto cost_minus;
4585 case MINUS:
4586 op0 = XEXP (x, 0);
4587 op1 = XEXP (x, 1);
4589 cost_minus:
4590 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4591 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4592 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4594 if (op0 != const0_rtx)
4595 *cost += rtx_cost (op0, MINUS, 0, speed);
4597 if (CONST_INT_P (op1))
4599 if (!aarch64_uimm12_shift (INTVAL (op1)))
4600 *cost += rtx_cost (op1, MINUS, 1, speed);
4602 else
4604 op1 = aarch64_strip_shift_or_extend (op1);
4605 *cost += rtx_cost (op1, MINUS, 1, speed);
4607 return true;
4610 return false;
4612 case PLUS:
4613 op0 = XEXP (x, 0);
4614 op1 = XEXP (x, 1);
4616 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4618 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4620 *cost += rtx_cost (op0, PLUS, 0, speed);
4622 else
4624 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4626 if (new_op0 == op0
4627 && GET_CODE (op0) == MULT)
4629 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4630 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4631 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4632 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4634 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4635 speed)
4636 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4637 speed)
4638 + rtx_cost (op1, PLUS, 1, speed));
4639 if (speed)
4640 *cost +=
4641 extra_cost->mult[GET_MODE (x) == DImode].extend_add;
4642 return true;
4645 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4646 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4647 + rtx_cost (op1, PLUS, 1, speed));
4649 if (speed)
4650 *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
4652 return true;
4655 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4656 + rtx_cost (op1, PLUS, 1, speed));
4658 return true;
4661 return false;
4663 case IOR:
4664 case XOR:
4665 case AND:
4666 cost_logic:
4667 op0 = XEXP (x, 0);
4668 op1 = XEXP (x, 1);
4670 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4672 if (CONST_INT_P (op1)
4673 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4675 *cost += rtx_cost (op0, AND, 0, speed);
4677 else
4679 if (GET_CODE (op0) == NOT)
4680 op0 = XEXP (op0, 0);
4681 op0 = aarch64_strip_shift (op0);
4682 *cost += (rtx_cost (op0, AND, 0, speed)
4683 + rtx_cost (op1, AND, 1, speed));
4685 return true;
4687 return false;
4689 case ZERO_EXTEND:
4690 if ((GET_MODE (x) == DImode
4691 && GET_MODE (XEXP (x, 0)) == SImode)
4692 || GET_CODE (XEXP (x, 0)) == MEM)
4694 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4695 return true;
4697 return false;
4699 case SIGN_EXTEND:
4700 if (GET_CODE (XEXP (x, 0)) == MEM)
4702 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4703 return true;
4705 return false;
4707 case ROTATE:
4708 if (!CONST_INT_P (XEXP (x, 1)))
4709 *cost += COSTS_N_INSNS (2);
4710 /* Fall through. */
4711 case ROTATERT:
4712 case LSHIFTRT:
4713 case ASHIFT:
4714 case ASHIFTRT:
4716 /* Shifting by a register often takes an extra cycle. */
4717 if (speed && !CONST_INT_P (XEXP (x, 1)))
4718 *cost += extra_cost->alu.arith_shift_reg;
4720 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4721 return true;
4723 case HIGH:
4724 if (!CONSTANT_P (XEXP (x, 0)))
4725 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4726 return true;
4728 case LO_SUM:
4729 if (!CONSTANT_P (XEXP (x, 1)))
4730 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4731 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4732 return true;
4734 case ZERO_EXTRACT:
4735 case SIGN_EXTRACT:
4736 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4737 return true;
4739 case MULT:
4740 op0 = XEXP (x, 0);
4741 op1 = XEXP (x, 1);
4743 *cost = COSTS_N_INSNS (1);
4744 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4746 if (CONST_INT_P (op1)
4747 && exact_log2 (INTVAL (op1)) > 0)
4749 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4750 return true;
4753 if ((GET_CODE (op0) == ZERO_EXTEND
4754 && GET_CODE (op1) == ZERO_EXTEND)
4755 || (GET_CODE (op0) == SIGN_EXTEND
4756 && GET_CODE (op1) == SIGN_EXTEND))
4758 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4759 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4760 if (speed)
4761 *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
4762 return true;
4765 if (speed)
4766 *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
4768 else if (speed)
4770 if (GET_MODE (x) == DFmode)
4771 *cost += extra_cost->fp[1].mult;
4772 else if (GET_MODE (x) == SFmode)
4773 *cost += extra_cost->fp[0].mult;
4776 return false; /* All arguments need to be in registers. */
4778 case MOD:
4779 case UMOD:
4780 *cost = COSTS_N_INSNS (2);
4781 if (speed)
4783 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4784 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
4785 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
4786 else if (GET_MODE (x) == DFmode)
4787 *cost += (extra_cost->fp[1].mult
4788 + extra_cost->fp[1].div);
4789 else if (GET_MODE (x) == SFmode)
4790 *cost += (extra_cost->fp[0].mult
4791 + extra_cost->fp[0].div);
4793 return false; /* All arguments need to be in registers. */
4795 case DIV:
4796 case UDIV:
4797 *cost = COSTS_N_INSNS (1);
4798 if (speed)
4800 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4801 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
4802 else if (GET_MODE (x) == DFmode)
4803 *cost += extra_cost->fp[1].div;
4804 else if (GET_MODE (x) == SFmode)
4805 *cost += extra_cost->fp[0].div;
4807 return false; /* All arguments need to be in registers. */
4809 default:
4810 break;
4812 return false;
4815 static int
4816 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4817 enum machine_mode mode ATTRIBUTE_UNUSED,
4818 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4820 enum rtx_code c = GET_CODE (x);
4821 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4823 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4824 return addr_cost->pre_modify;
4826 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4827 return addr_cost->post_modify;
4829 if (c == PLUS)
4831 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4832 return addr_cost->imm_offset;
4833 else if (GET_CODE (XEXP (x, 0)) == MULT
4834 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4835 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4836 return addr_cost->register_extend;
4838 return addr_cost->register_offset;
4840 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4841 return addr_cost->imm_offset;
4843 return 0;
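/* A sketch of how the cases above map onto typical addresses (RTL shapes
   only; register numbers are illustrative):

     (pre_modify ...)                                 -> pre_modify
     (plus (reg x0) (const_int 16))                   -> imm_offset
     (plus (mult (reg x1) (const_int 8)) (reg x0))    -> register_extend
     (plus (reg x0) (reg x1))                         -> register_offset
     (symbol_ref ...)                                 -> imm_offset  */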
4846 static int
4847 aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4848 reg_class_t from, reg_class_t to)
4850 const struct cpu_regmove_cost *regmove_cost
4851 = aarch64_tune_params->regmove_cost;
4853 /* The cost of moving between a GPR and the stack register is the same as GP2GP. */
4854 if ((from == GENERAL_REGS && to == STACK_REG)
4855 || (to == GENERAL_REGS && from == STACK_REG))
4856 return regmove_cost->GP2GP;
4858 /* To/From the stack register, we move via the gprs. */
4859 if (to == STACK_REG || from == STACK_REG)
4860 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
4861 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
4863 if (from == GENERAL_REGS && to == GENERAL_REGS)
4864 return regmove_cost->GP2GP;
4865 else if (from == GENERAL_REGS)
4866 return regmove_cost->GP2FP;
4867 else if (to == GENERAL_REGS)
4868 return regmove_cost->FP2GP;
4870 /* When AdvSIMD instructions are disabled it is not possible to move
4871 a 128-bit value directly between Q registers. This is handled in
4872 secondary reload. A general register is used as a scratch to move
4873 the upper DI value and the lower DI value is moved directly,
4874 hence the cost is the sum of three moves. */
4876 if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
4877 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4879 return regmove_cost->FP2FP;
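/* For example (a sketch of the recursion above), a move between FP_REGS
   and STACK_REG is costed as a move via the general registers:

     cost (FP_REGS, STACK_REG)
       = cost (FP_REGS, GENERAL_REGS) + cost (GENERAL_REGS, STACK_REG)
       = FP2GP + GP2GP.  */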
4882 static int
4883 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4884 reg_class_t rclass ATTRIBUTE_UNUSED,
4885 bool in ATTRIBUTE_UNUSED)
4887 return aarch64_tune_params->memmov_cost;
4890 /* Return the number of instructions that can be issued per cycle. */
4891 static int
4892 aarch64_sched_issue_rate (void)
4894 return aarch64_tune_params->issue_rate;
4897 /* Vectorizer cost model target hooks. */
4899 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4900 static int
4901 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4902 tree vectype,
4903 int misalign ATTRIBUTE_UNUSED)
4905 unsigned elements;
4907 switch (type_of_cost)
4909 case scalar_stmt:
4910 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4912 case scalar_load:
4913 return aarch64_tune_params->vec_costs->scalar_load_cost;
4915 case scalar_store:
4916 return aarch64_tune_params->vec_costs->scalar_store_cost;
4918 case vector_stmt:
4919 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4921 case vector_load:
4922 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4924 case vector_store:
4925 return aarch64_tune_params->vec_costs->vec_store_cost;
4927 case vec_to_scalar:
4928 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4930 case scalar_to_vec:
4931 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4933 case unaligned_load:
4934 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4936 case unaligned_store:
4937 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4939 case cond_branch_taken:
4940 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4942 case cond_branch_not_taken:
4943 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4945 case vec_perm:
4946 case vec_promote_demote:
4947 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4949 case vec_construct:
4950 elements = TYPE_VECTOR_SUBPARTS (vectype);
4951 return elements / 2 + 1;
4953 default:
4954 gcc_unreachable ();
4958 /* Implement targetm.vectorize.add_stmt_cost. */
4959 static unsigned
4960 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4961 struct _stmt_vec_info *stmt_info, int misalign,
4962 enum vect_cost_model_location where)
4964 unsigned *cost = (unsigned *) data;
4965 unsigned retval = 0;
4967 if (flag_vect_cost_model)
4969 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4970 int stmt_cost =
4971 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4973 /* Statements in an inner loop relative to the loop being
4974 vectorized are weighted more heavily. The value here is
4975 a function (linear for now) of the loop nest level. */
4976 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4978 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4979 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4980 unsigned nest_level = loop_depth (loop);
4982 count *= nest_level;
4985 retval = (unsigned) (count * stmt_cost);
4986 cost[where] += retval;
4989 return retval;
4992 static void initialize_aarch64_code_model (void);
4994 /* Parse the architecture extension string. */
4996 static void
4997 aarch64_parse_extension (char *str)
4999 /* The extension string is parsed left to right. */
5000 const struct aarch64_option_extension *opt = NULL;
5002 /* Flag to say whether we are adding or removing an extension. */
5003 int adding_ext = -1;
5005 while (str != NULL && *str != 0)
5007 char *ext;
5008 size_t len;
5010 str++;
5011 ext = strchr (str, '+');
5013 if (ext != NULL)
5014 len = ext - str;
5015 else
5016 len = strlen (str);
5018 if (len >= 2 && strncmp (str, "no", 2) == 0)
5020 adding_ext = 0;
5021 len -= 2;
5022 str += 2;
5024 else if (len > 0)
5025 adding_ext = 1;
5027 if (len == 0)
5029 error ("missing feature modifier after %qs", "+no");
5030 return;
5033 /* Scan over the extensions table trying to find an exact match. */
5034 for (opt = all_extensions; opt->name != NULL; opt++)
5036 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5038 /* Add or remove the extension. */
5039 if (adding_ext)
5040 aarch64_isa_flags |= opt->flags_on;
5041 else
5042 aarch64_isa_flags &= ~(opt->flags_off);
5043 break;
5047 if (opt->name == NULL)
5049 /* Extension not found in list. */
5050 error ("unknown feature modifier %qs", str);
5051 return;
5054 str = ext;
5057 return;
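/* For example (a sketch; the feature names are used only for illustration),
   parsing the tail "+fp+nosimd" of an -march or -mcpu string proceeds left
   to right: "fp" has no "no" prefix, so its flags_on bits are ORed into
   aarch64_isa_flags; "nosimd" strips the "no" prefix, matches "simd" and
   clears that entry's flags_off bits.  An empty modifier such as a bare
   "+no" is rejected with the "missing feature modifier" error.  */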
5060 /* Parse the ARCH string. */
5062 static void
5063 aarch64_parse_arch (void)
5065 char *ext;
5066 const struct processor *arch;
5067 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5068 size_t len;
5070 strcpy (str, aarch64_arch_string);
5072 ext = strchr (str, '+');
5074 if (ext != NULL)
5075 len = ext - str;
5076 else
5077 len = strlen (str);
5079 if (len == 0)
5081 error ("missing arch name in -march=%qs", str);
5082 return;
5085 /* Loop through the list of supported ARCHs to find a match. */
5086 for (arch = all_architectures; arch->name != NULL; arch++)
5088 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5090 selected_arch = arch;
5091 aarch64_isa_flags = selected_arch->flags;
5093 if (!selected_cpu)
5094 selected_cpu = &all_cores[selected_arch->core];
5096 if (ext != NULL)
5098 /* ARCH string contains at least one extension. */
5099 aarch64_parse_extension (ext);
5102 if (strcmp (selected_arch->arch, selected_cpu->arch))
5104 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5105 selected_cpu->name, selected_arch->name);
5108 return;
5112 /* ARCH name not found in list. */
5113 error ("unknown value %qs for -march", str);
5114 return;
5117 /* Parse the CPU string. */
5119 static void
5120 aarch64_parse_cpu (void)
5122 char *ext;
5123 const struct processor *cpu;
5124 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5125 size_t len;
5127 strcpy (str, aarch64_cpu_string);
5129 ext = strchr (str, '+');
5131 if (ext != NULL)
5132 len = ext - str;
5133 else
5134 len = strlen (str);
5136 if (len == 0)
5138 error ("missing cpu name in -mcpu=%qs", str);
5139 return;
5142 /* Loop through the list of supported CPUs to find a match. */
5143 for (cpu = all_cores; cpu->name != NULL; cpu++)
5145 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5147 selected_cpu = cpu;
5148 selected_tune = cpu;
5149 aarch64_isa_flags = selected_cpu->flags;
5151 if (ext != NULL)
5153 /* CPU string contains at least one extension. */
5154 aarch64_parse_extension (ext);
5157 return;
5161 /* CPU name not found in list. */
5162 error ("unknown value %qs for -mcpu", str);
5163 return;
5166 /* Parse the TUNE string. */
5168 static void
5169 aarch64_parse_tune (void)
5171 const struct processor *cpu;
5172 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5173 strcpy (str, aarch64_tune_string);
5175 /* Loop through the list of supported CPUs to find a match. */
5176 for (cpu = all_cores; cpu->name != NULL; cpu++)
5178 if (strcmp (cpu->name, str) == 0)
5180 selected_tune = cpu;
5181 return;
5185 /* CPU name not found in list. */
5186 error ("unknown value %qs for -mtune", str);
5187 return;
5191 /* Implement TARGET_OPTION_OVERRIDE. */
5193 static void
5194 aarch64_override_options (void)
5196 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5197 If either of -march or -mtune is given, they override their
5198 respective component of -mcpu.
5200 So, first parse AARCH64_CPU_STRING, then the others, be careful
5201 with -march as, if -mcpu is not present on the command line, march
5202 must set a sensible default CPU. */
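/* For example (an illustrative sketch; the CPU names are hypothetical):

     -mcpu=foo               architecture, ISA flags and tuning all
                             come from foo.
     -mcpu=foo -mtune=bar    architecture and ISA flags from foo,
                             tuning from bar.
     -march=armv8-a          only the architecture and ISA flags are
                             set here; if no -mcpu was given, the CPU
                             defaults to the architecture's core.  */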
5203 if (aarch64_cpu_string)
5205 aarch64_parse_cpu ();
5208 if (aarch64_arch_string)
5210 aarch64_parse_arch ();
5213 if (aarch64_tune_string)
5215 aarch64_parse_tune ();
5218 #ifndef HAVE_AS_MABI_OPTION
5219 /* The compiler may have been configured with 2.23.* binutils, which does
5220 not have support for ILP32. */
5221 if (TARGET_ILP32)
5222 error ("Assembler does not support -mabi=ilp32");
5223 #endif
5225 initialize_aarch64_code_model ();
5227 aarch64_build_bitmask_table ();
5229 /* This target defaults to strict volatile bitfields. */
5230 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5231 flag_strict_volatile_bitfields = 1;
5233 /* If the user did not specify a processor, choose the default
5234 one for them. This will be the CPU set during configuration using
5235 --with-cpu, otherwise it is "generic". */
5236 if (!selected_cpu)
5238 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5239 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5242 gcc_assert (selected_cpu);
5244 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5245 if (!selected_tune)
5246 selected_tune = &all_cores[selected_cpu->core];
5248 aarch64_tune_flags = selected_tune->flags;
5249 aarch64_tune = selected_tune->core;
5250 aarch64_tune_params = selected_tune->tune;
5252 aarch64_override_options_after_change ();
5255 /* Implement targetm.override_options_after_change. */
5257 static void
5258 aarch64_override_options_after_change (void)
5260 if (flag_omit_frame_pointer)
5261 flag_omit_leaf_frame_pointer = false;
5262 else if (flag_omit_leaf_frame_pointer)
5263 flag_omit_frame_pointer = true;
5266 static struct machine_function *
5267 aarch64_init_machine_status (void)
5269 struct machine_function *machine;
5270 machine = ggc_alloc_cleared_machine_function ();
5271 return machine;
5274 void
5275 aarch64_init_expanders (void)
5277 init_machine_status = aarch64_init_machine_status;
5280 /* A checking mechanism for the implementation of the various code models. */
5281 static void
5282 initialize_aarch64_code_model (void)
5284 if (flag_pic)
5286 switch (aarch64_cmodel_var)
5288 case AARCH64_CMODEL_TINY:
5289 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5290 break;
5291 case AARCH64_CMODEL_SMALL:
5292 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5293 break;
5294 case AARCH64_CMODEL_LARGE:
5295 sorry ("code model %qs with -f%s", "large",
5296 flag_pic > 1 ? "PIC" : "pic");
5297 default:
5298 gcc_unreachable ();
5301 else
5302 aarch64_cmodel = aarch64_cmodel_var;
5305 /* Return true if SYMBOL_REF X binds locally. */
5307 static bool
5308 aarch64_symbol_binds_local_p (const_rtx x)
5310 return (SYMBOL_REF_DECL (x)
5311 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5312 : SYMBOL_REF_LOCAL_P (x));
5315 /* Return true if SYMBOL_REF X is thread local */
5316 static bool
5317 aarch64_tls_symbol_p (rtx x)
5319 if (! TARGET_HAVE_TLS)
5320 return false;
5322 if (GET_CODE (x) != SYMBOL_REF)
5323 return false;
5325 return SYMBOL_REF_TLS_MODEL (x) != 0;
5328 /* Classify a TLS symbol into one of the TLS kinds. */
5329 enum aarch64_symbol_type
5330 aarch64_classify_tls_symbol (rtx x)
5332 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5334 switch (tls_kind)
5336 case TLS_MODEL_GLOBAL_DYNAMIC:
5337 case TLS_MODEL_LOCAL_DYNAMIC:
5338 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5340 case TLS_MODEL_INITIAL_EXEC:
5341 return SYMBOL_SMALL_GOTTPREL;
5343 case TLS_MODEL_LOCAL_EXEC:
5344 return SYMBOL_SMALL_TPREL;
5346 case TLS_MODEL_EMULATED:
5347 case TLS_MODEL_NONE:
5348 return SYMBOL_FORCE_TO_MEM;
5350 default:
5351 gcc_unreachable ();
5355 /* Return the method that should be used to access SYMBOL_REF or
5356 LABEL_REF X in context CONTEXT. */
5358 enum aarch64_symbol_type
5359 aarch64_classify_symbol (rtx x,
5360 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5362 if (GET_CODE (x) == LABEL_REF)
5364 switch (aarch64_cmodel)
5366 case AARCH64_CMODEL_LARGE:
5367 return SYMBOL_FORCE_TO_MEM;
5369 case AARCH64_CMODEL_TINY_PIC:
5370 case AARCH64_CMODEL_TINY:
5371 return SYMBOL_TINY_ABSOLUTE;
5373 case AARCH64_CMODEL_SMALL_PIC:
5374 case AARCH64_CMODEL_SMALL:
5375 return SYMBOL_SMALL_ABSOLUTE;
5377 default:
5378 gcc_unreachable ();
5382 if (GET_CODE (x) == SYMBOL_REF)
5384 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5385 return SYMBOL_FORCE_TO_MEM;
5387 if (aarch64_tls_symbol_p (x))
5388 return aarch64_classify_tls_symbol (x);
5390 switch (aarch64_cmodel)
5392 case AARCH64_CMODEL_TINY:
5393 if (SYMBOL_REF_WEAK (x))
5394 return SYMBOL_FORCE_TO_MEM;
5395 return SYMBOL_TINY_ABSOLUTE;
5397 case AARCH64_CMODEL_SMALL:
5398 if (SYMBOL_REF_WEAK (x))
5399 return SYMBOL_FORCE_TO_MEM;
5400 return SYMBOL_SMALL_ABSOLUTE;
5402 case AARCH64_CMODEL_TINY_PIC:
5403 if (!aarch64_symbol_binds_local_p (x))
5404 return SYMBOL_TINY_GOT;
5405 return SYMBOL_TINY_ABSOLUTE;
5407 case AARCH64_CMODEL_SMALL_PIC:
5408 if (!aarch64_symbol_binds_local_p (x))
5409 return SYMBOL_SMALL_GOT;
5410 return SYMBOL_SMALL_ABSOLUTE;
5412 default:
5413 gcc_unreachable ();
5417 /* By default push everything into the constant pool. */
5418 return SYMBOL_FORCE_TO_MEM;
5421 bool
5422 aarch64_constant_address_p (rtx x)
5424 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5427 bool
5428 aarch64_legitimate_pic_operand_p (rtx x)
5430 if (GET_CODE (x) == SYMBOL_REF
5431 || (GET_CODE (x) == CONST
5432 && GET_CODE (XEXP (x, 0)) == PLUS
5433 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5434 return false;
5436 return true;
5439 /* Return true if X holds either a quarter-precision constant or a
5440 floating-point +0.0 constant. */
5441 static bool
5442 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5444 if (!CONST_DOUBLE_P (x))
5445 return false;
5447 /* TODO: We could handle moving 0.0 to a TFmode register,
5448 but first we would like to refactor movtf_aarch64
5449 to be more amenable to splitting moves properly and to
5450 gating correctly on TARGET_SIMD. For now, reject all
5451 constants that are not destined for SFmode or DFmode registers. */
5452 if (!(mode == SFmode || mode == DFmode))
5453 return false;
5455 if (aarch64_float_const_zero_rtx_p (x))
5456 return true;
5457 return aarch64_float_const_representable_p (x);
5460 static bool
5461 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5463 /* Do not allow vector struct mode constants. We could support
5464 0 and -1 easily, but they need support in aarch64-simd.md. */
5465 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5466 return false;
5468 /* This could probably go away because
5469 we now decompose CONST_INTs according to expand_mov_immediate. */
5470 if ((GET_CODE (x) == CONST_VECTOR
5471 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5472 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5473 return !targetm.cannot_force_const_mem (mode, x);
5475 if (GET_CODE (x) == HIGH
5476 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5477 return true;
5479 return aarch64_constant_address_p (x);
5483 aarch64_load_tp (rtx target)
5485 if (!target
5486 || GET_MODE (target) != Pmode
5487 || !register_operand (target, Pmode))
5488 target = gen_reg_rtx (Pmode);
5490 /* Can return in any reg. */
5491 emit_insn (gen_aarch64_load_tp_hard (target));
5492 return target;
5495 /* On AAPCS systems, this is the "struct __va_list". */
5496 static GTY(()) tree va_list_type;
5498 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5499 Return the type to use as __builtin_va_list.
5501 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5503 struct __va_list
5505 void *__stack;
5506 void *__gr_top;
5507 void *__vr_top;
5508 int __gr_offs;
5509 int __vr_offs;
5510 }; */
5512 static tree
5513 aarch64_build_builtin_va_list (void)
5515 tree va_list_name;
5516 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5518 /* Create the type. */
5519 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5520 /* Give it the required name. */
5521 va_list_name = build_decl (BUILTINS_LOCATION,
5522 TYPE_DECL,
5523 get_identifier ("__va_list"),
5524 va_list_type);
5525 DECL_ARTIFICIAL (va_list_name) = 1;
5526 TYPE_NAME (va_list_type) = va_list_name;
5527 TYPE_STUB_DECL (va_list_type) = va_list_name;
5529 /* Create the fields. */
5530 f_stack = build_decl (BUILTINS_LOCATION,
5531 FIELD_DECL, get_identifier ("__stack"),
5532 ptr_type_node);
5533 f_grtop = build_decl (BUILTINS_LOCATION,
5534 FIELD_DECL, get_identifier ("__gr_top"),
5535 ptr_type_node);
5536 f_vrtop = build_decl (BUILTINS_LOCATION,
5537 FIELD_DECL, get_identifier ("__vr_top"),
5538 ptr_type_node);
5539 f_groff = build_decl (BUILTINS_LOCATION,
5540 FIELD_DECL, get_identifier ("__gr_offs"),
5541 integer_type_node);
5542 f_vroff = build_decl (BUILTINS_LOCATION,
5543 FIELD_DECL, get_identifier ("__vr_offs"),
5544 integer_type_node);
5546 DECL_ARTIFICIAL (f_stack) = 1;
5547 DECL_ARTIFICIAL (f_grtop) = 1;
5548 DECL_ARTIFICIAL (f_vrtop) = 1;
5549 DECL_ARTIFICIAL (f_groff) = 1;
5550 DECL_ARTIFICIAL (f_vroff) = 1;
5552 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5553 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5554 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5555 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5556 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5558 TYPE_FIELDS (va_list_type) = f_stack;
5559 DECL_CHAIN (f_stack) = f_grtop;
5560 DECL_CHAIN (f_grtop) = f_vrtop;
5561 DECL_CHAIN (f_vrtop) = f_groff;
5562 DECL_CHAIN (f_groff) = f_vroff;
5564 /* Compute its layout. */
5565 layout_type (va_list_type);
5567 return va_list_type;
5570 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5571 static void
5572 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5574 const CUMULATIVE_ARGS *cum;
5575 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5576 tree stack, grtop, vrtop, groff, vroff;
5577 tree t;
5578 int gr_save_area_size;
5579 int vr_save_area_size;
5580 int vr_offset;
5582 cum = &crtl->args.info;
5583 gr_save_area_size
5584 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5585 vr_save_area_size
5586 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5588 if (TARGET_GENERAL_REGS_ONLY)
5590 if (cum->aapcs_nvrn > 0)
5591 sorry ("%qs and floating point or vector arguments",
5592 "-mgeneral-regs-only");
5593 vr_save_area_size = 0;
5596 f_stack = TYPE_FIELDS (va_list_type_node);
5597 f_grtop = DECL_CHAIN (f_stack);
5598 f_vrtop = DECL_CHAIN (f_grtop);
5599 f_groff = DECL_CHAIN (f_vrtop);
5600 f_vroff = DECL_CHAIN (f_groff);
5602 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5603 NULL_TREE);
5604 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5605 NULL_TREE);
5606 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5607 NULL_TREE);
5608 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5609 NULL_TREE);
5610 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5611 NULL_TREE);
5613 /* Emit code to initialize STACK, which points to the next varargs stack
5614 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5615 by named arguments. STACK is 8-byte aligned. */
5616 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5617 if (cum->aapcs_stack_size > 0)
5618 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5619 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5620 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5622 /* Emit code to initialize GRTOP, the top of the GR save area.
5623 virtual_incoming_args_rtx should have been 16 byte aligned. */
5624 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5625 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5626 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5628 /* Emit code to initialize VRTOP, the top of the VR save area.
5629 This address is gr_save_area_bytes below GRTOP, rounded
5630 down to the next 16-byte boundary. */
5631 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5632 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5633 STACK_BOUNDARY / BITS_PER_UNIT);
5635 if (vr_offset)
5636 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5637 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5638 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5640 /* Emit code to initialize GROFF, the offset from GRTOP of the
5641 next GPR argument. */
5642 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5643 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5644 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5646 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5647 of the next VR argument. */
5648 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5649 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5650 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5653 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5655 static tree
5656 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5657 gimple_seq *post_p ATTRIBUTE_UNUSED)
5659 tree addr;
5660 bool indirect_p;
5661 bool is_ha; /* is HFA or HVA. */
5662 bool dw_align; /* double-word align. */
5663 enum machine_mode ag_mode = VOIDmode;
5664 int nregs;
5665 enum machine_mode mode;
5667 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5668 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5669 HOST_WIDE_INT size, rsize, adjust, align;
5670 tree t, u, cond1, cond2;
5672 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5673 if (indirect_p)
5674 type = build_pointer_type (type);
5676 mode = TYPE_MODE (type);
5678 f_stack = TYPE_FIELDS (va_list_type_node);
5679 f_grtop = DECL_CHAIN (f_stack);
5680 f_vrtop = DECL_CHAIN (f_grtop);
5681 f_groff = DECL_CHAIN (f_vrtop);
5682 f_vroff = DECL_CHAIN (f_groff);
5684 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5685 f_stack, NULL_TREE);
5686 size = int_size_in_bytes (type);
5687 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5689 dw_align = false;
5690 adjust = 0;
5691 if (aarch64_vfp_is_call_or_return_candidate (mode,
5692 type,
5693 &ag_mode,
5694 &nregs,
5695 &is_ha))
5697 /* TYPE passed in fp/simd registers. */
5698 if (TARGET_GENERAL_REGS_ONLY)
5699 sorry ("%qs and floating point or vector arguments",
5700 "-mgeneral-regs-only");
5702 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5703 unshare_expr (valist), f_vrtop, NULL_TREE);
5704 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5705 unshare_expr (valist), f_vroff, NULL_TREE);
5707 rsize = nregs * UNITS_PER_VREG;
5709 if (is_ha)
5711 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5712 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5714 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5715 && size < UNITS_PER_VREG)
5717 adjust = UNITS_PER_VREG - size;
5720 else
5722 /* TYPE passed in general registers. */
5723 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5724 unshare_expr (valist), f_grtop, NULL_TREE);
5725 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5726 unshare_expr (valist), f_groff, NULL_TREE);
5727 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5728 nregs = rsize / UNITS_PER_WORD;
5730 if (align > 8)
5731 dw_align = true;
5733 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5734 && size < UNITS_PER_WORD)
5736 adjust = UNITS_PER_WORD - size;
5740 /* Get a local temporary for the field value. */
5741 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5743 /* Emit code to branch if off >= 0. */
5744 t = build2 (GE_EXPR, boolean_type_node, off,
5745 build_int_cst (TREE_TYPE (off), 0));
5746 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5748 if (dw_align)
5750 /* Emit: offs = (offs + 15) & -16. */
5751 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5752 build_int_cst (TREE_TYPE (off), 15));
5753 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5754 build_int_cst (TREE_TYPE (off), -16));
5755 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5757 else
5758 roundup = NULL;
5760 /* Update ap.__[g|v]r_offs */
5761 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5762 build_int_cst (TREE_TYPE (off), rsize));
5763 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5765 /* String up. */
5766 if (roundup)
5767 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5769 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5770 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5771 build_int_cst (TREE_TYPE (f_off), 0));
5772 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5774 /* String up: make sure the assignment happens before the use. */
5775 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5776 COND_EXPR_ELSE (cond1) = t;
5778 /* Prepare the trees handling the argument that is passed on the stack;
5779 the top level node will store in ON_STACK. */
5780 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5781 if (align > 8)
5783 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5784 t = fold_convert (intDI_type_node, arg);
5785 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5786 build_int_cst (TREE_TYPE (t), 15));
5787 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5788 build_int_cst (TREE_TYPE (t), -16));
5789 t = fold_convert (TREE_TYPE (arg), t);
5790 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5792 else
5793 roundup = NULL;
5794 /* Advance ap.__stack */
5795 t = fold_convert (intDI_type_node, arg);
5796 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5797 build_int_cst (TREE_TYPE (t), size + 7));
5798 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5799 build_int_cst (TREE_TYPE (t), -8));
5800 t = fold_convert (TREE_TYPE (arg), t);
5801 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5802 /* String up roundup and advance. */
5803 if (roundup)
5804 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5805 /* String up with arg */
5806 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5807 /* Big-endianness related address adjustment. */
5808 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5809 && size < UNITS_PER_WORD)
5811 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5812 size_int (UNITS_PER_WORD - size));
5813 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5816 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5817 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5819 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5820 t = off;
5821 if (adjust)
5822 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5823 build_int_cst (TREE_TYPE (off), adjust));
5825 t = fold_convert (sizetype, t);
5826 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5828 if (is_ha)
5830 /* type ha; // treat as "struct {ftype field[n];}"
5831 ... [computing offs]
5832 for (i = 0; i <nregs; ++i, offs += 16)
5833 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5834 return ha; */
5835 int i;
5836 tree tmp_ha, field_t, field_ptr_t;
5838 /* Declare a local variable. */
5839 tmp_ha = create_tmp_var_raw (type, "ha");
5840 gimple_add_tmp_var (tmp_ha);
5842 /* Establish the base type. */
5843 switch (ag_mode)
5845 case SFmode:
5846 field_t = float_type_node;
5847 field_ptr_t = float_ptr_type_node;
5848 break;
5849 case DFmode:
5850 field_t = double_type_node;
5851 field_ptr_t = double_ptr_type_node;
5852 break;
5853 case TFmode:
5854 field_t = long_double_type_node;
5855 field_ptr_t = long_double_ptr_type_node;
5856 break;
5857 /* Half-precision and quad-precision types are not fully supported yet.
5858 Enable the following code once that support is complete; the correct
5859 type node for __fp16 * still needs to be found. */
5860 #if 0
5861 case HFmode:
5862 field_t = float_type_node;
5863 field_ptr_t = float_ptr_type_node;
5864 break;
5865 #endif
5866 case V2SImode:
5867 case V4SImode:
5869 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5870 field_t = build_vector_type_for_mode (innertype, ag_mode);
5871 field_ptr_t = build_pointer_type (field_t);
5873 break;
5874 default:
5875 gcc_assert (0);
5878 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
5879 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5880 addr = t;
5881 t = fold_convert (field_ptr_t, addr);
5882 t = build2 (MODIFY_EXPR, field_t,
5883 build1 (INDIRECT_REF, field_t, tmp_ha),
5884 build1 (INDIRECT_REF, field_t, t));
5886 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5887 for (i = 1; i < nregs; ++i)
5889 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5890 u = fold_convert (field_ptr_t, addr);
5891 u = build2 (MODIFY_EXPR, field_t,
5892 build2 (MEM_REF, field_t, tmp_ha,
5893 build_int_cst (field_ptr_t,
5894 (i *
5895 int_size_in_bytes (field_t)))),
5896 build1 (INDIRECT_REF, field_t, u));
5897 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5900 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5901 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5904 COND_EXPR_ELSE (cond2) = t;
5905 addr = fold_convert (build_pointer_type (type), cond1);
5906 addr = build_va_arg_indirect_ref (addr);
5908 if (indirect_p)
5909 addr = build_va_arg_indirect_ref (addr);
5911 return addr;
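/* In outline, the tree built above behaves like the following pseudo-C
   (a sketch only; "top" and "offs" stand for the __gr_/__vr_ fields
   selected above):

     if (offs >= 0)
       addr = on_stack;        /* named register area already exhausted */
     else
       {
	 offs += rsize;        /* possibly rounded up to 16 bytes first */
	 if (offs > 0)
	   addr = on_stack;    /* this argument overflowed the save area */
	 else
	   addr = top + offs;  /* plus any big-endian adjustment */
       }  */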
5914 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5916 static void
5917 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5918 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5919 int no_rtl)
5921 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5922 CUMULATIVE_ARGS local_cum;
5923 int gr_saved, vr_saved;
5925 /* The caller has advanced CUM up to, but not beyond, the last named
5926 argument. Advance a local copy of CUM past the last "real" named
5927 argument, to find out how many registers are left over. */
5928 local_cum = *cum;
5929 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5931 /* Find out how many registers we need to save. */
5932 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5933 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5935 if (TARGET_GENERAL_REGS_ONLY)
5937 if (local_cum.aapcs_nvrn > 0)
5938 sorry ("%qs and floating point or vector arguments",
5939 "-mgeneral-regs-only");
5940 vr_saved = 0;
5943 if (!no_rtl)
5945 if (gr_saved > 0)
5947 rtx ptr, mem;
5949 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5950 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5951 - gr_saved * UNITS_PER_WORD);
5952 mem = gen_frame_mem (BLKmode, ptr);
5953 set_mem_alias_set (mem, get_varargs_alias_set ());
5955 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5956 mem, gr_saved);
5958 if (vr_saved > 0)
5960 /* We can't use move_block_from_reg, because it will use
5961 the wrong mode, storing D regs only. */
5962 enum machine_mode mode = TImode;
5963 int off, i;
5965 /* Set OFF to the offset from virtual_incoming_args_rtx of
5966 the first vector register. The VR save area lies below
5967 the GR one, and is aligned to 16 bytes. */
5968 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5969 STACK_BOUNDARY / BITS_PER_UNIT);
5970 off -= vr_saved * UNITS_PER_VREG;
5972 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5974 rtx ptr, mem;
5976 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5977 mem = gen_frame_mem (mode, ptr);
5978 set_mem_alias_set (mem, get_varargs_alias_set ());
5979 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5980 off += UNITS_PER_VREG;
5985 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5986 any complication of having crtl->args.pretend_args_size changed. */
5987 cfun->machine->saved_varargs_size
5988 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5989 STACK_BOUNDARY / BITS_PER_UNIT)
5990 + vr_saved * UNITS_PER_VREG);
5993 static void
5994 aarch64_conditional_register_usage (void)
5996 int i;
5997 if (!TARGET_FLOAT)
5999 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6001 fixed_regs[i] = 1;
6002 call_used_regs[i] = 1;
6007 /* Walk down the type tree of TYPE counting consecutive base elements.
6008 If *MODEP is VOIDmode, then set it to the first valid floating point
6009 type. If a non-floating point type is found, or if a floating point
6010 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6011 otherwise return the count in the sub-tree. */
6012 static int
6013 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6015 enum machine_mode mode;
6016 HOST_WIDE_INT size;
6018 switch (TREE_CODE (type))
6020 case REAL_TYPE:
6021 mode = TYPE_MODE (type);
6022 if (mode != DFmode && mode != SFmode && mode != TFmode)
6023 return -1;
6025 if (*modep == VOIDmode)
6026 *modep = mode;
6028 if (*modep == mode)
6029 return 1;
6031 break;
6033 case COMPLEX_TYPE:
6034 mode = TYPE_MODE (TREE_TYPE (type));
6035 if (mode != DFmode && mode != SFmode && mode != TFmode)
6036 return -1;
6038 if (*modep == VOIDmode)
6039 *modep = mode;
6041 if (*modep == mode)
6042 return 2;
6044 break;
6046 case VECTOR_TYPE:
6047 /* Use V2SImode and V4SImode as representatives of all 64-bit
6048 and 128-bit vector types. */
6049 size = int_size_in_bytes (type);
6050 switch (size)
6052 case 8:
6053 mode = V2SImode;
6054 break;
6055 case 16:
6056 mode = V4SImode;
6057 break;
6058 default:
6059 return -1;
6062 if (*modep == VOIDmode)
6063 *modep = mode;
6065 /* Vector modes are considered to be opaque: two vectors are
6066 equivalent for the purposes of being homogeneous aggregates
6067 if they are the same size. */
6068 if (*modep == mode)
6069 return 1;
6071 break;
6073 case ARRAY_TYPE:
6075 int count;
6076 tree index = TYPE_DOMAIN (type);
6078 /* Can't handle incomplete types. */
6079 if (!COMPLETE_TYPE_P (type))
6080 return -1;
6082 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6083 if (count == -1
6084 || !index
6085 || !TYPE_MAX_VALUE (index)
6086 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6087 || !TYPE_MIN_VALUE (index)
6088 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6089 || count < 0)
6090 return -1;
6092 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6093 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6095 /* There must be no padding. */
6096 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6097 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6098 != count * GET_MODE_BITSIZE (*modep)))
6099 return -1;
6101 return count;
6104 case RECORD_TYPE:
6106 int count = 0;
6107 int sub_count;
6108 tree field;
6110 /* Can't handle incomplete types. */
6111 if (!COMPLETE_TYPE_P (type))
6112 return -1;
6114 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6116 if (TREE_CODE (field) != FIELD_DECL)
6117 continue;
6119 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6120 if (sub_count < 0)
6121 return -1;
6122 count += sub_count;
6125 /* There must be no padding. */
6126 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6127 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6128 != count * GET_MODE_BITSIZE (*modep)))
6129 return -1;
6131 return count;
6134 case UNION_TYPE:
6135 case QUAL_UNION_TYPE:
6137 /* These aren't very interesting except in a degenerate case. */
6138 int count = 0;
6139 int sub_count;
6140 tree field;
6142 /* Can't handle incomplete types. */
6143 if (!COMPLETE_TYPE_P (type))
6144 return -1;
6146 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6148 if (TREE_CODE (field) != FIELD_DECL)
6149 continue;
6151 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6152 if (sub_count < 0)
6153 return -1;
6154 count = count > sub_count ? count : sub_count;
6157 /* There must be no padding. */
6158 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6159 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6160 != count * GET_MODE_BITSIZE (*modep)))
6161 return -1;
6163 return count;
6166 default:
6167 break;
6170 return -1;
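/* Some worked examples of the walk above (the types are purely
   illustrative):

     struct { float x, y, z; }       -> 3, *modep == SFmode
     _Complex double                 -> 2, *modep == DFmode
     struct { float f; double d; }   -> -1 (mixed base types)
     float [4]                       -> 4, *modep == SFmode  */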
6173 /* Return true if we use LRA instead of reload pass. */
6174 static bool
6175 aarch64_lra_p (void)
6177 return aarch64_lra_flag;
6180 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6181 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6182 array types. The C99 floating-point complex types are also considered
6183 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6184 types, which are GCC extensions and out of the scope of AAPCS64, are
6185 treated as composite types here as well.
6187 Note that MODE itself is not sufficient in determining whether a type
6188 is such a composite type or not. This is because
6189 stor-layout.c:compute_record_mode may have already changed the MODE
6190 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6191 structure with only one field may have its MODE set to the mode of the
6192 field. Also an integer mode whose size matches the size of the
6193 RECORD_TYPE type may be used to substitute the original mode
6194 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6195 solely relied on. */
6197 static bool
6198 aarch64_composite_type_p (const_tree type,
6199 enum machine_mode mode)
6201 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6202 return true;
6204 if (mode == BLKmode
6205 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6206 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6207 return true;
6209 return false;
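/* For example (illustrative only): a struct containing a single double may
   have its TYPE_MODE set to DFmode by compute_record_mode, yet it is still
   a composite type by the TYPE check above, whereas a bare double
   (DFmode with no aggregate TYPE) is not.  */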
6212 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6213 type as described in AAPCS64 \S 4.1.2.
6215 See the comment above aarch64_composite_type_p for the notes on MODE. */
6217 static bool
6218 aarch64_short_vector_p (const_tree type,
6219 enum machine_mode mode)
6221 HOST_WIDE_INT size = -1;
6223 if (type && TREE_CODE (type) == VECTOR_TYPE)
6224 size = int_size_in_bytes (type);
6225 else if (!aarch64_composite_type_p (type, mode)
6226 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6227 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6228 size = GET_MODE_SIZE (mode);
6230 return (size == 8 || size == 16) ? true : false;
6233 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6234 shall be passed or returned in simd/fp register(s) (providing these
6235 parameter passing registers are available).
6237 Upon successful return, *COUNT returns the number of needed registers,
6238 *BASE_MODE returns the mode of the individual register and, when IS_HA
6239 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6240 floating-point aggregate or a homogeneous short-vector aggregate. */
6242 static bool
6243 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6244 const_tree type,
6245 enum machine_mode *base_mode,
6246 int *count,
6247 bool *is_ha)
6249 enum machine_mode new_mode = VOIDmode;
6250 bool composite_p = aarch64_composite_type_p (type, mode);
6252 if (is_ha != NULL) *is_ha = false;
6254 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6255 || aarch64_short_vector_p (type, mode))
6257 *count = 1;
6258 new_mode = mode;
6260 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6262 if (is_ha != NULL) *is_ha = true;
6263 *count = 2;
6264 new_mode = GET_MODE_INNER (mode);
6266 else if (type && composite_p)
6268 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6270 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6272 if (is_ha != NULL) *is_ha = true;
6273 *count = ag_count;
6275 else
6276 return false;
6278 else
6279 return false;
6281 *base_mode = new_mode;
6282 return true;
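/* Illustrative sketch (not compiled; the struct names are made up) of what
   the candidate check above accepts and rejects, assuming HA_MAX_NUM_FLDS
   is 4 as in the AAPCS64 homogeneous-aggregate rules.  */
#if 0
struct hfa3_illus  { float a, b, c; };     /* HFA: *base_mode = SFmode, *count = 3, *is_ha = true */
struct mixed_illus { float a; double b; }; /* rejected: member modes differ                       */
struct big_illus   { double d[5]; };       /* rejected: more than HA_MAX_NUM_FLDS members         */
#endif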
6285 /* Implement TARGET_STRUCT_VALUE_RTX. */
6287 static rtx
6288 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6289 int incoming ATTRIBUTE_UNUSED)
6291 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6294 /* Implements target hook vector_mode_supported_p. */
6295 static bool
6296 aarch64_vector_mode_supported_p (enum machine_mode mode)
6298 if (TARGET_SIMD
6299 && (mode == V4SImode || mode == V8HImode
6300 || mode == V16QImode || mode == V2DImode
6301 || mode == V2SImode || mode == V4HImode
6302 || mode == V8QImode || mode == V2SFmode
6303 || mode == V4SFmode || mode == V2DFmode))
6304 return true;
6306 return false;
6309 /* Return appropriate SIMD container
6310 for MODE within a vector of WIDTH bits. */
6311 static enum machine_mode
6312 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6314 gcc_assert (width == 64 || width == 128);
6315 if (TARGET_SIMD)
6317 if (width == 128)
6318 switch (mode)
6320 case DFmode:
6321 return V2DFmode;
6322 case SFmode:
6323 return V4SFmode;
6324 case SImode:
6325 return V4SImode;
6326 case HImode:
6327 return V8HImode;
6328 case QImode:
6329 return V16QImode;
6330 case DImode:
6331 return V2DImode;
6332 default:
6333 break;
6335 else
6336 switch (mode)
6338 case SFmode:
6339 return V2SFmode;
6340 case SImode:
6341 return V2SImode;
6342 case HImode:
6343 return V4HImode;
6344 case QImode:
6345 return V8QImode;
6346 default:
6347 break;
6350 return word_mode;
6353 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6354 static enum machine_mode
6355 aarch64_preferred_simd_mode (enum machine_mode mode)
6357 return aarch64_simd_container_mode (mode, 128);
6360 /* Return the bitmask of possible vector sizes for the vectorizer
6361 to iterate over. */
6362 static unsigned int
6363 aarch64_autovectorize_vector_sizes (void)
6365 return (16 | 8);
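/* Summary of the mapping performed by aarch64_simd_container_mode above
   (illustrative only); modes with no 64-bit form, and all modes when
   !TARGET_SIMD, fall back to word_mode:

      element   128-bit container   64-bit container
      QImode    V16QImode           V8QImode
      HImode    V8HImode            V4HImode
      SImode    V4SImode            V2SImode
      DImode    V2DImode            word_mode
      SFmode    V4SFmode            V2SFmode
      DFmode    V2DFmode            word_mode                          */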
6368 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6369 vector types in order to conform to the AAPCS64 (see "Procedure
6370 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6371 qualify for emission with the mangled names defined in that document,
6372 a vector type must not only be of the correct mode but also be
6373 composed of AdvSIMD vector element types (e.g.
6374 __builtin_aarch64_simd_qi); these types are registered by
6375 aarch64_init_simd_builtins (). In other words, vector types defined
6376 in other ways e.g. via vector_size attribute will get default
6377 mangled names. */
6378 typedef struct
6380 enum machine_mode mode;
6381 const char *element_type_name;
6382 const char *mangled_name;
6383 } aarch64_simd_mangle_map_entry;
6385 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6386 /* 64-bit containerized types. */
6387 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6388 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6389 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6390 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6391 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6392 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6393 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6394 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6395 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6396 /* 128-bit containerized types. */
6397 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6398 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6399 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6400 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6401 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6402 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6403 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6404 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6405 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6406 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6407 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6408 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6409 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
6410 { VOIDmode, NULL, NULL }
6413 /* Implement TARGET_MANGLE_TYPE. */
6415 static const char *
6416 aarch64_mangle_type (const_tree type)
6418 /* The AArch64 ABI documents say that "__va_list" has to be
6419 mangled as if it is in the "std" namespace. */
6420 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6421 return "St9__va_list";
6423 /* Check the mode of the vector type, and the name of the vector
6424 element type, against the table. */
6425 if (TREE_CODE (type) == VECTOR_TYPE)
6427 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6429 while (pos->mode != VOIDmode)
6431 tree elt_type = TREE_TYPE (type);
6433 if (pos->mode == TYPE_MODE (type)
6434 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6435 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6436 pos->element_type_name))
6437 return pos->mangled_name;
6439 pos++;
6443 /* Use the default mangling. */
6444 return NULL;
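/* Usage sketch (illustrative): with the table above, a C++ declaration
   such as

       void f (int8x8_t);     // <arm_neon.h> type, mode V8QImode

   is expected to mangle as _Z1f10__Int8x8_t, whereas a type defined with
   plain __attribute__ ((vector_size (8))) falls back to the default
   mangling because its element type is not one of the AdvSIMD builtin
   element types registered by aarch64_init_simd_builtins.  */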
6447 /* Return the equivalent letter for size. */
6448 static char
6449 sizetochar (int size)
6451 switch (size)
6453 case 64: return 'd';
6454 case 32: return 's';
6455 case 16: return 'h';
6456 case 8 : return 'b';
6457 default: gcc_unreachable ();
6461 /* Return true iff X is a uniform vector of floating-point
6462 constants, and the constant can be represented in
6463 quarter-precision form.  Note, as aarch64_float_const_representable_p
6464 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6465 static bool
6466 aarch64_vect_float_const_representable_p (rtx x)
6468 int i = 0;
6469 REAL_VALUE_TYPE r0, ri;
6470 rtx x0, xi;
6472 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6473 return false;
6475 x0 = CONST_VECTOR_ELT (x, 0);
6476 if (!CONST_DOUBLE_P (x0))
6477 return false;
6479 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6481 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6483 xi = CONST_VECTOR_ELT (x, i);
6484 if (!CONST_DOUBLE_P (xi))
6485 return false;
6487 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6488 if (!REAL_VALUES_EQUAL (r0, ri))
6489 return false;
6492 return aarch64_float_const_representable_p (x0);
6495 /* Return true for valid and false for invalid. */
6496 bool
6497 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6498 struct simd_immediate_info *info)
6500 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6501 matches = 1; \
6502 for (i = 0; i < idx; i += (STRIDE)) \
6503 if (!(TEST)) \
6504 matches = 0; \
6505 if (matches) \
6507 immtype = (CLASS); \
6508 elsize = (ELSIZE); \
6509 eshift = (SHIFT); \
6510 emvn = (NEG); \
6511 break; \
6514 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6515 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6516 unsigned char bytes[16];
6517 int immtype = -1, matches;
6518 unsigned int invmask = inverse ? 0xff : 0;
6519 int eshift, emvn;
6521 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6523 if (! (aarch64_simd_imm_zero_p (op, mode)
6524 || aarch64_vect_float_const_representable_p (op)))
6525 return false;
6527 if (info)
6529 info->value = CONST_VECTOR_ELT (op, 0);
6530 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6531 info->mvn = false;
6532 info->shift = 0;
6535 return true;
6538 /* Splat vector constant out into a byte vector. */
6539 for (i = 0; i < n_elts; i++)
6541 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
6542 it must be laid out in the vector register in reverse order. */
6543 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
6544 unsigned HOST_WIDE_INT elpart;
6545 unsigned int part, parts;
6547 if (GET_CODE (el) == CONST_INT)
6549 elpart = INTVAL (el);
6550 parts = 1;
6552 else if (GET_CODE (el) == CONST_DOUBLE)
6554 elpart = CONST_DOUBLE_LOW (el);
6555 parts = 2;
6557 else
6558 gcc_unreachable ();
6560 for (part = 0; part < parts; part++)
6562 unsigned int byte;
6563 for (byte = 0; byte < innersize; byte++)
6565 bytes[idx++] = (elpart & 0xff) ^ invmask;
6566 elpart >>= BITS_PER_UNIT;
6568 if (GET_CODE (el) == CONST_DOUBLE)
6569 elpart = CONST_DOUBLE_HIGH (el);
6573 /* Sanity check. */
6574 gcc_assert (idx == GET_MODE_SIZE (mode));
6578 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6579 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6581 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6582 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6584 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6585 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6587 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6588 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6590 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6592 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6594 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6595 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6597 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6598 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6600 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6601 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6603 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6604 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6606 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6608 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6610 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6611 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6613 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6614 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6616 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6617 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6619 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6620 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6622 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6624 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6625 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6627 while (0);
6629 if (immtype == -1)
6630 return false;
6632 if (info)
6634 info->element_width = elsize;
6635 info->mvn = emvn != 0;
6636 info->shift = eshift;
6638 unsigned HOST_WIDE_INT imm = 0;
6640 if (immtype >= 12 && immtype <= 15)
6641 info->msl = true;
6643 /* Un-invert bytes of recognized vector, if necessary. */
6644 if (invmask != 0)
6645 for (i = 0; i < idx; i++)
6646 bytes[i] ^= invmask;
6648 if (immtype == 17)
6650 /* FIXME: Broken on 32-bit H_W_I hosts. */
6651 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6653 for (i = 0; i < 8; i++)
6654 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6655 << (i * BITS_PER_UNIT);
6658 info->value = GEN_INT (imm);
6660 else
6662 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6663 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6665 /* Construct 'abcdefgh' because the assembler cannot handle
6666 generic constants. */
6667 if (info->mvn)
6668 imm = ~imm;
6669 imm = (imm >> info->shift) & 0xff;
6670 info->value = GEN_INT (imm);
6674 return true;
6675 #undef CHECK
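/* Worked example for the recognizer above (illustrative): a V4SImode
   constant whose every element is 0x0000ab00 matches the 32-bit
   "byte 1 only" pattern, giving info->element_width = 32,
   info->shift = 8, info->mvn = false and info->value = 0xab, i.e. a
   single MOVI with LSL #8.  An element such as 0x00ab00cd has two
   non-zero bytes in different positions and is rejected.  */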
6678 static bool
6679 aarch64_const_vec_all_same_int_p (rtx x,
6680 HOST_WIDE_INT minval,
6681 HOST_WIDE_INT maxval)
6683 HOST_WIDE_INT firstval;
6684 int count, i;
6686 if (GET_CODE (x) != CONST_VECTOR
6687 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6688 return false;
6690 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6691 if (firstval < minval || firstval > maxval)
6692 return false;
6694 count = CONST_VECTOR_NUNITS (x);
6695 for (i = 1; i < count; i++)
6696 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6697 return false;
6699 return true;
6702 /* Check if immediate shift constants are within range. */
6703 bool
6704 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6706 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6707 if (left)
6708 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6709 else
6710 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6713 /* Return true if X is a uniform vector where all elements
6714 are either the floating-point constant 0.0 or the
6715 integer constant 0. */
6716 bool
6717 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6719 return x == CONST0_RTX (mode);
6722 bool
6723 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6725 HOST_WIDE_INT imm = INTVAL (x);
6726 int i;
6728 for (i = 0; i < 8; i++)
6730 unsigned int byte = imm & 0xff;
6731 if (byte != 0xff && byte != 0)
6732 return false;
6733 imm >>= 8;
6736 return true;
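/* Illustrative examples for the scalar check above: 64-bit values whose
   bytes are each 0x00 or 0xff, e.g. 0xff00ff00ff00ff00 or
   0x00000000ffffffff, are accepted; 0x00000000000000ab is rejected
   because its low byte is neither 0x00 nor 0xff.  */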
6739 bool
6740 aarch64_mov_operand_p (rtx x,
6741 enum aarch64_symbol_context context,
6742 enum machine_mode mode)
6744 if (GET_CODE (x) == HIGH
6745 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6746 return true;
6748 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6749 return true;
6751 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6752 return true;
6754 return aarch64_classify_symbolic_expression (x, context)
6755 == SYMBOL_TINY_ABSOLUTE;
6758 /* Return a const_int vector of VAL. */
6760 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6762 int nunits = GET_MODE_NUNITS (mode);
6763 rtvec v = rtvec_alloc (nunits);
6764 int i;
6766 for (i=0; i < nunits; i++)
6767 RTVEC_ELT (v, i) = GEN_INT (val);
6769 return gen_rtx_CONST_VECTOR (mode, v);
6772 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6774 bool
6775 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6777 enum machine_mode vmode;
6779 gcc_assert (!VECTOR_MODE_P (mode));
6780 vmode = aarch64_preferred_simd_mode (mode);
6781 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6782 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6785 /* Construct and return a PARALLEL RTX vector. */
6787 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6789 int nunits = GET_MODE_NUNITS (mode);
6790 rtvec v = rtvec_alloc (nunits / 2);
6791 int base = high ? nunits / 2 : 0;
6792 rtx t1;
6793 int i;
6795 for (i=0; i < nunits / 2; i++)
6796 RTVEC_ELT (v, i) = GEN_INT (base + i);
6798 t1 = gen_rtx_PARALLEL (mode, v);
6799 return t1;
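/* Example (illustrative): for V4SImode, HIGH == false yields
   (parallel [(const_int 0) (const_int 1)]) and HIGH == true yields
   (parallel [(const_int 2) (const_int 3)]).  */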
6802 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6803 HIGH (exclusive). */
6804 void
6805 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6807 HOST_WIDE_INT lane;
6808 gcc_assert (GET_CODE (operand) == CONST_INT);
6809 lane = INTVAL (operand);
6811 if (lane < low || lane >= high)
6812 error ("lane out of range");
6815 void
6816 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6818 gcc_assert (GET_CODE (operand) == CONST_INT);
6819 HOST_WIDE_INT lane = INTVAL (operand);
6821 if (lane < low || lane >= high)
6822 error ("constant out of range");
6825 /* Emit code to reinterpret one AdvSIMD type as another,
6826 without altering bits. */
6827 void
6828 aarch64_simd_reinterpret (rtx dest, rtx src)
6830 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6833 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6834 registers). */
6835 void
6836 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6837 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6838 rtx op1)
6840 rtx mem = gen_rtx_MEM (mode, destaddr);
6841 rtx tmp1 = gen_reg_rtx (mode);
6842 rtx tmp2 = gen_reg_rtx (mode);
6844 emit_insn (intfn (tmp1, op1, tmp2));
6846 emit_move_insn (mem, tmp1);
6847 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6848 emit_move_insn (mem, tmp2);
6851 /* Return TRUE if OP is a valid vector addressing mode. */
6852 bool
6853 aarch64_simd_mem_operand_p (rtx op)
6855 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6856 || GET_CODE (XEXP (op, 0)) == REG);
6859 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6860 not to early-clobber SRC registers in the process.
6862 We assume that the operands described by SRC and DEST represent a
6863 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6864 number of components into which the copy has been decomposed. */
6865 void
6866 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6867 rtx *src, unsigned int count)
6869 unsigned int i;
6871 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6872 || REGNO (operands[0]) < REGNO (operands[1]))
6874 for (i = 0; i < count; i++)
6876 operands[2 * i] = dest[i];
6877 operands[2 * i + 1] = src[i];
6880 else
6882 for (i = 0; i < count; i++)
6884 operands[2 * i] = dest[count - i - 1];
6885 operands[2 * i + 1] = src[count - i - 1];
6890 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6891 one of VSTRUCT modes: OI, CI or XI. */
6893 aarch64_simd_attr_length_move (rtx insn)
6895 enum machine_mode mode;
6897 extract_insn_cached (insn);
6899 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6901 mode = GET_MODE (recog_data.operand[0]);
6902 switch (mode)
6904 case OImode:
6905 return 8;
6906 case CImode:
6907 return 12;
6908 case XImode:
6909 return 16;
6910 default:
6911 gcc_unreachable ();
6914 return 4;
6917 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6918 alignment of a vector to 128 bits. */
6919 static HOST_WIDE_INT
6920 aarch64_simd_vector_alignment (const_tree type)
6922 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
6923 return MIN (align, 128);
6926 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6927 static bool
6928 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6930 if (is_packed)
6931 return false;
6933 /* We guarantee alignment for vectors up to 128-bits. */
6934 if (tree_int_cst_compare (TYPE_SIZE (type),
6935 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6936 return false;
6938 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6939 return true;
6942 /* If VALS is a vector constant that can be loaded into a register
6943 using DUP, generate instructions to do so and return an RTX to
6944 assign to the register. Otherwise return NULL_RTX. */
6945 static rtx
6946 aarch64_simd_dup_constant (rtx vals)
6948 enum machine_mode mode = GET_MODE (vals);
6949 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6950 int n_elts = GET_MODE_NUNITS (mode);
6951 bool all_same = true;
6952 rtx x;
6953 int i;
6955 if (GET_CODE (vals) != CONST_VECTOR)
6956 return NULL_RTX;
6958 for (i = 1; i < n_elts; ++i)
6960 x = CONST_VECTOR_ELT (vals, i);
6961 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6962 all_same = false;
6965 if (!all_same)
6966 return NULL_RTX;
6968 /* We can load this constant by using DUP and a constant in a
6969 single ARM register. This will be cheaper than a vector
6970 load. */
6971 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6972 return gen_rtx_VEC_DUPLICATE (mode, x);
6976 /* Generate code to load VALS, which is a PARALLEL containing only
6977 constants (for vec_init) or CONST_VECTOR, efficiently into a
6978 register. Returns an RTX to copy into the register, or NULL_RTX
6979 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
6980 static rtx
6981 aarch64_simd_make_constant (rtx vals)
6983 enum machine_mode mode = GET_MODE (vals);
6984 rtx const_dup;
6985 rtx const_vec = NULL_RTX;
6986 int n_elts = GET_MODE_NUNITS (mode);
6987 int n_const = 0;
6988 int i;
6990 if (GET_CODE (vals) == CONST_VECTOR)
6991 const_vec = vals;
6992 else if (GET_CODE (vals) == PARALLEL)
6994 /* A CONST_VECTOR must contain only CONST_INTs and
6995 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6996 Only store valid constants in a CONST_VECTOR. */
6997 for (i = 0; i < n_elts; ++i)
6999 rtx x = XVECEXP (vals, 0, i);
7000 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7001 n_const++;
7003 if (n_const == n_elts)
7004 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7006 else
7007 gcc_unreachable ();
7009 if (const_vec != NULL_RTX
7010 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7011 /* Load using MOVI/MVNI. */
7012 return const_vec;
7013 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7014 /* Loaded using DUP. */
7015 return const_dup;
7016 else if (const_vec != NULL_RTX)
7017 /* Load from constant pool. We cannot take advantage of single-cycle
7018 LD1 because we need a PC-relative addressing mode. */
7019 return const_vec;
7020 else
7021 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7022 We cannot construct an initializer. */
7023 return NULL_RTX;
7026 void
7027 aarch64_expand_vector_init (rtx target, rtx vals)
7029 enum machine_mode mode = GET_MODE (target);
7030 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7031 int n_elts = GET_MODE_NUNITS (mode);
7032 int n_var = 0, one_var = -1;
7033 bool all_same = true;
7034 rtx x, mem;
7035 int i;
7037 x = XVECEXP (vals, 0, 0);
7038 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7039 n_var = 1, one_var = 0;
7041 for (i = 1; i < n_elts; ++i)
7043 x = XVECEXP (vals, 0, i);
7044 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7045 ++n_var, one_var = i;
7047 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7048 all_same = false;
7051 if (n_var == 0)
7053 rtx constant = aarch64_simd_make_constant (vals);
7054 if (constant != NULL_RTX)
7056 emit_move_insn (target, constant);
7057 return;
7061 /* Splat a single non-constant element if we can. */
7062 if (all_same)
7064 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7065 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7066 return;
7069 /* One field is non-constant. Load constant then overwrite varying
7070 field. This is more efficient than using the stack. */
7071 if (n_var == 1)
7073 rtx copy = copy_rtx (vals);
7074 rtx index = GEN_INT (one_var);
7075 enum insn_code icode;
7077 /* Load constant part of vector, substitute neighboring value for
7078 varying element. */
7079 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7080 aarch64_expand_vector_init (target, copy);
7082 /* Insert variable. */
7083 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7084 icode = optab_handler (vec_set_optab, mode);
7085 gcc_assert (icode != CODE_FOR_nothing);
7086 emit_insn (GEN_FCN (icode) (target, x, index));
7087 return;
7090 /* Construct the vector in memory one field at a time
7091 and load the whole vector. */
7092 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7093 for (i = 0; i < n_elts; i++)
7094 emit_move_insn (adjust_address_nv (mem, inner_mode,
7095 i * GET_MODE_SIZE (inner_mode)),
7096 XVECEXP (vals, 0, i));
7097 emit_move_insn (target, mem);
7101 static unsigned HOST_WIDE_INT
7102 aarch64_shift_truncation_mask (enum machine_mode mode)
7104 return
7105 (aarch64_vector_mode_supported_p (mode)
7106 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
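/* Worked example (illustrative): for scalar SImode and DImode the hook
   above returns 31 and 63 respectively (the hardware truncates shift
   amounts), while for vector and vector-structure modes it returns 0,
   i.e. no truncation may be assumed.  */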
7109 #ifndef TLS_SECTION_ASM_FLAG
7110 #define TLS_SECTION_ASM_FLAG 'T'
7111 #endif
7113 void
7114 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7115 tree decl ATTRIBUTE_UNUSED)
7117 char flagchars[10], *f = flagchars;
7119 /* If we have already declared this section, we can use an
7120 abbreviated form to switch back to it -- unless this section is
7121 part of a COMDAT group, in which case GAS requires the full
7122 declaration every time. */
7123 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7124 && (flags & SECTION_DECLARED))
7126 fprintf (asm_out_file, "\t.section\t%s\n", name);
7127 return;
7130 if (!(flags & SECTION_DEBUG))
7131 *f++ = 'a';
7132 if (flags & SECTION_WRITE)
7133 *f++ = 'w';
7134 if (flags & SECTION_CODE)
7135 *f++ = 'x';
7136 if (flags & SECTION_SMALL)
7137 *f++ = 's';
7138 if (flags & SECTION_MERGE)
7139 *f++ = 'M';
7140 if (flags & SECTION_STRINGS)
7141 *f++ = 'S';
7142 if (flags & SECTION_TLS)
7143 *f++ = TLS_SECTION_ASM_FLAG;
7144 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7145 *f++ = 'G';
7146 *f = '\0';
7148 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7150 if (!(flags & SECTION_NOTYPE))
7152 const char *type;
7153 const char *format;
7155 if (flags & SECTION_BSS)
7156 type = "nobits";
7157 else
7158 type = "progbits";
7160 #ifdef TYPE_OPERAND_FMT
7161 format = "," TYPE_OPERAND_FMT;
7162 #else
7163 format = ",@%s";
7164 #endif
7166 fprintf (asm_out_file, format, type);
7168 if (flags & SECTION_ENTSIZE)
7169 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7170 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7172 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7173 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7174 else
7175 fprintf (asm_out_file, ",%s,comdat",
7176 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7180 putc ('\n', asm_out_file);
7183 /* Select a format to encode pointers in exception handling data. */
7185 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7187 int type;
7188 switch (aarch64_cmodel)
7190 case AARCH64_CMODEL_TINY:
7191 case AARCH64_CMODEL_TINY_PIC:
7192 case AARCH64_CMODEL_SMALL:
7193 case AARCH64_CMODEL_SMALL_PIC:
7194 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7195 for everything. */
7196 type = DW_EH_PE_sdata4;
7197 break;
7198 default:
7199 /* No assumptions here. 8-byte relocs required. */
7200 type = DW_EH_PE_sdata8;
7201 break;
7203 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7206 /* Emit load exclusive. */
7208 static void
7209 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7210 rtx mem, rtx model_rtx)
7212 rtx (*gen) (rtx, rtx, rtx);
7214 switch (mode)
7216 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7217 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7218 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7219 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7220 default:
7221 gcc_unreachable ();
7224 emit_insn (gen (rval, mem, model_rtx));
7227 /* Emit store exclusive. */
7229 static void
7230 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7231 rtx rval, rtx mem, rtx model_rtx)
7233 rtx (*gen) (rtx, rtx, rtx, rtx);
7235 switch (mode)
7237 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7238 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7239 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7240 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7241 default:
7242 gcc_unreachable ();
7245 emit_insn (gen (bval, rval, mem, model_rtx));
7248 /* Mark the previous jump instruction as unlikely. */
7250 static void
7251 aarch64_emit_unlikely_jump (rtx insn)
7253 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7255 insn = emit_jump_insn (insn);
7256 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7259 /* Expand a compare and swap pattern. */
7261 void
7262 aarch64_expand_compare_and_swap (rtx operands[])
7264 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7265 enum machine_mode mode, cmp_mode;
7266 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7268 bval = operands[0];
7269 rval = operands[1];
7270 mem = operands[2];
7271 oldval = operands[3];
7272 newval = operands[4];
7273 is_weak = operands[5];
7274 mod_s = operands[6];
7275 mod_f = operands[7];
7276 mode = GET_MODE (mem);
7277 cmp_mode = mode;
7279 /* Normally the succ memory model must be stronger than fail, but in the
7280 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7281 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7283 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7284 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7285 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7287 switch (mode)
7289 case QImode:
7290 case HImode:
7291 /* For short modes, we're going to perform the comparison in SImode,
7292 so do the zero-extension now. */
7293 cmp_mode = SImode;
7294 rval = gen_reg_rtx (SImode);
7295 oldval = convert_modes (SImode, mode, oldval, true);
7296 /* Fall through. */
7298 case SImode:
7299 case DImode:
7300 /* Force the value into a register if needed. */
7301 if (!aarch64_plus_operand (oldval, mode))
7302 oldval = force_reg (cmp_mode, oldval);
7303 break;
7305 default:
7306 gcc_unreachable ();
7309 switch (mode)
7311 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7312 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7313 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7314 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7315 default:
7316 gcc_unreachable ();
7319 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7321 if (mode == QImode || mode == HImode)
7322 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7324 x = gen_rtx_REG (CCmode, CC_REGNUM);
7325 x = gen_rtx_EQ (SImode, x, const0_rtx);
7326 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7329 /* Split a compare and swap pattern. */
7331 void
7332 aarch64_split_compare_and_swap (rtx operands[])
7334 rtx rval, mem, oldval, newval, scratch;
7335 enum machine_mode mode;
7336 bool is_weak;
7337 rtx label1, label2, x, cond;
7339 rval = operands[0];
7340 mem = operands[1];
7341 oldval = operands[2];
7342 newval = operands[3];
7343 is_weak = (operands[4] != const0_rtx);
7344 scratch = operands[7];
7345 mode = GET_MODE (mem);
7347 label1 = NULL_RTX;
7348 if (!is_weak)
7350 label1 = gen_label_rtx ();
7351 emit_label (label1);
7353 label2 = gen_label_rtx ();
7355 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7357 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7358 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7359 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7360 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7361 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7363 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7365 if (!is_weak)
7367 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7368 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7369 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7370 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7372 else
7374 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7375 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7376 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7379 emit_label (label2);
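/* Shape of the strong compare-and-swap sequence emitted above
   (illustrative pseudo-assembly; register names are made up and the
   acquire/release forms of the exclusives depend on the memory model
   operand):

       .Lretry:
         ldxr   w0, [x1]        ; load-exclusive the current value
         cmp    w0, w2          ; compare with the expected value
         b.ne   .Ldone          ; mismatch -> CAS fails
         stxr   w3, w4, [x1]    ; store-exclusive the new value
         cbnz   w3, .Lretry     ; reservation lost -> try again
       .Ldone:                                                        */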
7382 /* Split an atomic operation. */
7384 void
7385 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7386 rtx value, rtx model_rtx, rtx cond)
7388 enum machine_mode mode = GET_MODE (mem);
7389 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7390 rtx label, x;
7392 label = gen_label_rtx ();
7393 emit_label (label);
7395 if (new_out)
7396 new_out = gen_lowpart (wmode, new_out);
7397 if (old_out)
7398 old_out = gen_lowpart (wmode, old_out);
7399 else
7400 old_out = new_out;
7401 value = simplify_gen_subreg (wmode, value, mode, 0);
7403 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7405 switch (code)
7407 case SET:
7408 new_out = value;
7409 break;
7411 case NOT:
7412 x = gen_rtx_AND (wmode, old_out, value);
7413 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7414 x = gen_rtx_NOT (wmode, new_out);
7415 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7416 break;
7418 case MINUS:
7419 if (CONST_INT_P (value))
7421 value = GEN_INT (-INTVAL (value));
7422 code = PLUS;
7424 /* Fall through. */
7426 default:
7427 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7428 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7429 break;
7432 aarch64_emit_store_exclusive (mode, cond, mem,
7433 gen_lowpart (mode, new_out), model_rtx);
7435 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7436 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7437 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7438 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7441 static void
7442 aarch64_print_extension (void)
7444 const struct aarch64_option_extension *opt = NULL;
7446 for (opt = all_extensions; opt->name != NULL; opt++)
7447 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7448 asm_fprintf (asm_out_file, "+%s", opt->name);
7450 asm_fprintf (asm_out_file, "\n");
7453 static void
7454 aarch64_start_file (void)
7456 if (selected_arch)
7458 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7459 aarch64_print_extension ();
7461 else if (selected_cpu)
7463 const char *truncated_name
7464 = aarch64_rewrite_selected_cpu (selected_cpu->name);
7465 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
7466 aarch64_print_extension ();
7468 default_file_start();
7471 /* Target hook for c_mode_for_suffix. */
7472 static enum machine_mode
7473 aarch64_c_mode_for_suffix (char suffix)
7475 if (suffix == 'q')
7476 return TFmode;
7478 return VOIDmode;
7481 /* We can only represent floating point constants which will fit in
7482 "quarter-precision" values. These values are characterised by
7483 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
7486 (-1)^s * (n/16) * 2^r
7488 Where:
7489 's' is the sign bit.
7490 'n' is an integer in the range 16 <= n <= 31.
7491 'r' is an integer in the range -3 <= r <= 4. */
7493 /* Return true iff X can be represented by a quarter-precision
7494 floating point immediate operand.  Note, we cannot represent 0.0. */
7495 bool
7496 aarch64_float_const_representable_p (rtx x)
7498 /* This represents our current view of how many bits
7499 make up the mantissa. */
7500 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7501 int exponent;
7502 unsigned HOST_WIDE_INT mantissa, mask;
7503 HOST_WIDE_INT m1, m2;
7504 REAL_VALUE_TYPE r, m;
7506 if (!CONST_DOUBLE_P (x))
7507 return false;
7509 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7511 /* We cannot represent infinities, NaNs or +/-zero. We won't
7512 know if we have +zero until we analyse the mantissa, but we
7513 can reject the other invalid values. */
7514 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7515 || REAL_VALUE_MINUS_ZERO (r))
7516 return false;
7518 /* Extract exponent. */
7519 r = real_value_abs (&r);
7520 exponent = REAL_EXP (&r);
7522 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7523 highest (sign) bit, with a fixed binary point at bit point_pos.
7524 m1 holds the low part of the mantissa, m2 the high part.
7525 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7526 bits for the mantissa, this can fail (low bits will be lost). */
7527 real_ldexp (&m, &r, point_pos - exponent);
7528 REAL_VALUE_TO_INT (&m1, &m2, m);
7530 /* If the low part of the mantissa has bits set we cannot represent
7531 the value. */
7532 if (m1 != 0)
7533 return false;
7534 /* We have rejected the lower HOST_WIDE_INT, so update our
7535 understanding of how many bits lie in the mantissa and
7536 look only at the high HOST_WIDE_INT. */
7537 mantissa = m2;
7538 point_pos -= HOST_BITS_PER_WIDE_INT;
7540 /* We can only represent values with a mantissa of the form 1.xxxx. */
7541 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7542 if ((mantissa & mask) != 0)
7543 return false;
7545 /* Having filtered unrepresentable values, we may now remove all
7546 but the highest 5 bits. */
7547 mantissa >>= point_pos - 5;
7549 /* We cannot represent the value 0.0, so reject it. This is handled
7550 elsewhere. */
7551 if (mantissa == 0)
7552 return false;
7554 /* Then, as bit 4 is always set, we can mask it off, leaving
7555 the mantissa in the range [0, 15]. */
7556 mantissa &= ~(1 << 4);
7557 gcc_assert (mantissa <= 15);
7559 /* GCC internally does not use IEEE754-like encoding (where normalized
7560 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7561 Our mantissa values are shifted 4 places to the left relative to
7562 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7563 by 5 places to correct for GCC's representation. */
7564 exponent = 5 - exponent;
7566 return (exponent >= 0 && exponent <= 7);
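/* Worked examples for the check above (illustrative), using the
   (-1)^s * (n/16) * 2^r form described earlier:
     1.0   = (16/16) * 2^0     -> representable
     0.25  = (16/16) * 2^-2    -> representable
     31.0  = (31/16) * 2^4     -> representable (largest magnitude)
     0.1   has no exact (n/16) * 2^r form with 16 <= n <= 31 and
           -3 <= r <= 4        -> rejected.  */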
7569 char*
7570 aarch64_output_simd_mov_immediate (rtx const_vector,
7571 enum machine_mode mode,
7572 unsigned width)
7574 bool is_valid;
7575 static char templ[40];
7576 const char *mnemonic;
7577 const char *shift_op;
7578 unsigned int lane_count = 0;
7579 char element_char;
7581 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7583 /* This will return true to show CONST_VECTOR is legal for use as an
7584 AdvSIMD MOVI (or, implicitly, MVNI) instruction immediate.  It will
7585 also update INFO to show how the immediate should be generated. */
7586 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7587 gcc_assert (is_valid);
7589 element_char = sizetochar (info.element_width);
7590 lane_count = width / info.element_width;
7592 mode = GET_MODE_INNER (mode);
7593 if (mode == SFmode || mode == DFmode)
7595 gcc_assert (info.shift == 0 && ! info.mvn);
7596 if (aarch64_float_const_zero_rtx_p (info.value))
7597 info.value = GEN_INT (0);
7598 else
7600 #define buf_size 20
7601 REAL_VALUE_TYPE r;
7602 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7603 char float_buf[buf_size] = {'\0'};
7604 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7605 #undef buf_size
7607 if (lane_count == 1)
7608 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7609 else
7610 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7611 lane_count, element_char, float_buf);
7612 return templ;
7616 mnemonic = info.mvn ? "mvni" : "movi";
7617 shift_op = info.msl ? "msl" : "lsl";
7619 if (lane_count == 1)
7620 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7621 mnemonic, UINTVAL (info.value));
7622 else if (info.shift)
7623 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7624 ", %s %d", mnemonic, lane_count, element_char,
7625 UINTVAL (info.value), shift_op, info.shift);
7626 else
7627 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7628 mnemonic, lane_count, element_char, UINTVAL (info.value));
7629 return templ;
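/* Usage sketch (illustrative; the exact text depends on the immediate):
   for a V4SImode constant of four copies of 0x0000ab00 the template
   returned is along the lines of

       movi\t%0.4s, 0xab, lsl 8

   while floating-point vectors accepted by
   aarch64_float_const_representable_p are emitted as an fmov with a
   decimal immediate.  */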
7632 char*
7633 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7634 enum machine_mode mode)
7636 enum machine_mode vmode;
7638 gcc_assert (!VECTOR_MODE_P (mode));
7639 vmode = aarch64_simd_container_mode (mode, 64);
7640 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7641 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7644 /* Split operands into moves from op[1] + op[2] into op[0]. */
7646 void
7647 aarch64_split_combinev16qi (rtx operands[3])
7649 unsigned int dest = REGNO (operands[0]);
7650 unsigned int src1 = REGNO (operands[1]);
7651 unsigned int src2 = REGNO (operands[2]);
7652 enum machine_mode halfmode = GET_MODE (operands[1]);
7653 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7654 rtx destlo, desthi;
7656 gcc_assert (halfmode == V16QImode);
7658 if (src1 == dest && src2 == dest + halfregs)
7660 /* No-op move. Can't split to nothing; emit something. */
7661 emit_note (NOTE_INSN_DELETED);
7662 return;
7665 /* Preserve register attributes for variable tracking. */
7666 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7667 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7668 GET_MODE_SIZE (halfmode));
7670 /* Special case of reversed high/low parts. */
7671 if (reg_overlap_mentioned_p (operands[2], destlo)
7672 && reg_overlap_mentioned_p (operands[1], desthi))
7674 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7675 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7676 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7678 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7680 /* Try to avoid unnecessary moves if part of the result
7681 is in the right place already. */
7682 if (src1 != dest)
7683 emit_move_insn (destlo, operands[1]);
7684 if (src2 != dest + halfregs)
7685 emit_move_insn (desthi, operands[2]);
7687 else
7689 if (src2 != dest + halfregs)
7690 emit_move_insn (desthi, operands[2]);
7691 if (src1 != dest)
7692 emit_move_insn (destlo, operands[1]);
7696 /* vec_perm support. */
7698 #define MAX_VECT_LEN 16
7700 struct expand_vec_perm_d
7702 rtx target, op0, op1;
7703 unsigned char perm[MAX_VECT_LEN];
7704 enum machine_mode vmode;
7705 unsigned char nelt;
7706 bool one_vector_p;
7707 bool testing_p;
7710 /* Generate a variable permutation. */
7712 static void
7713 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7715 enum machine_mode vmode = GET_MODE (target);
7716 bool one_vector_p = rtx_equal_p (op0, op1);
7718 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7719 gcc_checking_assert (GET_MODE (op0) == vmode);
7720 gcc_checking_assert (GET_MODE (op1) == vmode);
7721 gcc_checking_assert (GET_MODE (sel) == vmode);
7722 gcc_checking_assert (TARGET_SIMD);
7724 if (one_vector_p)
7726 if (vmode == V8QImode)
7728 /* Expand the argument to a V16QI mode by duplicating it. */
7729 rtx pair = gen_reg_rtx (V16QImode);
7730 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7731 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7733 else
7735 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7738 else
7740 rtx pair;
7742 if (vmode == V8QImode)
7744 pair = gen_reg_rtx (V16QImode);
7745 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7746 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7748 else
7750 pair = gen_reg_rtx (OImode);
7751 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7752 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7757 void
7758 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7760 enum machine_mode vmode = GET_MODE (target);
7761 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7762 bool one_vector_p = rtx_equal_p (op0, op1);
7763 rtx rmask[MAX_VECT_LEN], mask;
7765 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7767 /* The TBL instruction does not use a modulo index, so we must take care
7768 of that ourselves. */
7769 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7770 for (i = 0; i < nelt; ++i)
7771 rmask[i] = mask;
7772 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7773 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7775 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7778 /* Recognize patterns suitable for the TRN instructions. */
7779 static bool
7780 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7782 unsigned int i, odd, mask, nelt = d->nelt;
7783 rtx out, in0, in1, x;
7784 rtx (*gen) (rtx, rtx, rtx);
7785 enum machine_mode vmode = d->vmode;
7787 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7788 return false;
7790 /* Note that these are little-endian tests.
7791 We correct for big-endian later. */
7792 if (d->perm[0] == 0)
7793 odd = 0;
7794 else if (d->perm[0] == 1)
7795 odd = 1;
7796 else
7797 return false;
7798 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7800 for (i = 0; i < nelt; i += 2)
7802 if (d->perm[i] != i + odd)
7803 return false;
7804 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7805 return false;
7808 /* Success! */
7809 if (d->testing_p)
7810 return true;
7812 in0 = d->op0;
7813 in1 = d->op1;
7814 if (BYTES_BIG_ENDIAN)
7816 x = in0, in0 = in1, in1 = x;
7817 odd = !odd;
7819 out = d->target;
7821 if (odd)
7823 switch (vmode)
7825 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7826 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7827 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7828 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7829 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7830 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7831 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7832 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7833 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7834 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7835 default:
7836 return false;
7839 else
7841 switch (vmode)
7843 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7844 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7845 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7846 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7847 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7848 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7849 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7850 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7851 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7852 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7853 default:
7854 return false;
7858 emit_insn (gen (out, in0, in1));
7859 return true;
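/* Worked example (little-endian, illustrative): for V4SImode with two
   input vectors the index patterns recognized above are
       { 0, 4, 2, 6 } -> TRN1   (odd == 0)
       { 1, 5, 3, 7 } -> TRN2   (odd == 1)
   where indices 4..7 select elements of the second operand.  */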
7862 /* Recognize patterns suitable for the UZP instructions. */
7863 static bool
7864 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7866 unsigned int i, odd, mask, nelt = d->nelt;
7867 rtx out, in0, in1, x;
7868 rtx (*gen) (rtx, rtx, rtx);
7869 enum machine_mode vmode = d->vmode;
7871 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7872 return false;
7874 /* Note that these are little-endian tests.
7875 We correct for big-endian later. */
7876 if (d->perm[0] == 0)
7877 odd = 0;
7878 else if (d->perm[0] == 1)
7879 odd = 1;
7880 else
7881 return false;
7882 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7884 for (i = 0; i < nelt; i++)
7886 unsigned elt = (i * 2 + odd) & mask;
7887 if (d->perm[i] != elt)
7888 return false;
7891 /* Success! */
7892 if (d->testing_p)
7893 return true;
7895 in0 = d->op0;
7896 in1 = d->op1;
7897 if (BYTES_BIG_ENDIAN)
7899 x = in0, in0 = in1, in1 = x;
7900 odd = !odd;
7902 out = d->target;
7904 if (odd)
7906 switch (vmode)
7908 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7909 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7910 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7911 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7912 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7913 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7914 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7915 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7916 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7917 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7918 default:
7919 return false;
7922 else
7924 switch (vmode)
7926 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7927 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7928 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7929 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7930 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7931 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7932 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7933 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7934 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7935 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7936 default:
7937 return false;
7941 emit_insn (gen (out, in0, in1));
7942 return true;
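/* Worked example (little-endian, illustrative): for V4SImode with two
   input vectors the index patterns recognized above are
       { 0, 2, 4, 6 } -> UZP1   (odd == 0)
       { 1, 3, 5, 7 } -> UZP2   (odd == 1).  */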
7945 /* Recognize patterns suitable for the ZIP instructions. */
7946 static bool
7947 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7949 unsigned int i, high, mask, nelt = d->nelt;
7950 rtx out, in0, in1, x;
7951 rtx (*gen) (rtx, rtx, rtx);
7952 enum machine_mode vmode = d->vmode;
7954 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7955 return false;
7957 /* Note that these are little-endian tests.
7958 We correct for big-endian later. */
7959 high = nelt / 2;
7960 if (d->perm[0] == high)
7961 /* Do Nothing. */
7963 else if (d->perm[0] == 0)
7964 high = 0;
7965 else
7966 return false;
7967 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7969 for (i = 0; i < nelt / 2; i++)
7971 unsigned elt = (i + high) & mask;
7972 if (d->perm[i * 2] != elt)
7973 return false;
7974 elt = (elt + nelt) & mask;
7975 if (d->perm[i * 2 + 1] != elt)
7976 return false;
7979 /* Success! */
7980 if (d->testing_p)
7981 return true;
7983 in0 = d->op0;
7984 in1 = d->op1;
7985 if (BYTES_BIG_ENDIAN)
7987 x = in0, in0 = in1, in1 = x;
7988 high = !high;
7990 out = d->target;
7992 if (high)
7994 switch (vmode)
7996 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7997 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7998 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7999 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8000 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8001 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8002 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8003 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8004 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8005 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8006 default:
8007 return false;
8010 else
8012 switch (vmode)
8014 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8015 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8016 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8017 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8018 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8019 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8020 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8021 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8022 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8023 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8024 default:
8025 return false;
8029 emit_insn (gen (out, in0, in1));
8030 return true;
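/* Worked example (little-endian, illustrative): for V4SImode with two
   input vectors the index patterns recognized above are
       { 0, 4, 1, 5 } -> ZIP1   (high == 0)
       { 2, 6, 3, 7 } -> ZIP2   (high == nelt / 2).  */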
8033 static bool
8034 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8036 rtx (*gen) (rtx, rtx, rtx);
8037 rtx out = d->target;
8038 rtx in0;
8039 enum machine_mode vmode = d->vmode;
8040 unsigned int i, elt, nelt = d->nelt;
8041 rtx lane;
8043 /* TODO: This may not be big-endian safe. */
8044 if (BYTES_BIG_ENDIAN)
8045 return false;
8047 elt = d->perm[0];
8048 for (i = 1; i < nelt; i++)
8050 if (elt != d->perm[i])
8051 return false;
8054 /* The generic preparation in aarch64_expand_vec_perm_const_1
8055 swaps the operand order and the permute indices if it finds
8056 d->perm[0] to be in the second operand. Thus, we can always
8057 use d->op0 and need not do any extra arithmetic to get the
8058 correct lane number. */
8059 in0 = d->op0;
8060 lane = GEN_INT (elt);
8062 switch (vmode)
8064 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8065 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8066 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8067 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8068 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8069 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8070 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8071 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8072 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8073 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8074 default:
8075 return false;
8078 emit_insn (gen (out, in0, lane));
8079 return true;
8082 static bool
8083 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8085 rtx rperm[MAX_VECT_LEN], sel;
8086 enum machine_mode vmode = d->vmode;
8087 unsigned int i, nelt = d->nelt;
8089 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8090 numbering of elements for big-endian, we must reverse the order. */
8091 if (BYTES_BIG_ENDIAN)
8092 return false;
8094 if (d->testing_p)
8095 return true;
8097 /* Generic code will try constant permutation twice: once with the
8098 original mode and again with the elements lowered to QImode.
8099 So wait and don't do the selector expansion ourselves. */
8100 if (vmode != V8QImode && vmode != V16QImode)
8101 return false;
8103 for (i = 0; i < nelt; ++i)
8104 rperm[i] = GEN_INT (d->perm[i]);
8105 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8106 sel = force_reg (vmode, sel);
8108 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8109 return true;
8112 static bool
8113 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8115 /* The pattern matching functions above are written to look for a small
8116 number to begin the sequence (0, 1, N/2). If we begin with an index
8117 from the second operand, we can swap the operands. */
8118 if (d->perm[0] >= d->nelt)
8120 unsigned i, nelt = d->nelt;
8121 rtx x;
8123 for (i = 0; i < nelt; ++i)
8124 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8126 x = d->op0;
8127 d->op0 = d->op1;
8128 d->op1 = x;
8131 if (TARGET_SIMD)
8133 if (aarch64_evpc_zip (d))
8134 return true;
8135 else if (aarch64_evpc_uzp (d))
8136 return true;
8137 else if (aarch64_evpc_trn (d))
8138 return true;
8139 else if (aarch64_evpc_dup (d))
8140 return true;
8141 return aarch64_evpc_tbl (d);
8143 return false;
8146 /* Expand a vec_perm_const pattern. */
8148 bool
8149 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8151 struct expand_vec_perm_d d;
8152 int i, nelt, which;
8154 d.target = target;
8155 d.op0 = op0;
8156 d.op1 = op1;
8158 d.vmode = GET_MODE (target);
8159 gcc_assert (VECTOR_MODE_P (d.vmode));
8160 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8161 d.testing_p = false;
8163 for (i = which = 0; i < nelt; ++i)
8165 rtx e = XVECEXP (sel, 0, i);
8166 int ei = INTVAL (e) & (2 * nelt - 1);
8167 which |= (ei < nelt ? 1 : 2);
8168 d.perm[i] = ei;
8171 switch (which)
8173 default:
8174 gcc_unreachable ();
8176 case 3:
8177 d.one_vector_p = false;
8178 if (!rtx_equal_p (op0, op1))
8179 break;
8181 /* The elements of PERM do not suggest that only the first operand
8182 is used, but both operands are identical. Allow easier matching
8183 of the permutation by folding the permutation into the single
8184 input vector. */
8185 /* Fall Through. */
8186 case 2:
8187 for (i = 0; i < nelt; ++i)
8188 d.perm[i] &= nelt - 1;
8189 d.op0 = op1;
8190 d.one_vector_p = true;
8191 break;
8193 case 1:
8194 d.op1 = op0;
8195 d.one_vector_p = true;
8196 break;
8199 return aarch64_expand_vec_perm_const_1 (&d);
8202 static bool
8203 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8204 const unsigned char *sel)
8206 struct expand_vec_perm_d d;
8207 unsigned int i, nelt, which;
8208 bool ret;
8210 d.vmode = vmode;
8211 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8212 d.testing_p = true;
8213 memcpy (d.perm, sel, nelt);
8215 /* Calculate whether all elements are in one vector. */
8216 for (i = which = 0; i < nelt; ++i)
8218 unsigned char e = d.perm[i];
8219 gcc_assert (e < 2 * nelt);
8220 which |= (e < nelt ? 1 : 2);
8223 /* If all elements are from the second vector, reindex as if from the
8224 first vector. */
8225 if (which == 2)
8226 for (i = 0; i < nelt; ++i)
8227 d.perm[i] -= nelt;
8229 /* Check whether the mask can be applied to a single vector. */
8230 d.one_vector_p = (which != 3);
8232 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8233 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8234 if (!d.one_vector_p)
8235 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8237 start_sequence ();
8238 ret = aarch64_expand_vec_perm_const_1 (&d);
8239 end_sequence ();
8241 return ret;
8244 /* Implement target hook CANNOT_CHANGE_MODE_CLASS.  */
8245 bool
8246 aarch64_cannot_change_mode_class (enum machine_mode from,
8247                                   enum machine_mode to,
8248                                   enum reg_class rclass)
8249 {
8250   /* Full-reg subregs are allowed on general regs or any class if they are
8251      the same size.  */
8252   if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
8253       || !reg_classes_intersect_p (FP_REGS, rclass))
8254     return false;
8255 
8256   /* Limited combinations of subregs are safe on FPREGs.  In particular:
8257      1. Vector mode to scalar mode, where one lane of the vector is accessed.
8258      2. Scalar to scalar, for integer modes or same-size float modes.
8259      3. Vector mode to vector mode.  */
8260   if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
8261     {
8262       if (aarch64_vector_mode_supported_p (from)
8263           && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
8264         return false;
8265 
8266       if (GET_MODE_NUNITS (from) == 1
8267           && GET_MODE_NUNITS (to) == 1
8268           && (GET_MODE_CLASS (from) == MODE_INT
8269               || from == to))
8270         return false;
8271 
8272       if (aarch64_vector_mode_supported_p (from)
8273           && aarch64_vector_mode_supported_p (to))
8274         return false;
8275     }
8276 
8277   return true;
8278 }
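/* Illustrative applications of the rules above, assuming the usual AArch64
   mode sizes, for a value allocated to FP_REGS:

     V4SImode -> SImode   : allowed (rule 1; one 4-byte lane of a 16-byte
                            vector is accessed), so the hook returns false.
     V4SImode -> V2SImode : allowed (rule 3; both are supported vector
                            modes), so the hook returns false.
     TFmode   -> DImode   : a 16-byte scalar narrowed to 8 bytes matches
                            none of the rules, so the hook returns true.

   Same-size mode changes, and classes that do not overlap FP_REGS, are
   accepted by the first check and never reach these rules.  */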
8280 #undef TARGET_ADDRESS_COST
8281 #define TARGET_ADDRESS_COST aarch64_address_cost
8283 /* This hook determines whether unnamed bitfields affect the alignment
8284    of the containing structure.  The hook returns true if the structure
8285    should inherit the alignment requirements of an unnamed bitfield's
8286    type.  */
8287 #undef TARGET_ALIGN_ANON_BITFIELD
8288 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
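/* A minimal sketch of what the hook above means at the C level (the struct
   is hypothetical and the layout assumes the usual AArch64 type sizes):

     struct s { char c; int : 4; };

   With the hook returning true, the unnamed bitfield contributes the
   alignment of its declared type (int), so struct s is 4-byte aligned
   rather than taking only the alignment of its named members.  */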
8290 #undef TARGET_ASM_ALIGNED_DI_OP
8291 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8293 #undef TARGET_ASM_ALIGNED_HI_OP
8294 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8296 #undef TARGET_ASM_ALIGNED_SI_OP
8297 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8299 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8300 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8301 hook_bool_const_tree_hwi_hwi_const_tree_true
8303 #undef TARGET_ASM_FILE_START
8304 #define TARGET_ASM_FILE_START aarch64_start_file
8306 #undef TARGET_ASM_OUTPUT_MI_THUNK
8307 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8309 #undef TARGET_ASM_SELECT_RTX_SECTION
8310 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8312 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8313 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8315 #undef TARGET_BUILD_BUILTIN_VA_LIST
8316 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8318 #undef TARGET_CALLEE_COPIES
8319 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8321 #undef TARGET_CAN_ELIMINATE
8322 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8324 #undef TARGET_CANNOT_FORCE_CONST_MEM
8325 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8327 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8328 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8330 /* Only the least significant bit is used for initialization guard
8331 variables. */
8332 #undef TARGET_CXX_GUARD_MASK_BIT
8333 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
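/* Roughly, with the hook above returning true, code that initializes a
   function-local static tests only bit 0 of the 64-bit guard variable; a
   hedged sketch of the logic the C++ front end emits:

     if ((guard & 1) == 0)
       {
         if (__cxa_guard_acquire (&guard))
           {
             ...construct the object...
             __cxa_guard_release (&guard);
           }
       }
 */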
8335 #undef TARGET_C_MODE_FOR_SUFFIX
8336 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8338 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8339 #undef TARGET_DEFAULT_TARGET_FLAGS
8340 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8341 #endif
8343 #undef TARGET_CLASS_MAX_NREGS
8344 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8346 #undef TARGET_BUILTIN_DECL
8347 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8349 #undef TARGET_EXPAND_BUILTIN
8350 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8352 #undef TARGET_EXPAND_BUILTIN_VA_START
8353 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8355 #undef TARGET_FOLD_BUILTIN
8356 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8358 #undef TARGET_FUNCTION_ARG
8359 #define TARGET_FUNCTION_ARG aarch64_function_arg
8361 #undef TARGET_FUNCTION_ARG_ADVANCE
8362 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8364 #undef TARGET_FUNCTION_ARG_BOUNDARY
8365 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8367 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8368 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8370 #undef TARGET_FUNCTION_VALUE
8371 #define TARGET_FUNCTION_VALUE aarch64_function_value
8373 #undef TARGET_FUNCTION_VALUE_REGNO_P
8374 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8376 #undef TARGET_FRAME_POINTER_REQUIRED
8377 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8379 #undef TARGET_GIMPLE_FOLD_BUILTIN
8380 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8382 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8383 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8385 #undef TARGET_INIT_BUILTINS
8386 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8388 #undef TARGET_LEGITIMATE_ADDRESS_P
8389 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8391 #undef TARGET_LEGITIMATE_CONSTANT_P
8392 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8394 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8395 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8397 #undef TARGET_LRA_P
8398 #define TARGET_LRA_P aarch64_lra_p
8400 #undef TARGET_MANGLE_TYPE
8401 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8403 #undef TARGET_MEMORY_MOVE_COST
8404 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8406 #undef TARGET_MUST_PASS_IN_STACK
8407 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8409 /* This target hook should return true if accesses to volatile bitfields
8410 should use the narrowest mode possible. It should return false if these
8411 accesses should use the bitfield container type. */
8412 #undef TARGET_NARROW_VOLATILE_BITFIELD
8413 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
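/* An illustrative consequence of the hook above (the struct is
   hypothetical):

     struct s { volatile unsigned int f : 8; } *p;
     unsigned int x = p->f;

   With the hook returning false, the read of p->f is done with a 32-bit
   access to the "unsigned int" container rather than with the narrowest
   (single-byte) access.  */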
8415 #undef TARGET_OPTION_OVERRIDE
8416 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8418 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8419 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8420 aarch64_override_options_after_change
8422 #undef TARGET_PASS_BY_REFERENCE
8423 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8425 #undef TARGET_PREFERRED_RELOAD_CLASS
8426 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8428 #undef TARGET_SECONDARY_RELOAD
8429 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8431 #undef TARGET_SHIFT_TRUNCATION_MASK
8432 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8434 #undef TARGET_SETUP_INCOMING_VARARGS
8435 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8437 #undef TARGET_STRUCT_VALUE_RTX
8438 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8440 #undef TARGET_REGISTER_MOVE_COST
8441 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8443 #undef TARGET_RETURN_IN_MEMORY
8444 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8446 #undef TARGET_RETURN_IN_MSB
8447 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8449 #undef TARGET_RTX_COSTS
8450 #define TARGET_RTX_COSTS aarch64_rtx_costs
8452 #undef TARGET_SCHED_ISSUE_RATE
8453 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
8455 #undef TARGET_TRAMPOLINE_INIT
8456 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8458 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8459 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8461 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8462 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8464 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8465 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8467 #undef TARGET_VECTORIZE_ADD_STMT_COST
8468 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8470 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8471 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8472 aarch64_builtin_vectorization_cost
8474 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8475 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8477 #undef TARGET_VECTORIZE_BUILTINS
8478 #define TARGET_VECTORIZE_BUILTINS
8480 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8481 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8482 aarch64_builtin_vectorized_function
8484 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8485 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8486 aarch64_autovectorize_vector_sizes
8488 /* Section anchor support. */
8490 #undef TARGET_MIN_ANCHOR_OFFSET
8491 #define TARGET_MIN_ANCHOR_OFFSET -256
8493 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8494 byte offset; we can do much more for larger data types, but have no way
8495 to determine the size of the access. We assume accesses are aligned. */
8496 #undef TARGET_MAX_ANCHOR_OFFSET
8497 #define TARGET_MAX_ANCHOR_OFFSET 4095
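/* An illustrative reading of the anchor range, assuming the base A64
   addressing forms: -256 is the lower bound of the 9-bit signed unscaled
   offset (LDUR/STUR), while 4095 is the largest unsigned immediate byte
   offset (e.g. "ldrb w0, [x1, #4095]").  Wider aligned accesses could reach
   further through scaled offsets, but as the comment above notes, the
   anchor code cannot know the access size in advance.  */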
8499 #undef TARGET_VECTOR_ALIGNMENT
8500 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8502 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8503 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8504 aarch64_simd_vector_alignment_reachable
8506 /* vec_perm support. */
8508 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8509 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8510 aarch64_vectorize_vec_perm_const_ok
8513 #undef TARGET_FIXED_CONDITION_CODE_REGS
8514 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8516 struct gcc_target targetm = TARGET_INITIALIZER;
8518 #include "gt-aarch64.h"