gcc/config/aarch64/aarch64.c
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2015 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "hash-set.h"
29 #include "machmode.h"
30 #include "vec.h"
31 #include "double-int.h"
32 #include "input.h"
33 #include "alias.h"
34 #include "symtab.h"
35 #include "wide-int.h"
36 #include "inchash.h"
37 #include "tree.h"
38 #include "fold-const.h"
39 #include "stringpool.h"
40 #include "stor-layout.h"
41 #include "calls.h"
42 #include "varasm.h"
43 #include "regs.h"
44 #include "dominance.h"
45 #include "cfg.h"
46 #include "cfgrtl.h"
47 #include "cfganal.h"
48 #include "lcm.h"
49 #include "cfgbuild.h"
50 #include "cfgcleanup.h"
51 #include "predict.h"
52 #include "basic-block.h"
53 #include "df.h"
54 #include "hard-reg-set.h"
55 #include "output.h"
56 #include "expr.h"
57 #include "reload.h"
58 #include "toplev.h"
59 #include "target.h"
60 #include "target-def.h"
61 #include "targhooks.h"
62 #include "ggc.h"
63 #include "input.h"
64 #include "function.h"
65 #include "tm_p.h"
66 #include "recog.h"
67 #include "langhooks.h"
68 #include "diagnostic-core.h"
69 #include "hash-table.h"
70 #include "tree-ssa-alias.h"
71 #include "internal-fn.h"
72 #include "gimple-fold.h"
73 #include "tree-eh.h"
74 #include "gimple-expr.h"
75 #include "is-a.h"
76 #include "gimple.h"
77 #include "gimplify.h"
78 #include "optabs.h"
79 #include "dwarf2.h"
80 #include "cfgloop.h"
81 #include "tree-vectorizer.h"
82 #include "aarch64-cost-tables.h"
83 #include "dumpfile.h"
84 #include "builtins.h"
85 #include "rtl-iter.h"
86 #include "tm-constrs.h"
88 /* Defined for convenience. */
89 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
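/* For example, POINTER_BYTES is 8 under LP64 and 4 under ILP32, since
   POINTER_SIZE is 64 or 32 bits respectively.  */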
91 /* Classifies an address.
93 ADDRESS_REG_IMM
94 A simple base register plus immediate offset.
96 ADDRESS_REG_WB
97 A base register indexed by immediate offset with writeback.
99 ADDRESS_REG_REG
100 A base register indexed by (optionally scaled) register.
102 ADDRESS_REG_UXTW
103 A base register indexed by (optionally scaled) zero-extended register.
105 ADDRESS_REG_SXTW
106 A base register indexed by (optionally scaled) sign-extended register.
108 ADDRESS_LO_SUM
109 A LO_SUM rtx with a base register and "LO12" symbol relocation.
111 ADDRESS_SYMBOLIC:
112 A constant symbolic address, in pc-relative literal pool. */
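/* As a rough illustration (the assembly shapes below are indicative only),
   the classes above correspond to operand forms such as:

     ADDRESS_REG_IMM    [x1, #16]
     ADDRESS_REG_WB     [x1, #16]!  or  [x1], #16
     ADDRESS_REG_REG    [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   [x1, w2, uxtw #2]
     ADDRESS_REG_SXTW   [x1, w2, sxtw #2]
     ADDRESS_LO_SUM     [x1, #:lo12:symbol]
     ADDRESS_SYMBOLIC   a PC-relative literal-pool reference  */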
114 enum aarch64_address_type {
115 ADDRESS_REG_IMM,
116 ADDRESS_REG_WB,
117 ADDRESS_REG_REG,
118 ADDRESS_REG_UXTW,
119 ADDRESS_REG_SXTW,
120 ADDRESS_LO_SUM,
121 ADDRESS_SYMBOLIC
124 struct aarch64_address_info {
125 enum aarch64_address_type type;
126 rtx base;
127 rtx offset;
128 int shift;
129 enum aarch64_symbol_type symbol_type;
132 struct simd_immediate_info
134 rtx value;
135 int shift;
136 int element_width;
137 bool mvn;
138 bool msl;
141 /* The current code model. */
142 enum aarch64_code_model aarch64_cmodel;
144 #ifdef HAVE_AS_TLS
145 #undef TARGET_HAVE_TLS
146 #define TARGET_HAVE_TLS 1
147 #endif
149 static bool aarch64_lra_p (void);
150 static bool aarch64_composite_type_p (const_tree, machine_mode);
151 static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
152 const_tree,
153 machine_mode *, int *,
154 bool *);
155 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
156 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
157 static void aarch64_override_options_after_change (void);
158 static bool aarch64_vector_mode_supported_p (machine_mode);
159 static unsigned bit_count (unsigned HOST_WIDE_INT);
160 static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
161 const unsigned char *sel);
162 static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
164 /* Major revision number of the ARM Architecture implemented by the target. */
165 unsigned aarch64_architecture_version;
167 /* The processor for which instructions should be scheduled. */
168 enum aarch64_processor aarch64_tune = cortexa53;
170 /* The current tuning set. */
171 const struct tune_params *aarch64_tune_params;
173 /* Mask to specify which instructions we are allowed to generate. */
174 unsigned long aarch64_isa_flags = 0;
176 /* Mask to specify which instruction scheduling options should be used. */
177 unsigned long aarch64_tune_flags = 0;
179 /* Tuning parameters. */
181 #if HAVE_DESIGNATED_INITIALIZERS
182 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
183 #else
184 #define NAMED_PARAM(NAME, VAL) (VAL)
185 #endif
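/* For example, NAMED_PARAM (hi, 1) expands to ".hi = (1)" when designated
   initializers are available, and to plain "(1)" otherwise, in which case
   the initializers below must be kept in field-declaration order.  */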
187 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
188 __extension__
189 #endif
191 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
192 __extension__
193 #endif
194 static const struct cpu_addrcost_table generic_addrcost_table =
196 #if HAVE_DESIGNATED_INITIALIZERS
197 .addr_scale_costs =
198 #endif
200 NAMED_PARAM (hi, 0),
201 NAMED_PARAM (si, 0),
202 NAMED_PARAM (di, 0),
203 NAMED_PARAM (ti, 0),
205 NAMED_PARAM (pre_modify, 0),
206 NAMED_PARAM (post_modify, 0),
207 NAMED_PARAM (register_offset, 0),
208 NAMED_PARAM (register_extend, 0),
209 NAMED_PARAM (imm_offset, 0)
212 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
213 __extension__
214 #endif
215 static const struct cpu_addrcost_table cortexa57_addrcost_table =
217 #if HAVE_DESIGNATED_INITIALIZERS
218 .addr_scale_costs =
219 #endif
221 NAMED_PARAM (hi, 1),
222 NAMED_PARAM (si, 0),
223 NAMED_PARAM (di, 0),
224 NAMED_PARAM (ti, 1),
226 NAMED_PARAM (pre_modify, 0),
227 NAMED_PARAM (post_modify, 0),
228 NAMED_PARAM (register_offset, 0),
229 NAMED_PARAM (register_extend, 0),
230 NAMED_PARAM (imm_offset, 0),
233 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
234 __extension__
235 #endif
236 static const struct cpu_regmove_cost generic_regmove_cost =
238 NAMED_PARAM (GP2GP, 1),
239 /* Avoid the use of slow int<->fp moves for spilling by setting
240 their cost higher than memmov_cost. */
241 NAMED_PARAM (GP2FP, 5),
242 NAMED_PARAM (FP2GP, 5),
243 NAMED_PARAM (FP2FP, 2)
246 static const struct cpu_regmove_cost cortexa57_regmove_cost =
248 NAMED_PARAM (GP2GP, 1),
249 /* Avoid the use of slow int<->fp moves for spilling by setting
250 their cost higher than memmov_cost. */
251 NAMED_PARAM (GP2FP, 5),
252 NAMED_PARAM (FP2GP, 5),
253 NAMED_PARAM (FP2FP, 2)
256 static const struct cpu_regmove_cost cortexa53_regmove_cost =
258 NAMED_PARAM (GP2GP, 1),
259 /* Avoid the use of slow int<->fp moves for spilling by setting
260 their cost higher than memmov_cost. */
261 NAMED_PARAM (GP2FP, 5),
262 NAMED_PARAM (FP2GP, 5),
263 NAMED_PARAM (FP2FP, 2)
266 static const struct cpu_regmove_cost thunderx_regmove_cost =
268 NAMED_PARAM (GP2GP, 2),
269 NAMED_PARAM (GP2FP, 2),
270 NAMED_PARAM (FP2GP, 6),
271 NAMED_PARAM (FP2FP, 4)
274 /* Generic costs for vector insn classes. */
275 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
276 __extension__
277 #endif
278 static const struct cpu_vector_cost generic_vector_cost =
280 NAMED_PARAM (scalar_stmt_cost, 1),
281 NAMED_PARAM (scalar_load_cost, 1),
282 NAMED_PARAM (scalar_store_cost, 1),
283 NAMED_PARAM (vec_stmt_cost, 1),
284 NAMED_PARAM (vec_to_scalar_cost, 1),
285 NAMED_PARAM (scalar_to_vec_cost, 1),
286 NAMED_PARAM (vec_align_load_cost, 1),
287 NAMED_PARAM (vec_unalign_load_cost, 1),
288 NAMED_PARAM (vec_unalign_store_cost, 1),
289 NAMED_PARAM (vec_store_cost, 1),
290 NAMED_PARAM (cond_taken_branch_cost, 3),
291 NAMED_PARAM (cond_not_taken_branch_cost, 1)
294 /* Costs for vector insn classes for Cortex-A57. */
295 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
296 __extension__
297 #endif
298 static const struct cpu_vector_cost cortexa57_vector_cost =
300 NAMED_PARAM (scalar_stmt_cost, 1),
301 NAMED_PARAM (scalar_load_cost, 4),
302 NAMED_PARAM (scalar_store_cost, 1),
303 NAMED_PARAM (vec_stmt_cost, 3),
304 NAMED_PARAM (vec_to_scalar_cost, 8),
305 NAMED_PARAM (scalar_to_vec_cost, 8),
306 NAMED_PARAM (vec_align_load_cost, 5),
307 NAMED_PARAM (vec_unalign_load_cost, 5),
308 NAMED_PARAM (vec_unalign_store_cost, 1),
309 NAMED_PARAM (vec_store_cost, 1),
310 NAMED_PARAM (cond_taken_branch_cost, 1),
311 NAMED_PARAM (cond_not_taken_branch_cost, 1)
314 #define AARCH64_FUSE_NOTHING (0)
315 #define AARCH64_FUSE_MOV_MOVK (1 << 0)
316 #define AARCH64_FUSE_ADRP_ADD (1 << 1)
317 #define AARCH64_FUSE_MOVK_MOVK (1 << 2)
318 #define AARCH64_FUSE_ADRP_LDR (1 << 3)
319 #define AARCH64_FUSE_CMP_BRANCH (1 << 4)
321 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
322 __extension__
323 #endif
324 static const struct tune_params generic_tunings =
326 &cortexa57_extra_costs,
327 &generic_addrcost_table,
328 &generic_regmove_cost,
329 &generic_vector_cost,
330 NAMED_PARAM (memmov_cost, 4),
331 NAMED_PARAM (issue_rate, 2),
332 NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING),
333 8, /* function_align. */
334 8, /* jump_align. */
335 4, /* loop_align. */
336 2, /* int_reassoc_width. */
337 4, /* fp_reassoc_width. */
338 1 /* vec_reassoc_width. */
341 static const struct tune_params cortexa53_tunings =
343 &cortexa53_extra_costs,
344 &generic_addrcost_table,
345 &cortexa53_regmove_cost,
346 &generic_vector_cost,
347 NAMED_PARAM (memmov_cost, 4),
348 NAMED_PARAM (issue_rate, 2),
349 NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
350 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR)),
351 8, /* function_align. */
352 8, /* jump_align. */
353 4, /* loop_align. */
354 2, /* int_reassoc_width. */
355 4, /* fp_reassoc_width. */
356 1 /* vec_reassoc_width. */
359 static const struct tune_params cortexa57_tunings =
361 &cortexa57_extra_costs,
362 &cortexa57_addrcost_table,
363 &cortexa57_regmove_cost,
364 &cortexa57_vector_cost,
365 NAMED_PARAM (memmov_cost, 4),
366 NAMED_PARAM (issue_rate, 3),
367 NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK)),
368 16, /* function_align. */
369 8, /* jump_align. */
370 4, /* loop_align. */
371 2, /* int_reassoc_width. */
372 4, /* fp_reassoc_width. */
373 1 /* vec_reassoc_width. */
376 static const struct tune_params thunderx_tunings =
378 &thunderx_extra_costs,
379 &generic_addrcost_table,
380 &thunderx_regmove_cost,
381 &generic_vector_cost,
382 NAMED_PARAM (memmov_cost, 6),
383 NAMED_PARAM (issue_rate, 2),
384 NAMED_PARAM (fuseable_ops, AARCH64_FUSE_CMP_BRANCH),
385 8, /* function_align. */
386 8, /* jump_align. */
387 8, /* loop_align. */
388 2, /* int_reassoc_width. */
389 4, /* fp_reassoc_width. */
390 1 /* vec_reassoc_width. */
393 /* A processor implementing AArch64. */
394 struct processor
396 const char *const name;
397 enum aarch64_processor core;
398 const char *arch;
399 unsigned architecture_version;
400 const unsigned long flags;
401 const struct tune_params *const tune;
404 /* Processor cores implementing AArch64. */
405 static const struct processor all_cores[] =
407 #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \
408 {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
409 #include "aarch64-cores.def"
410 #undef AARCH64_CORE
411 {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
412 {NULL, aarch64_none, NULL, 0, 0, NULL}
415 /* Architectures implementing AArch64. */
416 static const struct processor all_architectures[] =
418 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
419 {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
420 #include "aarch64-arches.def"
421 #undef AARCH64_ARCH
422 {NULL, aarch64_none, NULL, 0, 0, NULL}
425 /* Target specification. These are populated as command-line arguments
426 are processed, or NULL if not specified. */
427 static const struct processor *selected_arch;
428 static const struct processor *selected_cpu;
429 static const struct processor *selected_tune;
431 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
433 /* An ISA extension in the co-processor and main instruction set space. */
434 struct aarch64_option_extension
436 const char *const name;
437 const unsigned long flags_on;
438 const unsigned long flags_off;
441 /* ISA extensions in AArch64. */
442 static const struct aarch64_option_extension all_extensions[] =
444 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
445 {NAME, FLAGS_ON, FLAGS_OFF},
446 #include "aarch64-option-extensions.def"
447 #undef AARCH64_OPT_EXTENSION
448 {NULL, 0, 0}
451 /* Used to track the size of an address when generating a pre/post
452 increment address. */
453 static machine_mode aarch64_memory_reference_mode;
455 /* Used to force GTY into this file. */
456 static GTY(()) int gty_dummy;
458 /* A table of valid AArch64 "bitmask immediate" values for
459 logical instructions. */
461 #define AARCH64_NUM_BITMASKS 5334
462 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
464 typedef enum aarch64_cond_code
466 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
467 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
468 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
470 aarch64_cc;
472 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
474 /* The condition codes of the processor, and the inverse function. */
475 static const char * const aarch64_condition_codes[] =
477 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
478 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
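/* AARCH64_INVERSE_CONDITION_CODE flips the low bit of the encoding; for
   example AARCH64_EQ ("eq") inverts to AARCH64_NE ("ne") and AARCH64_GE
   ("ge") to AARCH64_LT ("lt").  */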
481 static unsigned int
482 aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
484 return 2;
487 static int
488 aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
489 enum machine_mode mode)
491 if (VECTOR_MODE_P (mode))
492 return aarch64_tune_params->vec_reassoc_width;
493 if (INTEGRAL_MODE_P (mode))
494 return aarch64_tune_params->int_reassoc_width;
495 if (FLOAT_MODE_P (mode))
496 return aarch64_tune_params->fp_reassoc_width;
497 return 1;
500 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
501 unsigned
502 aarch64_dbx_register_number (unsigned regno)
504 if (GP_REGNUM_P (regno))
505 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
506 else if (regno == SP_REGNUM)
507 return AARCH64_DWARF_SP;
508 else if (FP_REGNUM_P (regno))
509 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
511 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
512 equivalent DWARF register. */
513 return DWARF_FRAME_REGISTERS;
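/* For example, x5 maps to DWARF register 5 (AARCH64_DWARF_R0 is 0), the
   stack pointer to 31, and v3 to DWARF register 67 (AARCH64_DWARF_V0 is 64),
   following the AArch64 DWARF register numbering.  */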
516 /* Return TRUE if MODE is any of the large INT modes. */
517 static bool
518 aarch64_vect_struct_mode_p (machine_mode mode)
520 return mode == OImode || mode == CImode || mode == XImode;
523 /* Return TRUE if MODE is any of the vector modes. */
524 static bool
525 aarch64_vector_mode_p (machine_mode mode)
527 return aarch64_vector_mode_supported_p (mode)
528 || aarch64_vect_struct_mode_p (mode);
531 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
532 static bool
533 aarch64_array_mode_supported_p (machine_mode mode,
534 unsigned HOST_WIDE_INT nelems)
536 if (TARGET_SIMD
537 && AARCH64_VALID_SIMD_QREG_MODE (mode)
538 && (nelems >= 2 && nelems <= 4))
539 return true;
541 return false;
544 /* Implement HARD_REGNO_NREGS. */
546 int
547 aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
549 switch (aarch64_regno_regclass (regno))
551 case FP_REGS:
552 case FP_LO_REGS:
553 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
554 default:
555 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
557 gcc_unreachable ();
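/* For example, a 16-byte V4SImode value occupies a single FP/SIMD register
   (UNITS_PER_VREG is 16), while the same 16 bytes in TImode need two general
   registers (UNITS_PER_WORD is 8).  */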
560 /* Implement HARD_REGNO_MODE_OK. */
562 int
563 aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
565 if (GET_MODE_CLASS (mode) == MODE_CC)
566 return regno == CC_REGNUM;
568 if (regno == SP_REGNUM)
569 /* The purpose of comparing with ptr_mode is to support the
570 global register variable associated with the stack pointer
571 register via the syntax of asm ("wsp") in ILP32. */
572 return mode == Pmode || mode == ptr_mode;
574 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
575 return mode == Pmode;
577 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
578 return 1;
580 if (FP_REGNUM_P (regno))
582 if (aarch64_vect_struct_mode_p (mode))
583 return
584 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
585 else
586 return 1;
589 return 0;
592 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
593 machine_mode
594 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
595 machine_mode mode)
597 /* Handle modes that fit within single registers. */
598 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
600 if (GET_MODE_SIZE (mode) >= 4)
601 return mode;
602 else
603 return SImode;
605 /* Fall back to generic for multi-reg and very large modes. */
606 else
607 return choose_hard_reg_mode (regno, nregs, false);
610 /* Return true if calls to DECL should be treated as
611 long-calls (i.e. called via a register). */
612 static bool
613 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
615 return false;
618 /* Return true if calls to symbol-ref SYM should be treated as
619 long-calls (i.e. called via a register). */
620 bool
621 aarch64_is_long_call_p (rtx sym)
623 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
626 /* Return true if the offsets to a zero/sign-extract operation
627 represent an expression that matches an extend operation. The
628 operands represent the parameters from
630 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
631 bool
632 aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
633 rtx extract_imm)
635 HOST_WIDE_INT mult_val, extract_val;
637 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
638 return false;
640 mult_val = INTVAL (mult_imm);
641 extract_val = INTVAL (extract_imm);
643 if (extract_val > 8
644 && extract_val < GET_MODE_BITSIZE (mode)
645 && exact_log2 (extract_val & ~7) > 0
646 && (extract_val & 7) <= 4
647 && mult_val == (1 << (extract_val & 7)))
648 return true;
650 return false;
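/* As a worked example of the check above: mult_imm == 4 with
   extract_imm == 34 is accepted in DImode, since 34 & ~7 == 32 is a power of
   two, 34 & 7 == 2 is at most 4, and 1 << 2 == 4; this is the shape produced
   for a 32-bit value that is extended and then shifted left by two.  */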
653 /* Emit an insn that's a simple single-set. Both the operands must be
654 known to be valid. */
655 inline static rtx
656 emit_set_insn (rtx x, rtx y)
658 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
661 /* X and Y are two things to compare using CODE. Emit the compare insn and
662 return the rtx for register 0 in the proper mode. */
663 rtx
664 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
666 machine_mode mode = SELECT_CC_MODE (code, x, y);
667 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
669 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
670 return cc_reg;
673 /* Build the SYMBOL_REF for __tls_get_addr. */
675 static GTY(()) rtx tls_get_addr_libfunc;
678 aarch64_tls_get_addr (void)
680 if (!tls_get_addr_libfunc)
681 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
682 return tls_get_addr_libfunc;
685 /* Return the TLS model to use for ADDR. */
687 static enum tls_model
688 tls_symbolic_operand_type (rtx addr)
690 enum tls_model tls_kind = TLS_MODEL_NONE;
691 rtx sym, addend;
693 if (GET_CODE (addr) == CONST)
695 split_const (addr, &sym, &addend);
696 if (GET_CODE (sym) == SYMBOL_REF)
697 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
699 else if (GET_CODE (addr) == SYMBOL_REF)
700 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
702 return tls_kind;
705 /* We allow lo_sum expressions in our legitimate addresses
706 so that combine can take care of combining addresses where
707 necessary, but for generation purposes we generate the address
708 as follows:
709 RTL Absolute
710 tmp = hi (symbol_ref); adrp x1, foo
711 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
714 PIC TLS
715 adrp x1, :got:foo adrp tmp, :tlsgd:foo
716 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
717 bl __tls_get_addr
720 Load TLS symbol, depending on TLS mechanism and TLS access model.
722 Global Dynamic - Traditional TLS:
723 adrp tmp, :tlsgd:imm
724 add dest, tmp, #:tlsgd_lo12:imm
725 bl __tls_get_addr
727 Global Dynamic - TLS Descriptors:
728 adrp dest, :tlsdesc:imm
729 ldr tmp, [dest, #:tlsdesc_lo12:imm]
730 add dest, dest, #:tlsdesc_lo12:imm
731 blr tmp
732 mrs tp, tpidr_el0
733 add dest, dest, tp
735 Initial Exec:
736 mrs tp, tpidr_el0
737 adrp tmp, :gottprel:imm
738 ldr dest, [tmp, #:gottprel_lo12:imm]
739 add dest, dest, tp
741 Local Exec:
742 mrs tp, tpidr_el0
743 add t0, tp, #:tprel_hi12:imm
744 add t0, #:tprel_lo12_nc:imm
747 static void
748 aarch64_load_symref_appropriately (rtx dest, rtx imm,
749 enum aarch64_symbol_type type)
751 switch (type)
753 case SYMBOL_SMALL_ABSOLUTE:
755 /* In ILP32, the mode of dest can be either SImode or DImode. */
756 rtx tmp_reg = dest;
757 machine_mode mode = GET_MODE (dest);
759 gcc_assert (mode == Pmode || mode == ptr_mode);
761 if (can_create_pseudo_p ())
762 tmp_reg = gen_reg_rtx (mode);
764 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
765 emit_insn (gen_add_losym (dest, tmp_reg, imm));
766 return;
769 case SYMBOL_TINY_ABSOLUTE:
770 emit_insn (gen_rtx_SET (Pmode, dest, imm));
771 return;
773 case SYMBOL_SMALL_GOT:
775 /* In ILP32, the mode of dest can be either SImode or DImode,
776 while the got entry is always of SImode size. The mode of
777 dest depends on how dest is used: if dest is assigned to a
778 pointer (e.g. in the memory), it has SImode; it may have
779 DImode if dest is dereferenced to access the memory.
780 This is why we have to handle three different ldr_got_small
781 patterns here (two patterns for ILP32). */
782 rtx tmp_reg = dest;
783 machine_mode mode = GET_MODE (dest);
785 if (can_create_pseudo_p ())
786 tmp_reg = gen_reg_rtx (mode);
788 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
789 if (mode == ptr_mode)
791 if (mode == DImode)
792 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
793 else
794 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
796 else
798 gcc_assert (mode == Pmode);
799 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
802 return;
805 case SYMBOL_SMALL_TLSGD:
807 rtx_insn *insns;
808 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
810 start_sequence ();
811 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
812 insns = get_insns ();
813 end_sequence ();
815 RTL_CONST_CALL_P (insns) = 1;
816 emit_libcall_block (insns, dest, result, imm);
817 return;
820 case SYMBOL_SMALL_TLSDESC:
822 machine_mode mode = GET_MODE (dest);
823 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
824 rtx tp;
826 gcc_assert (mode == Pmode || mode == ptr_mode);
828 /* In ILP32, the got entry is always of SImode size. Unlike
829 small GOT, the dest is fixed at reg 0. */
830 if (TARGET_ILP32)
831 emit_insn (gen_tlsdesc_small_si (imm));
832 else
833 emit_insn (gen_tlsdesc_small_di (imm));
834 tp = aarch64_load_tp (NULL);
836 if (mode != Pmode)
837 tp = gen_lowpart (mode, tp);
839 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
840 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
841 return;
844 case SYMBOL_SMALL_GOTTPREL:
846 /* In ILP32, the mode of dest can be either SImode or DImode,
847 while the got entry is always of SImode size. The mode of
848 dest depends on how dest is used: if dest is assigned to a
849 pointer (e.g. in the memory), it has SImode; it may have
850 DImode if dest is dereferenced to access the memory.
851 This is why we have to handle three different tlsie_small
852 patterns here (two patterns for ILP32). */
853 machine_mode mode = GET_MODE (dest);
854 rtx tmp_reg = gen_reg_rtx (mode);
855 rtx tp = aarch64_load_tp (NULL);
857 if (mode == ptr_mode)
859 if (mode == DImode)
860 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
861 else
863 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
864 tp = gen_lowpart (mode, tp);
867 else
869 gcc_assert (mode == Pmode);
870 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
873 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
874 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
875 return;
878 case SYMBOL_SMALL_TPREL:
880 rtx tp = aarch64_load_tp (NULL);
881 emit_insn (gen_tlsle_small (dest, tp, imm));
882 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
883 return;
886 case SYMBOL_TINY_GOT:
887 emit_insn (gen_ldr_got_tiny (dest, imm));
888 return;
890 default:
891 gcc_unreachable ();
895 /* Emit a move from SRC to DEST. Assume that the move expanders can
896 handle all moves if !can_create_pseudo_p (). The distinction is
897 important because, unlike emit_move_insn, the move expanders know
898 how to force Pmode objects into the constant pool even when the
899 constant pool address is not itself legitimate. */
900 static rtx
901 aarch64_emit_move (rtx dest, rtx src)
903 return (can_create_pseudo_p ()
904 ? emit_move_insn (dest, src)
905 : emit_move_insn_1 (dest, src));
908 /* Split a 128-bit move operation into two 64-bit move operations,
909 taking care to handle partial overlap of register to register
910 copies. Special cases are needed when moving between GP regs and
911 FP regs. SRC can be a register, constant or memory; DST a register
912 or memory. If either operand is memory it must not have any side
913 effects. */
914 void
915 aarch64_split_128bit_move (rtx dst, rtx src)
917 rtx dst_lo, dst_hi;
918 rtx src_lo, src_hi;
920 machine_mode mode = GET_MODE (dst);
922 gcc_assert (mode == TImode || mode == TFmode);
923 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
924 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
926 if (REG_P (dst) && REG_P (src))
928 int src_regno = REGNO (src);
929 int dst_regno = REGNO (dst);
931 /* Handle FP <-> GP regs. */
932 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
934 src_lo = gen_lowpart (word_mode, src);
935 src_hi = gen_highpart (word_mode, src);
937 if (mode == TImode)
939 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
940 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
942 else
944 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
945 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
947 return;
949 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
951 dst_lo = gen_lowpart (word_mode, dst);
952 dst_hi = gen_highpart (word_mode, dst);
954 if (mode == TImode)
956 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
957 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
959 else
961 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
962 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
964 return;
968 dst_lo = gen_lowpart (word_mode, dst);
969 dst_hi = gen_highpart (word_mode, dst);
970 src_lo = gen_lowpart (word_mode, src);
971 src_hi = gen_highpart_mode (word_mode, mode, src);
973 /* At most one pairing may overlap. */
974 if (reg_overlap_mentioned_p (dst_lo, src_hi))
976 aarch64_emit_move (dst_hi, src_hi);
977 aarch64_emit_move (dst_lo, src_lo);
979 else
981 aarch64_emit_move (dst_lo, src_lo);
982 aarch64_emit_move (dst_hi, src_hi);
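/* For example, splitting a TImode copy from x0/x1 into x1/x2 must move the
   high half first: dst_lo (x1) overlaps src_hi (x1), so writing the low half
   first would clobber the upper word of the source.  */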
986 bool
987 aarch64_split_128bit_move_p (rtx dst, rtx src)
989 return (! REG_P (src)
990 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
993 /* Split a complex SIMD combine. */
995 void
996 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
998 machine_mode src_mode = GET_MODE (src1);
999 machine_mode dst_mode = GET_MODE (dst);
1001 gcc_assert (VECTOR_MODE_P (dst_mode));
1003 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
1005 rtx (*gen) (rtx, rtx, rtx);
1007 switch (src_mode)
1009 case V8QImode:
1010 gen = gen_aarch64_simd_combinev8qi;
1011 break;
1012 case V4HImode:
1013 gen = gen_aarch64_simd_combinev4hi;
1014 break;
1015 case V2SImode:
1016 gen = gen_aarch64_simd_combinev2si;
1017 break;
1018 case V2SFmode:
1019 gen = gen_aarch64_simd_combinev2sf;
1020 break;
1021 case DImode:
1022 gen = gen_aarch64_simd_combinedi;
1023 break;
1024 case DFmode:
1025 gen = gen_aarch64_simd_combinedf;
1026 break;
1027 default:
1028 gcc_unreachable ();
1031 emit_insn (gen (dst, src1, src2));
1032 return;
1036 /* Split a complex SIMD move. */
1038 void
1039 aarch64_split_simd_move (rtx dst, rtx src)
1041 machine_mode src_mode = GET_MODE (src);
1042 machine_mode dst_mode = GET_MODE (dst);
1044 gcc_assert (VECTOR_MODE_P (dst_mode));
1046 if (REG_P (dst) && REG_P (src))
1048 rtx (*gen) (rtx, rtx);
1050 gcc_assert (VECTOR_MODE_P (src_mode));
1052 switch (src_mode)
1054 case V16QImode:
1055 gen = gen_aarch64_split_simd_movv16qi;
1056 break;
1057 case V8HImode:
1058 gen = gen_aarch64_split_simd_movv8hi;
1059 break;
1060 case V4SImode:
1061 gen = gen_aarch64_split_simd_movv4si;
1062 break;
1063 case V2DImode:
1064 gen = gen_aarch64_split_simd_movv2di;
1065 break;
1066 case V4SFmode:
1067 gen = gen_aarch64_split_simd_movv4sf;
1068 break;
1069 case V2DFmode:
1070 gen = gen_aarch64_split_simd_movv2df;
1071 break;
1072 default:
1073 gcc_unreachable ();
1076 emit_insn (gen (dst, src));
1077 return;
1081 static rtx
1082 aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
1084 if (can_create_pseudo_p ())
1085 return force_reg (mode, value);
1086 else
1088 x = aarch64_emit_move (x, value);
1089 return x;
1094 static rtx
1095 aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
1097 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
1099 rtx high;
1100 /* Load the full offset into a register. This
1101 might be improvable in the future. */
1102 high = GEN_INT (offset);
1103 offset = 0;
1104 high = aarch64_force_temporary (mode, temp, high);
1105 reg = aarch64_force_temporary (mode, temp,
1106 gen_rtx_PLUS (mode, high, reg));
1108 return plus_constant (mode, reg, offset);
1111 static int
1112 aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
1113 machine_mode mode)
1115 unsigned HOST_WIDE_INT mask;
1116 int i;
1117 bool first;
1118 unsigned HOST_WIDE_INT val;
1119 bool subtargets;
1120 rtx subtarget;
1121 int one_match, zero_match, first_not_ffff_match;
1122 int num_insns = 0;
1124 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1126 if (generate)
1127 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1128 num_insns++;
1129 return num_insns;
1132 if (mode == SImode)
1134 /* We know we can't do this in 1 insn, and we must be able to do it
1135 in two; so don't mess around looking for sequences that don't buy
1136 us anything. */
1137 if (generate)
1139 emit_insn (gen_rtx_SET (VOIDmode, dest,
1140 GEN_INT (INTVAL (imm) & 0xffff)));
1141 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1142 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1144 num_insns += 2;
1145 return num_insns;
1148 /* Remaining cases are all for DImode. */
1150 val = INTVAL (imm);
1151 subtargets = optimize && can_create_pseudo_p ();
1153 one_match = 0;
1154 zero_match = 0;
1155 mask = 0xffff;
1156 first_not_ffff_match = -1;
1158 for (i = 0; i < 64; i += 16, mask <<= 16)
1160 if ((val & mask) == mask)
1161 one_match++;
1162 else
1164 if (first_not_ffff_match < 0)
1165 first_not_ffff_match = i;
1166 if ((val & mask) == 0)
1167 zero_match++;
1171 if (one_match == 2)
1173 /* Set one of the quarters and then insert back into result. */
1174 mask = 0xffffll << first_not_ffff_match;
1175 if (generate)
1177 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1178 emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
1179 GEN_INT ((val >> first_not_ffff_match)
1180 & 0xffff)));
1182 num_insns += 2;
1183 return num_insns;
1186 if (zero_match == 2)
1187 goto simple_sequence;
1189 mask = 0x0ffff0000UL;
1190 for (i = 16; i < 64; i += 16, mask <<= 16)
1192 HOST_WIDE_INT comp = mask & ~(mask - 1);
1194 if (aarch64_uimm12_shift (val - (val & mask)))
1196 if (generate)
1198 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1199 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1200 GEN_INT (val & mask)));
1201 emit_insn (gen_adddi3 (dest, subtarget,
1202 GEN_INT (val - (val & mask))));
1204 num_insns += 2;
1205 return num_insns;
1207 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1209 if (generate)
1211 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1212 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1213 GEN_INT ((val + comp) & mask)));
1214 emit_insn (gen_adddi3 (dest, subtarget,
1215 GEN_INT (val - ((val + comp) & mask))));
1217 num_insns += 2;
1218 return num_insns;
1220 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1222 if (generate)
1224 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1225 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1226 GEN_INT ((val - comp) | ~mask)));
1227 emit_insn (gen_adddi3 (dest, subtarget,
1228 GEN_INT (val - ((val - comp) | ~mask))));
1230 num_insns += 2;
1231 return num_insns;
1233 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1235 if (generate)
1237 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1238 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1239 GEN_INT (val | ~mask)));
1240 emit_insn (gen_adddi3 (dest, subtarget,
1241 GEN_INT (val - (val | ~mask))));
1243 num_insns += 2;
1244 return num_insns;
1248 /* See if we can do it by arithmetically combining two
1249 immediates. */
1250 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1252 int j;
1253 mask = 0xffff;
1255 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1256 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1258 if (generate)
1260 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1261 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1262 GEN_INT (aarch64_bitmasks[i])));
1263 emit_insn (gen_adddi3 (dest, subtarget,
1264 GEN_INT (val - aarch64_bitmasks[i])));
1266 num_insns += 2;
1267 return num_insns;
1270 for (j = 0; j < 64; j += 16, mask <<= 16)
1272 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1274 if (generate)
1276 emit_insn (gen_rtx_SET (VOIDmode, dest,
1277 GEN_INT (aarch64_bitmasks[i])));
1278 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1279 GEN_INT ((val >> j) & 0xffff)));
1281 num_insns += 2;
1282 return num_insns;
1287 /* See if we can do it by logically combining two immediates. */
1288 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1290 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1292 int j;
1294 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1295 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1297 if (generate)
1299 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1300 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1301 GEN_INT (aarch64_bitmasks[i])));
1302 emit_insn (gen_iordi3 (dest, subtarget,
1303 GEN_INT (aarch64_bitmasks[j])));
1305 num_insns += 2;
1306 return num_insns;
1309 else if ((val & aarch64_bitmasks[i]) == val)
1311 int j;
1313 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1314 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1316 if (generate)
1318 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1319 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1320 GEN_INT (aarch64_bitmasks[j])));
1321 emit_insn (gen_anddi3 (dest, subtarget,
1322 GEN_INT (aarch64_bitmasks[i])));
1324 num_insns += 2;
1325 return num_insns;
1330 if (one_match > zero_match)
1332 /* Set either first three quarters or all but the third. */
1333 mask = 0xffffll << (16 - first_not_ffff_match);
1334 if (generate)
1335 emit_insn (gen_rtx_SET (VOIDmode, dest,
1336 GEN_INT (val | mask | 0xffffffff00000000ull)));
1337 num_insns ++;
1339 /* Now insert other two quarters. */
1340 for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
1341 i < 64; i += 16, mask <<= 16)
1343 if ((val & mask) != mask)
1345 if (generate)
1346 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1347 GEN_INT ((val >> i) & 0xffff)));
1348 num_insns ++;
1351 return num_insns;
1354 simple_sequence:
1355 first = true;
1356 mask = 0xffff;
1357 for (i = 0; i < 64; i += 16, mask <<= 16)
1359 if ((val & mask) != 0)
1361 if (first)
1363 if (generate)
1364 emit_insn (gen_rtx_SET (VOIDmode, dest,
1365 GEN_INT (val & mask)));
1366 num_insns ++;
1367 first = false;
1369 else
1371 if (generate)
1372 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1373 GEN_INT ((val >> i) & 0xffff)));
1374 num_insns ++;
1379 return num_insns;
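/* For example, the DImode constant 0x1234000000005678 has two all-zero
   16-bit quarters (zero_match == 2), so the simple sequence above emits two
   instructions: a move of 0x5678 followed by an insertion of 0x1234 at bit
   position 48 (a movz/movk pair).  */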
1383 void
1384 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1386 machine_mode mode = GET_MODE (dest);
1388 gcc_assert (mode == SImode || mode == DImode);
1390 /* Check on what type of symbol it is. */
1391 if (GET_CODE (imm) == SYMBOL_REF
1392 || GET_CODE (imm) == LABEL_REF
1393 || GET_CODE (imm) == CONST)
1395 rtx mem, base, offset;
1396 enum aarch64_symbol_type sty;
1398 /* If we have (const (plus symbol offset)), separate out the offset
1399 before we start classifying the symbol. */
1400 split_const (imm, &base, &offset);
1402 sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
1403 switch (sty)
1405 case SYMBOL_FORCE_TO_MEM:
1406 if (offset != const0_rtx
1407 && targetm.cannot_force_const_mem (mode, imm))
1409 gcc_assert (can_create_pseudo_p ());
1410 base = aarch64_force_temporary (mode, dest, base);
1411 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1412 aarch64_emit_move (dest, base);
1413 return;
1415 mem = force_const_mem (ptr_mode, imm);
1416 gcc_assert (mem);
1417 if (mode != ptr_mode)
1418 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1419 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1420 return;
1422 case SYMBOL_SMALL_TLSGD:
1423 case SYMBOL_SMALL_TLSDESC:
1424 case SYMBOL_SMALL_GOTTPREL:
1425 case SYMBOL_SMALL_GOT:
1426 case SYMBOL_TINY_GOT:
1427 if (offset != const0_rtx)
1429 gcc_assert(can_create_pseudo_p ());
1430 base = aarch64_force_temporary (mode, dest, base);
1431 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1432 aarch64_emit_move (dest, base);
1433 return;
1435 /* FALLTHRU */
1437 case SYMBOL_SMALL_TPREL:
1438 case SYMBOL_SMALL_ABSOLUTE:
1439 case SYMBOL_TINY_ABSOLUTE:
1440 aarch64_load_symref_appropriately (dest, imm, sty);
1441 return;
1443 default:
1444 gcc_unreachable ();
1448 if (!CONST_INT_P (imm))
1450 if (GET_CODE (imm) == HIGH)
1451 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1452 else
1454 rtx mem = force_const_mem (mode, imm);
1455 gcc_assert (mem);
1456 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1459 return;
1462 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
1465 static bool
1466 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1467 tree exp ATTRIBUTE_UNUSED)
1469 /* Currently, always true. */
1470 return true;
1473 /* Implement TARGET_PASS_BY_REFERENCE. */
1475 static bool
1476 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1477 machine_mode mode,
1478 const_tree type,
1479 bool named ATTRIBUTE_UNUSED)
1481 HOST_WIDE_INT size;
1482 machine_mode dummymode;
1483 int nregs;
1485 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1486 size = (mode == BLKmode && type)
1487 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1489 /* Aggregates are passed by reference based on their size. */
1490 if (type && AGGREGATE_TYPE_P (type))
1492 size = int_size_in_bytes (type);
1495 /* Variable sized arguments are always passed by reference. */
1496 if (size < 0)
1497 return true;
1499 /* Can this be a candidate to be passed in fp/simd register(s)? */
1500 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1501 &dummymode, &nregs,
1502 NULL))
1503 return false;
1505 /* Arguments which are variable sized or larger than 2 registers are
1506 passed by reference unless they are a homogeneous floating-point
1507 aggregate. */
1508 return size > 2 * UNITS_PER_WORD;
1511 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1512 static bool
1513 aarch64_return_in_msb (const_tree valtype)
1515 machine_mode dummy_mode;
1516 int dummy_int;
1518 /* Never happens in little-endian mode. */
1519 if (!BYTES_BIG_ENDIAN)
1520 return false;
1522 /* Only composite types smaller than or equal to 16 bytes can
1523 be potentially returned in registers. */
1524 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1525 || int_size_in_bytes (valtype) <= 0
1526 || int_size_in_bytes (valtype) > 16)
1527 return false;
1529 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1530 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1531 is always passed/returned in the least significant bits of fp/simd
1532 register(s). */
1533 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1534 &dummy_mode, &dummy_int, NULL))
1535 return false;
1537 return true;
1540 /* Implement TARGET_FUNCTION_VALUE.
1541 Define how to find the value returned by a function. */
1543 static rtx
1544 aarch64_function_value (const_tree type, const_tree func,
1545 bool outgoing ATTRIBUTE_UNUSED)
1547 machine_mode mode;
1548 int unsignedp;
1549 int count;
1550 machine_mode ag_mode;
1552 mode = TYPE_MODE (type);
1553 if (INTEGRAL_TYPE_P (type))
1554 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1556 if (aarch64_return_in_msb (type))
1558 HOST_WIDE_INT size = int_size_in_bytes (type);
1560 if (size % UNITS_PER_WORD != 0)
1562 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1563 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1567 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1568 &ag_mode, &count, NULL))
1570 if (!aarch64_composite_type_p (type, mode))
1572 gcc_assert (count == 1 && mode == ag_mode);
1573 return gen_rtx_REG (mode, V0_REGNUM);
1575 else
1577 int i;
1578 rtx par;
1580 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1581 for (i = 0; i < count; i++)
1583 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1584 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1585 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1586 XVECEXP (par, 0, i) = tmp;
1588 return par;
1591 else
1592 return gen_rtx_REG (mode, R0_REGNUM);
1595 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1596 Return true if REGNO is the number of a hard register in which the values
1597 of called function may come back. */
1599 static bool
1600 aarch64_function_value_regno_p (const unsigned int regno)
1602 /* Maximum of 16 bytes can be returned in the general registers. Examples
1603 of 16-byte return values are: 128-bit integers and 16-byte small
1604 structures (excluding homogeneous floating-point aggregates). */
1605 if (regno == R0_REGNUM || regno == R1_REGNUM)
1606 return true;
1608 /* Up to four fp/simd registers can return a function value, e.g. a
1609 homogeneous floating-point aggregate having four members. */
1610 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1611 return !TARGET_GENERAL_REGS_ONLY;
1613 return false;
1616 /* Implement TARGET_RETURN_IN_MEMORY.
1618 If the type T of the result of a function is such that
1619 void func (T arg)
1620 would require that arg be passed as a value in a register (or set of
1621 registers) according to the parameter passing rules, then the result
1622 is returned in the same registers as would be used for such an
1623 argument. */
1625 static bool
1626 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1628 HOST_WIDE_INT size;
1629 machine_mode ag_mode;
1630 int count;
1632 if (!AGGREGATE_TYPE_P (type)
1633 && TREE_CODE (type) != COMPLEX_TYPE
1634 && TREE_CODE (type) != VECTOR_TYPE)
1635 /* Simple scalar types are always returned in registers. */
1636 return false;
1638 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1639 type,
1640 &ag_mode,
1641 &count,
1642 NULL))
1643 return false;
1645 /* Types larger than 2 registers are returned in memory. */
1646 size = int_size_in_bytes (type);
1647 return (size < 0 || size > 2 * UNITS_PER_WORD);
1650 static bool
1651 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
1652 const_tree type, int *nregs)
1654 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1655 return aarch64_vfp_is_call_or_return_candidate (mode,
1656 type,
1657 &pcum->aapcs_vfp_rmode,
1658 nregs,
1659 NULL);
1662 /* Given MODE and TYPE of a function argument, return the alignment in
1663 bits. The idea is to suppress any stronger alignment requested by
1664 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1665 This is a helper function for local use only. */
1667 static unsigned int
1668 aarch64_function_arg_alignment (machine_mode mode, const_tree type)
1670 unsigned int alignment;
1672 if (type)
1674 if (!integer_zerop (TYPE_SIZE (type)))
1676 if (TYPE_MODE (type) == mode)
1677 alignment = TYPE_ALIGN (type);
1678 else
1679 alignment = GET_MODE_ALIGNMENT (mode);
1681 else
1682 alignment = 0;
1684 else
1685 alignment = GET_MODE_ALIGNMENT (mode);
1687 return alignment;
1690 /* Layout a function argument according to the AAPCS64 rules. The rule
1691 numbers refer to the rule numbers in the AAPCS64. */
1693 static void
1694 aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
1695 const_tree type,
1696 bool named ATTRIBUTE_UNUSED)
1698 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1699 int ncrn, nvrn, nregs;
1700 bool allocate_ncrn, allocate_nvrn;
1701 HOST_WIDE_INT size;
1703 /* We need to do this once per argument. */
1704 if (pcum->aapcs_arg_processed)
1705 return;
1707 pcum->aapcs_arg_processed = true;
1709 /* Size in bytes, rounded up to the nearest multiple of 8 bytes. */
1710 size
1711 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1712 UNITS_PER_WORD);
1714 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1715 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1716 mode,
1717 type,
1718 &nregs);
1720 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1721 The following code thus handles passing by SIMD/FP registers first. */
1723 nvrn = pcum->aapcs_nvrn;
1725 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1726 and homogeneous short-vector aggregates (HVA). */
1727 if (allocate_nvrn)
1729 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1731 pcum->aapcs_nextnvrn = nvrn + nregs;
1732 if (!aarch64_composite_type_p (type, mode))
1734 gcc_assert (nregs == 1);
1735 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1737 else
1739 rtx par;
1740 int i;
1741 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1742 for (i = 0; i < nregs; i++)
1744 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1745 V0_REGNUM + nvrn + i);
1746 tmp = gen_rtx_EXPR_LIST
1747 (VOIDmode, tmp,
1748 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1749 XVECEXP (par, 0, i) = tmp;
1751 pcum->aapcs_reg = par;
1753 return;
1755 else
1757 /* C.3 NSRN is set to 8. */
1758 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1759 goto on_stack;
1763 ncrn = pcum->aapcs_ncrn;
1764 nregs = size / UNITS_PER_WORD;
1766 /* C6 - C9, though the sign and zero extension semantics are
1767 handled elsewhere. This is the case where the argument fits
1768 entirely in general registers. */
1769 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1771 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1773 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1775 /* C.8 if the argument has an alignment of 16 then the NGRN is
1776 rounded up to the next even number. */
1777 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1779 ++ncrn;
1780 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1782 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1783 A reg is still generated for it, but the caller should be smart
1784 enough not to use it. */
1785 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1787 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1789 else
1791 rtx par;
1792 int i;
1794 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1795 for (i = 0; i < nregs; i++)
1797 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1798 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1799 GEN_INT (i * UNITS_PER_WORD));
1800 XVECEXP (par, 0, i) = tmp;
1802 pcum->aapcs_reg = par;
1805 pcum->aapcs_nextncrn = ncrn + nregs;
1806 return;
1809 /* C.11 */
1810 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1812 /* The argument is passed on stack; record the needed number of words for
1813 this argument and align the total size if necessary. */
1814 on_stack:
1815 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1816 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1817 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1818 16 / UNITS_PER_WORD);
1819 return;
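/* For example, after one int argument (so ncrn is 1), a 16-byte struct with
   16-byte alignment is bumped to the next even-numbered register by rule C.8
   above and is therefore passed in x2/x3 rather than x1/x2.  */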
1822 /* Implement TARGET_FUNCTION_ARG. */
1824 static rtx
1825 aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
1826 const_tree type, bool named)
1828 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1829 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1831 if (mode == VOIDmode)
1832 return NULL_RTX;
1834 aarch64_layout_arg (pcum_v, mode, type, named);
1835 return pcum->aapcs_reg;
1838 void
1839 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1840 const_tree fntype ATTRIBUTE_UNUSED,
1841 rtx libname ATTRIBUTE_UNUSED,
1842 const_tree fndecl ATTRIBUTE_UNUSED,
1843 unsigned n_named ATTRIBUTE_UNUSED)
1845 pcum->aapcs_ncrn = 0;
1846 pcum->aapcs_nvrn = 0;
1847 pcum->aapcs_nextncrn = 0;
1848 pcum->aapcs_nextnvrn = 0;
1849 pcum->pcs_variant = ARM_PCS_AAPCS64;
1850 pcum->aapcs_reg = NULL_RTX;
1851 pcum->aapcs_arg_processed = false;
1852 pcum->aapcs_stack_words = 0;
1853 pcum->aapcs_stack_size = 0;
1855 return;
1858 static void
1859 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1860 machine_mode mode,
1861 const_tree type,
1862 bool named)
1864 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1865 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1867 aarch64_layout_arg (pcum_v, mode, type, named);
1868 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1869 != (pcum->aapcs_stack_words != 0));
1870 pcum->aapcs_arg_processed = false;
1871 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1872 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1873 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1874 pcum->aapcs_stack_words = 0;
1875 pcum->aapcs_reg = NULL_RTX;
1879 bool
1880 aarch64_function_arg_regno_p (unsigned regno)
1882 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1883 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1886 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1887 PARM_BOUNDARY bits of alignment, but will be given anything up
1888 to STACK_BOUNDARY bits if the type requires it. This makes sure
1889 that both before and after the layout of each argument, the Next
1890 Stacked Argument Address (NSAA) will have a minimum alignment of
1891 8 bytes. */
1893 static unsigned int
1894 aarch64_function_arg_boundary (machine_mode mode, const_tree type)
1896 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1898 if (alignment < PARM_BOUNDARY)
1899 alignment = PARM_BOUNDARY;
1900 if (alignment > STACK_BOUNDARY)
1901 alignment = STACK_BOUNDARY;
1902 return alignment;
1905 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1907 Return true if an argument passed on the stack should be padded upwards,
1908 i.e. if the least-significant byte of the stack slot has useful data.
1910 Small aggregate types are placed in the lowest memory address.
1912 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1914 bool
1915 aarch64_pad_arg_upward (machine_mode mode, const_tree type)
1917 /* On little-endian targets, the least significant byte of every stack
1918 argument is passed at the lowest byte address of the stack slot. */
1919 if (!BYTES_BIG_ENDIAN)
1920 return true;
1922 /* Otherwise, integral, floating-point and pointer types are padded downward:
1923 the least significant byte of a stack argument is passed at the highest
1924 byte address of the stack slot. */
1925 if (type
1926 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1927 || POINTER_TYPE_P (type))
1928 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1929 return false;
1931 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1932 return true;
1935 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1937 It specifies padding for the last (may also be the only)
1938 element of a block move between registers and memory. Assuming
1939 the block is in memory, padding upward means that
1940 the last element is padded after its most significant byte,
1941 while in downward padding, the last element is padded on
1942 its least significant byte side.
1944 Small aggregates and small complex types are always padded
1945 upwards.
1947 We don't need to worry about homogeneous floating-point or
1948 short-vector aggregates; their move is not affected by the
1949 padding direction determined here. Regardless of endianness,
1950 each element of such an aggregate is put in the least
1951 significant bits of a fp/simd register.
1953 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1954 register has useful data, and return the opposite if the most
1955 significant byte does. */
1957 bool
1958 aarch64_pad_reg_upward (machine_mode mode, const_tree type,
1959 bool first ATTRIBUTE_UNUSED)
1962 /* Small composite types are always padded upward. */
1963 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1965 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1966 : GET_MODE_SIZE (mode));
1967 if (size < 2 * UNITS_PER_WORD)
1968 return true;
1971 /* Otherwise, use the default padding. */
1972 return !BYTES_BIG_ENDIAN;
1975 static machine_mode
1976 aarch64_libgcc_cmp_return_mode (void)
1978 return SImode;
1981 static bool
1982 aarch64_frame_pointer_required (void)
1984 /* In aarch64_override_options_after_change
1985 flag_omit_leaf_frame_pointer turns off the frame pointer by
1986 default. Turn it back on now if we've not got a leaf
1987 function. */
1988 if (flag_omit_leaf_frame_pointer
1989 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1990 return true;
1992 return false;
1995 /* Mark the registers that need to be saved by the callee and calculate
1996 the size of the callee-saved registers area and frame record (both FP
1997 and LR may be omitted). */
1998 static void
1999 aarch64_layout_frame (void)
2001 HOST_WIDE_INT offset = 0;
2002 int regno;
2004 if (reload_completed && cfun->machine->frame.laid_out)
2005 return;
2007 #define SLOT_NOT_REQUIRED (-2)
2008 #define SLOT_REQUIRED (-1)
2010 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
2011 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
2013 /* First mark all the registers that really need to be saved... */
2014 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2015 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2017 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2018 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2020 /* ... that includes the eh data registers (if needed)... */
2021 if (crtl->calls_eh_return)
2022 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
2023 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2024 = SLOT_REQUIRED;
2026 /* ... and any callee saved register that dataflow says is live. */
2027 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2028 if (df_regs_ever_live_p (regno)
2029 && (regno == R30_REGNUM
2030 || !call_used_regs[regno]))
2031 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2033 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2034 if (df_regs_ever_live_p (regno)
2035 && !call_used_regs[regno])
2036 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2038 if (frame_pointer_needed)
2040 /* FP and LR are placed in the linkage record. */
2041 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
2042 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2043 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
2044 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
2045 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2046 offset += 2 * UNITS_PER_WORD;
2049 /* Now assign stack slots for them. */
2050 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2051 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2053 cfun->machine->frame.reg_offset[regno] = offset;
2054 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2055 cfun->machine->frame.wb_candidate1 = regno;
2056 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
2057 cfun->machine->frame.wb_candidate2 = regno;
2058 offset += UNITS_PER_WORD;
2061 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2062 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2064 cfun->machine->frame.reg_offset[regno] = offset;
2065 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2066 cfun->machine->frame.wb_candidate1 = regno;
2067 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
2068 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2069 cfun->machine->frame.wb_candidate2 = regno;
2070 offset += UNITS_PER_WORD;
2073 cfun->machine->frame.padding0 =
2074 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
2075 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2077 cfun->machine->frame.saved_regs_size = offset;
2079 cfun->machine->frame.hard_fp_offset
2080 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
2081 + get_frame_size ()
2082 + cfun->machine->frame.saved_regs_size,
2083 STACK_BOUNDARY / BITS_PER_UNIT);
2085 cfun->machine->frame.frame_size
2086 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
2087 + crtl->outgoing_args_size,
2088 STACK_BOUNDARY / BITS_PER_UNIT);
2090 cfun->machine->frame.laid_out = true;
2093 static bool
2094 aarch64_register_saved_on_entry (int regno)
2096 return cfun->machine->frame.reg_offset[regno] >= 0;
2099 static unsigned
2100 aarch64_next_callee_save (unsigned regno, unsigned limit)
2102 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2103 regno ++;
2104 return regno;
2107 static void
2108 aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
2109 HOST_WIDE_INT adjustment)
2111 rtx base_rtx = stack_pointer_rtx;
2112 rtx insn, reg, mem;
2114 reg = gen_rtx_REG (mode, regno);
2115 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2116 plus_constant (Pmode, base_rtx, -adjustment));
2117 mem = gen_rtx_MEM (mode, mem);
2119 insn = emit_move_insn (mem, reg);
2120 RTX_FRAME_RELATED_P (insn) = 1;
2123 static rtx
2124 aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
2125 HOST_WIDE_INT adjustment)
2127 switch (mode)
2129 case DImode:
2130 return gen_storewb_pairdi_di (base, base, reg, reg2,
2131 GEN_INT (-adjustment),
2132 GEN_INT (UNITS_PER_WORD - adjustment));
2133 case DFmode:
2134 return gen_storewb_pairdf_di (base, base, reg, reg2,
2135 GEN_INT (-adjustment),
2136 GEN_INT (UNITS_PER_WORD - adjustment));
2137 default:
2138 gcc_unreachable ();
2142 static void
2143 aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
2144 unsigned regno2, HOST_WIDE_INT adjustment)
2146 rtx_insn *insn;
2147 rtx reg1 = gen_rtx_REG (mode, regno1);
2148 rtx reg2 = gen_rtx_REG (mode, regno2);
2150 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2151 reg2, adjustment));
2152 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2153 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2154 RTX_FRAME_RELATED_P (insn) = 1;
2157 static rtx
2158 aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
2159 HOST_WIDE_INT adjustment)
2161 switch (mode)
2163 case DImode:
2164 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
2165 GEN_INT (UNITS_PER_WORD));
2166 case DFmode:
2167 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
2168 GEN_INT (UNITS_PER_WORD));
2169 default:
2170 gcc_unreachable ();
2174 static rtx
2175 aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
2176 rtx reg2)
2178 switch (mode)
2180 case DImode:
2181 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2183 case DFmode:
2184 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2186 default:
2187 gcc_unreachable ();
2191 static rtx
2192 aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
2193 rtx mem2)
2195 switch (mode)
2197 case DImode:
2198 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2200 case DFmode:
2201 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2203 default:
2204 gcc_unreachable ();
2209 static void
2210 aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
2211 unsigned start, unsigned limit, bool skip_wb)
2213 rtx_insn *insn;
2214 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
2215 ? gen_frame_mem : gen_rtx_MEM);
2216 unsigned regno;
2217 unsigned regno2;
2219 for (regno = aarch64_next_callee_save (start, limit);
2220 regno <= limit;
2221 regno = aarch64_next_callee_save (regno + 1, limit))
2223 rtx reg, mem;
2224 HOST_WIDE_INT offset;
2226 if (skip_wb
2227 && (regno == cfun->machine->frame.wb_candidate1
2228 || regno == cfun->machine->frame.wb_candidate2))
2229 continue;
2231 reg = gen_rtx_REG (mode, regno);
2232 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2233 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2234 offset));
2236 regno2 = aarch64_next_callee_save (regno + 1, limit);
2238 if (regno2 <= limit
2239 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2240 == cfun->machine->frame.reg_offset[regno2]))
2243 rtx reg2 = gen_rtx_REG (mode, regno2);
2244 rtx mem2;
2246 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2247 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2248 offset));
2249 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2250 reg2));
2252 /* The first part of a frame-related parallel insn is
2253 always assumed to be relevant to the frame
2254 	     calculations; subsequent parts are only
2255 frame-related if explicitly marked. */
2256 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2257 regno = regno2;
2259 else
2260 insn = emit_move_insn (mem, reg);
2262 RTX_FRAME_RELATED_P (insn) = 1;
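/* Illustrative sketch (added commentary, not part of the original
   source): the adjacency test above lets two consecutive callee saves
   share one store-pair instruction.  The register numbers and slot
   offsets below are made-up example values.  */
#if 0
#include <assert.h>

int
main (void)
{
  long long reg_offset_x19 = 16, reg_offset_x20 = 24;	/* hypothetical slots */
  int units_per_word = 8;		/* UNITS_PER_WORD on AArch64 */

  /* Consecutive slots, so the two saves can be merged into a single
     store pair (roughly "stp x19, x20, [sp, #16]").  */
  assert (reg_offset_x19 + units_per_word == reg_offset_x20);
  return 0;
}
#endif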
2266 static void
2267 aarch64_restore_callee_saves (machine_mode mode,
2268 HOST_WIDE_INT start_offset, unsigned start,
2269 unsigned limit, bool skip_wb, rtx *cfi_ops)
2271 rtx base_rtx = stack_pointer_rtx;
2272 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
2273 ? gen_frame_mem : gen_rtx_MEM);
2274 unsigned regno;
2275 unsigned regno2;
2276 HOST_WIDE_INT offset;
2278 for (regno = aarch64_next_callee_save (start, limit);
2279 regno <= limit;
2280 regno = aarch64_next_callee_save (regno + 1, limit))
2282 rtx reg, mem;
2284 if (skip_wb
2285 && (regno == cfun->machine->frame.wb_candidate1
2286 || regno == cfun->machine->frame.wb_candidate2))
2287 continue;
2289 reg = gen_rtx_REG (mode, regno);
2290 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2291 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2293 regno2 = aarch64_next_callee_save (regno + 1, limit);
2295 if (regno2 <= limit
2296 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2297 == cfun->machine->frame.reg_offset[regno2]))
2299 rtx reg2 = gen_rtx_REG (mode, regno2);
2300 rtx mem2;
2302 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2303 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2304 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
2306 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
2307 regno = regno2;
2309 else
2310 emit_move_insn (reg, mem);
2311 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
2315 /* AArch64 stack frames generated by this compiler look like:
2317 +-------------------------------+
2319 | incoming stack arguments |
2321 +-------------------------------+
2322 | | <-- incoming stack pointer (aligned)
2323 | callee-allocated save area |
2324 | for register varargs |
2326 +-------------------------------+
2327 | local variables | <-- frame_pointer_rtx
2329 +-------------------------------+
2330 | padding0 | \
2331 +-------------------------------+ |
2332 | callee-saved registers | | frame.saved_regs_size
2333 +-------------------------------+ |
2334 | LR' | |
2335 +-------------------------------+ |
2336 | FP' | / <- hard_frame_pointer_rtx (aligned)
2337 +-------------------------------+
2338 | dynamic allocation |
2339 +-------------------------------+
2340 | padding |
2341 +-------------------------------+
2342 | outgoing stack arguments | <-- arg_pointer
2344 +-------------------------------+
2345 | | <-- stack_pointer_rtx (aligned)
2347 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2348 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2349 unchanged. */
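/* Illustrative worked example (added commentary, not part of the
   original source): with a hypothetical 16 bytes of varargs save area,
   24 bytes of locals, 48 bytes of callee saves (including the FP/LR
   pair) and 32 bytes of outgoing arguments, the two key offsets
   computed by aarch64_layout_frame come out as follows.  round_up
   stands in for AARCH64_ROUND_UP with a 16-byte stack alignment
   (STACK_BOUNDARY / BITS_PER_UNIT).  */
#if 0
#include <stdio.h>

static long long
round_up (long long x, long long align)
{
  return (x + align - 1) & -align;
}

int
main (void)
{
  long long saved_varargs_size = 16;	/* hypothetical */
  long long locals_size = 24;		/* hypothetical get_frame_size () */
  long long saved_regs_size = 48;	/* hypothetical callee saves */
  long long outgoing_args_size = 32;	/* hypothetical */

  long long hard_fp_offset
    = round_up (saved_varargs_size + locals_size + saved_regs_size, 16);
  long long frame_size = round_up (hard_fp_offset + outgoing_args_size, 16);

  /* Prints "hard_fp_offset = 96, frame_size = 128".  */
  printf ("hard_fp_offset = %lld, frame_size = %lld\n",
	  hard_fp_offset, frame_size);
  return 0;
}
#endif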
2351 /* Generate the prologue instructions for entry into a function.
2352 Establish the stack frame by decreasing the stack pointer with a
2353 properly calculated size and, if necessary, create a frame record
2354 filled with the values of LR and previous frame pointer. The
2355 current FP is also set up if it is in use. */
2357 void
2358 aarch64_expand_prologue (void)
2360 /* sub sp, sp, #<frame_size>
2361 stp {fp, lr}, [sp, #<frame_size> - 16]
2362 add fp, sp, #<frame_size> - hardfp_offset
2363 stp {cs_reg}, [fp, #-16] etc.
2365 sub sp, sp, <final_adjustment_if_any>
2367 HOST_WIDE_INT frame_size, offset;
2368 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
2369 HOST_WIDE_INT hard_fp_offset;
2370 rtx_insn *insn;
2372 aarch64_layout_frame ();
2374 offset = frame_size = cfun->machine->frame.frame_size;
2375 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2376 fp_offset = frame_size - hard_fp_offset;
2378 if (flag_stack_usage_info)
2379 current_function_static_stack_size = frame_size;
2381   /* Store pairs and load pairs have a range of only -512 to 504.  */
2382 if (offset >= 512)
2384       /* When the frame has a large size, the stack pointer is decreased
2385 	 first to skip over the callee-allocated save area for
2386 	 register varargs, the local variable area and/or the callee-saved
2387 	 register area.  This allows the pre-index write-back
2388 	 store pair instructions to be used for setting up the stack frame
2389 	 efficiently.
2390 offset = hard_fp_offset;
2391 if (offset >= 512)
2392 offset = cfun->machine->frame.saved_regs_size;
2394 frame_size -= (offset + crtl->outgoing_args_size);
2395 fp_offset = 0;
2397 if (frame_size >= 0x1000000)
2399 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2400 emit_move_insn (op0, GEN_INT (-frame_size));
2401 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2403 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2404 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2405 plus_constant (Pmode, stack_pointer_rtx,
2406 -frame_size)));
2407 RTX_FRAME_RELATED_P (insn) = 1;
2409 else if (frame_size > 0)
2411 int hi_ofs = frame_size & 0xfff000;
2412 int lo_ofs = frame_size & 0x000fff;
2414 if (hi_ofs)
2416 insn = emit_insn (gen_add2_insn
2417 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
2418 RTX_FRAME_RELATED_P (insn) = 1;
2420 if (lo_ofs)
2422 insn = emit_insn (gen_add2_insn
2423 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
2424 RTX_FRAME_RELATED_P (insn) = 1;
2428 else
2429 frame_size = -1;
2431 if (offset > 0)
2433 bool skip_wb = false;
2435 if (frame_pointer_needed)
2437 skip_wb = true;
2439 if (fp_offset)
2441 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2442 GEN_INT (-offset)));
2443 RTX_FRAME_RELATED_P (insn) = 1;
2445 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
2446 R30_REGNUM, false);
2448 else
2449 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
2451 /* Set up frame pointer to point to the location of the
2452 previous frame pointer on the stack. */
2453 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2454 stack_pointer_rtx,
2455 GEN_INT (fp_offset)));
2456 RTX_FRAME_RELATED_P (insn) = 1;
2457 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
2459 else
2461 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2462 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2464 if (fp_offset
2465 || reg1 == FIRST_PSEUDO_REGISTER
2466 || (reg2 == FIRST_PSEUDO_REGISTER
2467 && offset >= 256))
2469 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2470 GEN_INT (-offset)));
2471 RTX_FRAME_RELATED_P (insn) = 1;
2473 else
2475 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2477 skip_wb = true;
2479 if (reg2 == FIRST_PSEUDO_REGISTER)
2480 aarch64_pushwb_single_reg (mode1, reg1, offset);
2481 else
2482 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2486 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2487 skip_wb);
2488 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2489 skip_wb);
2492 /* when offset >= 512,
2493 sub sp, sp, #<outgoing_args_size> */
2494 if (frame_size > -1)
2496 if (crtl->outgoing_args_size > 0)
2498 insn = emit_insn (gen_add2_insn
2499 (stack_pointer_rtx,
2500 GEN_INT (- crtl->outgoing_args_size)));
2501 RTX_FRAME_RELATED_P (insn) = 1;
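/* Illustrative sketch (added commentary, not part of the original
   source): the hi_ofs/lo_ofs split used above when the initial stack
   decrement does not fit a single 12-bit immediate.  0x12345 is an
   arbitrary example size.  */
#if 0
#include <assert.h>

int
main (void)
{
  long long frame_size = 0x12345;		/* hypothetical */
  long long hi_ofs = frame_size & 0xfff000;	/* one sub with "lsl #12" */
  long long lo_ofs = frame_size & 0x000fff;	/* one plain 12-bit sub */

  assert (hi_ofs == 0x12000 && lo_ofs == 0x345);
  assert (hi_ofs + lo_ofs == frame_size);
  return 0;
}
#endif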
2506 /* Return TRUE if we can use a simple_return insn.
2508 This function checks whether the callee saved stack is empty, which
2509    means no restore actions are needed.  The pro_and_epilogue pass will use
2510    this to check whether the shrink-wrapping optimization is feasible.  */
2512 bool
2513 aarch64_use_return_insn_p (void)
2515 if (!reload_completed)
2516 return false;
2518 if (crtl->profile)
2519 return false;
2521 aarch64_layout_frame ();
2523 return cfun->machine->frame.frame_size == 0;
2526 /* Generate the epilogue instructions for returning from a function. */
2527 void
2528 aarch64_expand_epilogue (bool for_sibcall)
2530 HOST_WIDE_INT frame_size, offset;
2531 HOST_WIDE_INT fp_offset;
2532 HOST_WIDE_INT hard_fp_offset;
2533 rtx_insn *insn;
2534   /* We need a memory barrier to prevent reads from the deallocated stack.  */
2535 bool need_barrier_p = (get_frame_size () != 0
2536 || cfun->machine->frame.saved_varargs_size);
2538 aarch64_layout_frame ();
2540 offset = frame_size = cfun->machine->frame.frame_size;
2541 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2542 fp_offset = frame_size - hard_fp_offset;
2544   /* Store pairs and load pairs have a range of only -512 to 504.  */
2545 if (offset >= 512)
2547 offset = hard_fp_offset;
2548 if (offset >= 512)
2549 offset = cfun->machine->frame.saved_regs_size;
2551 frame_size -= (offset + crtl->outgoing_args_size);
2552 fp_offset = 0;
2553 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2555 insn = emit_insn (gen_add2_insn
2556 (stack_pointer_rtx,
2557 GEN_INT (crtl->outgoing_args_size)));
2558 RTX_FRAME_RELATED_P (insn) = 1;
2561 else
2562 frame_size = -1;
2564 /* If there were outgoing arguments or we've done dynamic stack
2565 allocation, then restore the stack pointer from the frame
2566 pointer. This is at most one insn and more efficient than using
2567 GCC's internal mechanism. */
2568 if (frame_pointer_needed
2569 && (crtl->outgoing_args_size || cfun->calls_alloca))
2571 if (cfun->calls_alloca)
2572 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2574 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2575 hard_frame_pointer_rtx,
2576 GEN_INT (0)));
2577 offset = offset - fp_offset;
2580 if (offset > 0)
2582 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2583 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2584 bool skip_wb = true;
2585 rtx cfi_ops = NULL;
2587 if (frame_pointer_needed)
2588 fp_offset = 0;
2589 else if (fp_offset
2590 || reg1 == FIRST_PSEUDO_REGISTER
2591 || (reg2 == FIRST_PSEUDO_REGISTER
2592 && offset >= 256))
2593 skip_wb = false;
2595 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2596 skip_wb, &cfi_ops);
2597 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2598 skip_wb, &cfi_ops);
2600 if (need_barrier_p)
2601 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2603 if (skip_wb)
2605 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2606 rtx rreg1 = gen_rtx_REG (mode1, reg1);
2608 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
2609 if (reg2 == FIRST_PSEUDO_REGISTER)
2611 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2612 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2613 mem = gen_rtx_MEM (mode1, mem);
2614 insn = emit_move_insn (rreg1, mem);
2616 else
2618 rtx rreg2 = gen_rtx_REG (mode1, reg2);
2620 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2621 insn = emit_insn (aarch64_gen_loadwb_pair
2622 (mode1, stack_pointer_rtx, rreg1,
2623 rreg2, offset));
2626 else
2628 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2629 GEN_INT (offset)));
2632 /* Reset the CFA to be SP + FRAME_SIZE. */
2633 rtx new_cfa = stack_pointer_rtx;
2634 if (frame_size > 0)
2635 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2636 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2637 REG_NOTES (insn) = cfi_ops;
2638 RTX_FRAME_RELATED_P (insn) = 1;
2641 if (frame_size > 0)
2643 if (need_barrier_p)
2644 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2646 if (frame_size >= 0x1000000)
2648 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2649 emit_move_insn (op0, GEN_INT (frame_size));
2650 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2652 else
2654 int hi_ofs = frame_size & 0xfff000;
2655 int lo_ofs = frame_size & 0x000fff;
2657 if (hi_ofs && lo_ofs)
2659 insn = emit_insn (gen_add2_insn
2660 (stack_pointer_rtx, GEN_INT (hi_ofs)));
2661 RTX_FRAME_RELATED_P (insn) = 1;
2662 frame_size = lo_ofs;
2664 insn = emit_insn (gen_add2_insn
2665 (stack_pointer_rtx, GEN_INT (frame_size)));
2668 /* Reset the CFA to be SP + 0. */
2669 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2670 RTX_FRAME_RELATED_P (insn) = 1;
2673 /* Stack adjustment for exception handler. */
2674 if (crtl->calls_eh_return)
2676 /* We need to unwind the stack by the offset computed by
2677 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2678 to be SP; letting the CFA move during this adjustment
2679 is just as correct as retaining the CFA from the body
2680 of the function. Therefore, do nothing special. */
2681 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2684 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2685 if (!for_sibcall)
2686 emit_jump_insn (ret_rtx);
2689 /* Return the place to copy the exception unwinding return address to.
2690    This will probably be a stack slot, but could (in theory) be the
2691    return register.  */
2693 aarch64_final_eh_return_addr (void)
2695 HOST_WIDE_INT fp_offset;
2697 aarch64_layout_frame ();
2699 fp_offset = cfun->machine->frame.frame_size
2700 - cfun->machine->frame.hard_fp_offset;
2702 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2703 return gen_rtx_REG (DImode, LR_REGNUM);
2705 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2706 result in a store to save LR introduced by builtin_eh_return () being
2707 incorrectly deleted because the alias is not detected.
2708 So in the calculation of the address to copy the exception unwinding
2709    return address to, we distinguish two cases.
2710 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2711 we return a SP-relative location since all the addresses are SP-relative
2712 in this case. This prevents the store from being optimized away.
2713 If the fp_offset is not 0, then the addresses will be FP-relative and
2714 therefore we return a FP-relative location. */
2716 if (frame_pointer_needed)
2718 if (fp_offset)
2719 return gen_frame_mem (DImode,
2720 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2721 else
2722 return gen_frame_mem (DImode,
2723 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2726 /* If FP is not needed, we calculate the location of LR, which would be
2727 at the top of the saved registers block. */
2729 return gen_frame_mem (DImode,
2730 plus_constant (Pmode,
2731 stack_pointer_rtx,
2732 fp_offset
2733 + cfun->machine->frame.saved_regs_size
2734 - 2 * UNITS_PER_WORD));
2737 /* Possibly output code to build up a constant in a register. For
2738 the benefit of the costs infrastructure, returns the number of
2739 instructions which would be emitted. GENERATE inhibits or
2740 enables code generation. */
2742 static int
2743 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2745 int insns = 0;
2747 if (aarch64_bitmask_imm (val, DImode))
2749 if (generate)
2750 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2751 insns = 1;
2753 else
2755 int i;
2756 int ncount = 0;
2757 int zcount = 0;
2758 HOST_WIDE_INT valp = val >> 16;
2759 HOST_WIDE_INT valm;
2760 HOST_WIDE_INT tval;
2762 for (i = 16; i < 64; i += 16)
2764 valm = (valp & 0xffff);
2766 if (valm != 0)
2767 ++ zcount;
2769 if (valm != 0xffff)
2770 ++ ncount;
2772 valp >>= 16;
2775 /* zcount contains the number of additional MOVK instructions
2776 required if the constant is built up with an initial MOVZ instruction,
2777 while ncount is the number of MOVK instructions required if starting
2778 with a MOVN instruction. Choose the sequence that yields the fewest
2779 number of instructions, preferring MOVZ instructions when they are both
2780 the same. */
2781 if (ncount < zcount)
2783 if (generate)
2784 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2785 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2786 tval = 0xffff;
2787 insns++;
2789 else
2791 if (generate)
2792 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2793 GEN_INT (val & 0xffff));
2794 tval = 0;
2795 insns++;
2798 val >>= 16;
2800 for (i = 16; i < 64; i += 16)
2802 if ((val & 0xffff) != tval)
2804 if (generate)
2805 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2806 GEN_INT (i),
2807 GEN_INT (val & 0xffff)));
2808 insns++;
2810 val >>= 16;
2813 return insns;
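/* Illustrative sketch (added commentary, not part of the original
   source): a standalone restatement of the MOVZ/MOVN choice made
   above.  Only the three upper 16-bit chunks are counted, since the
   initial MOVZ/MOVN always covers the bottom chunk; the bitmask
   immediate shortcut checked earlier is ignored here.  */
#if 0
#include <stdio.h>

static int
mov_sequence_length (unsigned long long val)
{
  int zcount = 0, ncount = 0;

  for (int i = 16; i < 64; i += 16)
    {
      unsigned int chunk = (val >> i) & 0xffff;
      if (chunk != 0)
	zcount++;	/* needs a MOVK after an initial MOVZ */
      if (chunk != 0xffff)
	ncount++;	/* needs a MOVK after an initial MOVN */
    }

  return 1 + (ncount < zcount ? ncount : zcount);
}

int
main (void)
{
  /* Prints 2: MOVZ x0, #0x5678; MOVK x0, #0x1234, lsl #16.  */
  printf ("%d\n", mov_sequence_length (0x12345678ull));
  /* Prints 1: MOVN x0, #0x5432 yields 0xffffffffffffabcd.  */
  printf ("%d\n", mov_sequence_length (0xffffffffffffabcdull));
  return 0;
}
#endif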
2816 static void
2817 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2819 HOST_WIDE_INT mdelta = delta;
2820 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2821 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2823 if (mdelta < 0)
2824 mdelta = -mdelta;
2826 if (mdelta >= 4096 * 4096)
2828 (void) aarch64_build_constant (scratchreg, delta, true);
2829 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2831 else if (mdelta > 0)
2833 if (mdelta >= 4096)
2835 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2836 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2837 if (delta < 0)
2838 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2839 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2840 else
2841 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2842 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2844 if (mdelta % 4096 != 0)
2846 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2847 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2848 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
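/* Illustrative sketch (added commentary, not part of the original
   source): how a DELTA between 4096 and 4096 * 4096 is split above
   into a shifted-by-12 part plus a 12-bit remainder.  20000 is an
   arbitrary example value.  */
#if 0
#include <assert.h>

int
main (void)
{
  long long delta = 20000;		  /* hypothetical adjustment */
  long long high = (delta / 4096) << 12;  /* added via the scratch register */
  long long low = delta % 4096;		  /* added as a plain immediate */

  assert (high == 16384 && low == 3616);
  assert (high + low == delta);
  return 0;
}
#endif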
2853 /* Output code to add DELTA to the first argument, and then jump
2854 to FUNCTION. Used for C++ multiple inheritance. */
2855 static void
2856 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2857 HOST_WIDE_INT delta,
2858 HOST_WIDE_INT vcall_offset,
2859 tree function)
2861 /* The this pointer is always in x0. Note that this differs from
2862      Arm where the this pointer may be bumped to r1 if r0 is required
2863 to return a pointer to an aggregate. On AArch64 a result value
2864 pointer will be in x8. */
2865 int this_regno = R0_REGNUM;
2866 rtx this_rtx, temp0, temp1, addr, funexp;
2867 rtx_insn *insn;
2869 reload_completed = 1;
2870 emit_note (NOTE_INSN_PROLOGUE_END);
2872 if (vcall_offset == 0)
2873 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2874 else
2876 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2878 this_rtx = gen_rtx_REG (Pmode, this_regno);
2879 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2880 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2882 addr = this_rtx;
2883 if (delta != 0)
2885 if (delta >= -256 && delta < 256)
2886 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2887 plus_constant (Pmode, this_rtx, delta));
2888 else
2889 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2892 if (Pmode == ptr_mode)
2893 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2894 else
2895 aarch64_emit_move (temp0,
2896 gen_rtx_ZERO_EXTEND (Pmode,
2897 gen_rtx_MEM (ptr_mode, addr)));
2899 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2900 addr = plus_constant (Pmode, temp0, vcall_offset);
2901 else
2903 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2904 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2907 if (Pmode == ptr_mode)
2908 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2909 else
2910 aarch64_emit_move (temp1,
2911 gen_rtx_SIGN_EXTEND (Pmode,
2912 gen_rtx_MEM (ptr_mode, addr)));
2914 emit_insn (gen_add2_insn (this_rtx, temp1));
2917 /* Generate a tail call to the target function. */
2918 if (!TREE_USED (function))
2920 assemble_external (function);
2921 TREE_USED (function) = 1;
2923 funexp = XEXP (DECL_RTL (function), 0);
2924 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2925 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2926 SIBLING_CALL_P (insn) = 1;
2928 insn = get_insns ();
2929 shorten_branches (insn);
2930 final_start_function (insn, file, 1);
2931 final (insn, file, 1);
2932 final_end_function ();
2934 /* Stop pretending to be a post-reload pass. */
2935 reload_completed = 0;
2938 static bool
2939 aarch64_tls_referenced_p (rtx x)
2941 if (!TARGET_HAVE_TLS)
2942 return false;
2943 subrtx_iterator::array_type array;
2944 FOR_EACH_SUBRTX (iter, array, x, ALL)
2946 const_rtx x = *iter;
2947 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
2948 return true;
2949 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2950 TLS offsets, not real symbol references. */
2951 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
2952 iter.skip_subrtxes ();
2954 return false;
2958 static int
2959 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2961 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2962 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2964 if (*imm1 < *imm2)
2965 return -1;
2966 if (*imm1 > *imm2)
2967 return +1;
2968 return 0;
2972 static void
2973 aarch64_build_bitmask_table (void)
2975 unsigned HOST_WIDE_INT mask, imm;
2976 unsigned int log_e, e, s, r;
2977 unsigned int nimms = 0;
2979 for (log_e = 1; log_e <= 6; log_e++)
2981 e = 1 << log_e;
2982 if (e == 64)
2983 mask = ~(HOST_WIDE_INT) 0;
2984 else
2985 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2986 for (s = 1; s < e; s++)
2988 for (r = 0; r < e; r++)
2990 /* set s consecutive bits to 1 (s < 64) */
2991 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2992 /* rotate right by r */
2993 if (r != 0)
2994 imm = ((imm >> r) | (imm << (e - r))) & mask;
2995 /* replicate the constant depending on SIMD size */
2996 switch (log_e) {
2997 case 1: imm |= (imm << 2);
2998 case 2: imm |= (imm << 4);
2999 case 3: imm |= (imm << 8);
3000 case 4: imm |= (imm << 16);
3001 case 5: imm |= (imm << 32);
3002 case 6:
3003 break;
3004 default:
3005 gcc_unreachable ();
3007 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
3008 aarch64_bitmasks[nimms++] = imm;
3013 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
3014 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
3015 aarch64_bitmasks_cmp);
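/* Illustrative sketch (added commentary, not part of the original
   source): expanding a single (element size E, run length S, rotation
   R) triple into the 64-bit value the table above would record.  The
   triple (8, 3, 1) is an arbitrary example; the doubling loop is
   equivalent to the fall-through switch above for power-of-two element
   sizes.  */
#if 0
#include <stdio.h>

static unsigned long long
expand_bitmask (unsigned int e, unsigned int s, unsigned int r)
{
  unsigned long long mask = (e == 64) ? ~0ull : (1ull << e) - 1;
  unsigned long long imm = (1ull << s) - 1;	/* s consecutive ones */

  if (r != 0)	/* rotate right by r within the element */
    imm = ((imm >> r) | (imm << (e - r))) & mask;

  for (unsigned int w = e; w < 64; w *= 2)	/* replicate across 64 bits */
    imm |= imm << w;

  return imm;
}

int
main (void)
{
  /* Prints 0x8383838383838383: 00000111 rotated right by one becomes
     10000011, then replicated into every byte.  */
  printf ("%#llx\n", expand_bitmask (8, 3, 1));
  return 0;
}
#endif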
3019 /* Return true if val can be encoded as a 12-bit unsigned immediate with
3020 a left shift of 0 or 12 bits. */
3021 bool
3022 aarch64_uimm12_shift (HOST_WIDE_INT val)
3024 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3025 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3030 /* Return true if val is an immediate that can be loaded into a
3031 register by a MOVZ instruction. */
3032 static bool
3033 aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
3035 if (GET_MODE_SIZE (mode) > 4)
3037 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3038 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3039 return 1;
3041 else
3043 /* Ignore sign extension. */
3044 val &= (HOST_WIDE_INT) 0xffffffff;
3046 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3047 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3051 /* Return true if val is a valid bitmask immediate. */
3052 bool
3053 aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
3055 if (GET_MODE_SIZE (mode) < 8)
3057 /* Replicate bit pattern. */
3058 val &= (HOST_WIDE_INT) 0xffffffff;
3059 val |= val << 32;
3061 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
3062 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
3066 /* Return true if val is an immediate that can be loaded into a
3067 register in a single instruction. */
3068 bool
3069 aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
3071 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3072 return 1;
3073 return aarch64_bitmask_imm (val, mode);
3076 static bool
3077 aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
3079 rtx base, offset;
3081 if (GET_CODE (x) == HIGH)
3082 return true;
3084 split_const (x, &base, &offset);
3085 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
3087 if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
3088 != SYMBOL_FORCE_TO_MEM)
3089 return true;
3090 else
3091 /* Avoid generating a 64-bit relocation in ILP32; leave
3092 	 it to aarch64_expand_mov_immediate to handle properly.  */
3093 return mode != ptr_mode;
3096 return aarch64_tls_referenced_p (x);
3099 /* Return true if register REGNO is a valid index register.
3100 STRICT_P is true if REG_OK_STRICT is in effect. */
3102 bool
3103 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3105 if (!HARD_REGISTER_NUM_P (regno))
3107 if (!strict_p)
3108 return true;
3110 if (!reg_renumber)
3111 return false;
3113 regno = reg_renumber[regno];
3115 return GP_REGNUM_P (regno);
3118 /* Return true if register REGNO is a valid base register for mode MODE.
3119 STRICT_P is true if REG_OK_STRICT is in effect. */
3121 bool
3122 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3124 if (!HARD_REGISTER_NUM_P (regno))
3126 if (!strict_p)
3127 return true;
3129 if (!reg_renumber)
3130 return false;
3132 regno = reg_renumber[regno];
3135 /* The fake registers will be eliminated to either the stack or
3136 hard frame pointer, both of which are usually valid base registers.
3137 Reload deals with the cases where the eliminated form isn't valid. */
3138 return (GP_REGNUM_P (regno)
3139 || regno == SP_REGNUM
3140 || regno == FRAME_POINTER_REGNUM
3141 || regno == ARG_POINTER_REGNUM);
3144 /* Return true if X is a valid base register for mode MODE.
3145 STRICT_P is true if REG_OK_STRICT is in effect. */
3147 static bool
3148 aarch64_base_register_rtx_p (rtx x, bool strict_p)
3150 if (!strict_p && GET_CODE (x) == SUBREG)
3151 x = SUBREG_REG (x);
3153 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3156 /* Return true if address offset is a valid index. If it is, fill in INFO
3157 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3159 static bool
3160 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
3161 machine_mode mode, bool strict_p)
3163 enum aarch64_address_type type;
3164 rtx index;
3165 int shift;
3167 /* (reg:P) */
3168 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3169 && GET_MODE (x) == Pmode)
3171 type = ADDRESS_REG_REG;
3172 index = x;
3173 shift = 0;
3175 /* (sign_extend:DI (reg:SI)) */
3176 else if ((GET_CODE (x) == SIGN_EXTEND
3177 || GET_CODE (x) == ZERO_EXTEND)
3178 && GET_MODE (x) == DImode
3179 && GET_MODE (XEXP (x, 0)) == SImode)
3181 type = (GET_CODE (x) == SIGN_EXTEND)
3182 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3183 index = XEXP (x, 0);
3184 shift = 0;
3186 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3187 else if (GET_CODE (x) == MULT
3188 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3189 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3190 && GET_MODE (XEXP (x, 0)) == DImode
3191 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3192 && CONST_INT_P (XEXP (x, 1)))
3194 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3195 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3196 index = XEXP (XEXP (x, 0), 0);
3197 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3199 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3200 else if (GET_CODE (x) == ASHIFT
3201 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3202 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3203 && GET_MODE (XEXP (x, 0)) == DImode
3204 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3205 && CONST_INT_P (XEXP (x, 1)))
3207 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3208 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3209 index = XEXP (XEXP (x, 0), 0);
3210 shift = INTVAL (XEXP (x, 1));
3212 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3213 else if ((GET_CODE (x) == SIGN_EXTRACT
3214 || GET_CODE (x) == ZERO_EXTRACT)
3215 && GET_MODE (x) == DImode
3216 && GET_CODE (XEXP (x, 0)) == MULT
3217 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3218 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3220 type = (GET_CODE (x) == SIGN_EXTRACT)
3221 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3222 index = XEXP (XEXP (x, 0), 0);
3223 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3224 if (INTVAL (XEXP (x, 1)) != 32 + shift
3225 || INTVAL (XEXP (x, 2)) != 0)
3226 shift = -1;
3228 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3229 (const_int 0xffffffff<<shift)) */
3230 else if (GET_CODE (x) == AND
3231 && GET_MODE (x) == DImode
3232 && GET_CODE (XEXP (x, 0)) == MULT
3233 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3234 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3235 && CONST_INT_P (XEXP (x, 1)))
3237 type = ADDRESS_REG_UXTW;
3238 index = XEXP (XEXP (x, 0), 0);
3239 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3240 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3241 shift = -1;
3243 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3244 else if ((GET_CODE (x) == SIGN_EXTRACT
3245 || GET_CODE (x) == ZERO_EXTRACT)
3246 && GET_MODE (x) == DImode
3247 && GET_CODE (XEXP (x, 0)) == ASHIFT
3248 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3249 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3251 type = (GET_CODE (x) == SIGN_EXTRACT)
3252 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3253 index = XEXP (XEXP (x, 0), 0);
3254 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3255 if (INTVAL (XEXP (x, 1)) != 32 + shift
3256 || INTVAL (XEXP (x, 2)) != 0)
3257 shift = -1;
3259 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3260 (const_int 0xffffffff<<shift)) */
3261 else if (GET_CODE (x) == AND
3262 && GET_MODE (x) == DImode
3263 && GET_CODE (XEXP (x, 0)) == ASHIFT
3264 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3265 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3266 && CONST_INT_P (XEXP (x, 1)))
3268 type = ADDRESS_REG_UXTW;
3269 index = XEXP (XEXP (x, 0), 0);
3270 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3271 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3272 shift = -1;
3274 /* (mult:P (reg:P) (const_int scale)) */
3275 else if (GET_CODE (x) == MULT
3276 && GET_MODE (x) == Pmode
3277 && GET_MODE (XEXP (x, 0)) == Pmode
3278 && CONST_INT_P (XEXP (x, 1)))
3280 type = ADDRESS_REG_REG;
3281 index = XEXP (x, 0);
3282 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3284 /* (ashift:P (reg:P) (const_int shift)) */
3285 else if (GET_CODE (x) == ASHIFT
3286 && GET_MODE (x) == Pmode
3287 && GET_MODE (XEXP (x, 0)) == Pmode
3288 && CONST_INT_P (XEXP (x, 1)))
3290 type = ADDRESS_REG_REG;
3291 index = XEXP (x, 0);
3292 shift = INTVAL (XEXP (x, 1));
3294 else
3295 return false;
3297 if (GET_CODE (index) == SUBREG)
3298 index = SUBREG_REG (index);
3300 if ((shift == 0 ||
3301 (shift > 0 && shift <= 3
3302 && (1 << shift) == GET_MODE_SIZE (mode)))
3303 && REG_P (index)
3304 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3306 info->type = type;
3307 info->offset = index;
3308 info->shift = shift;
3309 return true;
3312 return false;
3315 bool
3316 aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3318 return (offset >= -64 * GET_MODE_SIZE (mode)
3319 && offset < 64 * GET_MODE_SIZE (mode)
3320 && offset % GET_MODE_SIZE (mode) == 0);
3323 static inline bool
3324 offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
3325 HOST_WIDE_INT offset)
3327 return offset >= -256 && offset < 256;
3330 static inline bool
3331 offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3333 return (offset >= 0
3334 && offset < 4096 * GET_MODE_SIZE (mode)
3335 && offset % GET_MODE_SIZE (mode) == 0);
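/* Illustrative sketch (added commentary, not part of the original
   source): the concrete ranges the three predicates above accept for
   an 8-byte (DImode) access, i.e. roughly the LDP/STP, unscaled
   LDUR/STUR and scaled LDR/STR immediate ranges.  */
#if 0
#include <assert.h>

static int seven_bit_signed_scaled (long long ofs, int size)
{ return ofs >= -64 * size && ofs < 64 * size && ofs % size == 0; }

static int nine_bit_signed_unscaled (long long ofs)
{ return ofs >= -256 && ofs < 256; }

static int twelve_bit_unsigned_scaled (long long ofs, int size)
{ return ofs >= 0 && ofs < 4096 * size && ofs % size == 0; }

int
main (void)
{
  assert (seven_bit_signed_scaled (-512, 8) && !seven_bit_signed_scaled (512, 8));
  assert (nine_bit_signed_unscaled (255) && !nine_bit_signed_unscaled (256));
  assert (twelve_bit_unsigned_scaled (32760, 8)
	  && !twelve_bit_unsigned_scaled (32768, 8));
  return 0;
}
#endif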
3338 /* Return true if X is a valid address for machine mode MODE. If it is,
3339 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3340 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3342 static bool
3343 aarch64_classify_address (struct aarch64_address_info *info,
3344 rtx x, machine_mode mode,
3345 RTX_CODE outer_code, bool strict_p)
3347 enum rtx_code code = GET_CODE (x);
3348 rtx op0, op1;
3349 bool allow_reg_index_p =
3350 outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3351 || aarch64_vector_mode_supported_p (mode));
3352 /* Don't support anything other than POST_INC or REG addressing for
3353 AdvSIMD. */
3354 if (aarch64_vect_struct_mode_p (mode)
3355 && (code != POST_INC && code != REG))
3356 return false;
3358 switch (code)
3360 case REG:
3361 case SUBREG:
3362 info->type = ADDRESS_REG_IMM;
3363 info->base = x;
3364 info->offset = const0_rtx;
3365 return aarch64_base_register_rtx_p (x, strict_p);
3367 case PLUS:
3368 op0 = XEXP (x, 0);
3369 op1 = XEXP (x, 1);
3371 if (! strict_p
3372 && REG_P (op0)
3373 && (op0 == virtual_stack_vars_rtx
3374 || op0 == frame_pointer_rtx
3375 || op0 == arg_pointer_rtx)
3376 && CONST_INT_P (op1))
3378 info->type = ADDRESS_REG_IMM;
3379 info->base = op0;
3380 info->offset = op1;
3382 return true;
3385 if (GET_MODE_SIZE (mode) != 0
3386 && CONST_INT_P (op1)
3387 && aarch64_base_register_rtx_p (op0, strict_p))
3389 HOST_WIDE_INT offset = INTVAL (op1);
3391 info->type = ADDRESS_REG_IMM;
3392 info->base = op0;
3393 info->offset = op1;
3395 /* TImode and TFmode values are allowed in both pairs of X
3396 registers and individual Q registers. The available
3397 address modes are:
3398 X,X: 7-bit signed scaled offset
3399 Q: 9-bit signed offset
3400 We conservatively require an offset representable in either mode.
3402 if (mode == TImode || mode == TFmode)
3403 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3404 && offset_9bit_signed_unscaled_p (mode, offset));
3406 if (outer_code == PARALLEL)
3407 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3408 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3409 else
3410 return (offset_9bit_signed_unscaled_p (mode, offset)
3411 || offset_12bit_unsigned_scaled_p (mode, offset));
3414 if (allow_reg_index_p)
3416 /* Look for base + (scaled/extended) index register. */
3417 if (aarch64_base_register_rtx_p (op0, strict_p)
3418 && aarch64_classify_index (info, op1, mode, strict_p))
3420 info->base = op0;
3421 return true;
3423 if (aarch64_base_register_rtx_p (op1, strict_p)
3424 && aarch64_classify_index (info, op0, mode, strict_p))
3426 info->base = op1;
3427 return true;
3431 return false;
3433 case POST_INC:
3434 case POST_DEC:
3435 case PRE_INC:
3436 case PRE_DEC:
3437 info->type = ADDRESS_REG_WB;
3438 info->base = XEXP (x, 0);
3439 info->offset = NULL_RTX;
3440 return aarch64_base_register_rtx_p (info->base, strict_p);
3442 case POST_MODIFY:
3443 case PRE_MODIFY:
3444 info->type = ADDRESS_REG_WB;
3445 info->base = XEXP (x, 0);
3446 if (GET_CODE (XEXP (x, 1)) == PLUS
3447 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3448 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3449 && aarch64_base_register_rtx_p (info->base, strict_p))
3451 HOST_WIDE_INT offset;
3452 info->offset = XEXP (XEXP (x, 1), 1);
3453 offset = INTVAL (info->offset);
3455 /* TImode and TFmode values are allowed in both pairs of X
3456 registers and individual Q registers. The available
3457 address modes are:
3458 X,X: 7-bit signed scaled offset
3459 Q: 9-bit signed offset
3460 We conservatively require an offset representable in either mode.
3462 if (mode == TImode || mode == TFmode)
3463 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3464 && offset_9bit_signed_unscaled_p (mode, offset));
3466 if (outer_code == PARALLEL)
3467 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3468 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3469 else
3470 return offset_9bit_signed_unscaled_p (mode, offset);
3472 return false;
3474 case CONST:
3475 case SYMBOL_REF:
3476 case LABEL_REF:
3477 /* load literal: pc-relative constant pool entry. Only supported
3478 for SI mode or larger. */
3479 info->type = ADDRESS_SYMBOLIC;
3480 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3482 rtx sym, addend;
3484 split_const (x, &sym, &addend);
3485 return (GET_CODE (sym) == LABEL_REF
3486 || (GET_CODE (sym) == SYMBOL_REF
3487 && CONSTANT_POOL_ADDRESS_P (sym)));
3489 return false;
3491 case LO_SUM:
3492 info->type = ADDRESS_LO_SUM;
3493 info->base = XEXP (x, 0);
3494 info->offset = XEXP (x, 1);
3495 if (allow_reg_index_p
3496 && aarch64_base_register_rtx_p (info->base, strict_p))
3498 rtx sym, offs;
3499 split_const (info->offset, &sym, &offs);
3500 if (GET_CODE (sym) == SYMBOL_REF
3501 && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
3502 == SYMBOL_SMALL_ABSOLUTE))
3504 /* The symbol and offset must be aligned to the access size. */
3505 unsigned int align;
3506 unsigned int ref_size;
3508 if (CONSTANT_POOL_ADDRESS_P (sym))
3509 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3510 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3512 tree exp = SYMBOL_REF_DECL (sym);
3513 align = TYPE_ALIGN (TREE_TYPE (exp));
3514 align = CONSTANT_ALIGNMENT (exp, align);
3516 else if (SYMBOL_REF_DECL (sym))
3517 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3518 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3519 && SYMBOL_REF_BLOCK (sym) != NULL)
3520 align = SYMBOL_REF_BLOCK (sym)->alignment;
3521 else
3522 align = BITS_PER_UNIT;
3524 ref_size = GET_MODE_SIZE (mode);
3525 if (ref_size == 0)
3526 ref_size = GET_MODE_SIZE (DImode);
3528 return ((INTVAL (offs) & (ref_size - 1)) == 0
3529 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3532 return false;
3534 default:
3535 return false;
3539 bool
3540 aarch64_symbolic_address_p (rtx x)
3542 rtx offset;
3544 split_const (x, &x, &offset);
3545 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3548 /* Classify the base of symbolic expression X, given that X appears in
3549 context CONTEXT. */
3551 enum aarch64_symbol_type
3552 aarch64_classify_symbolic_expression (rtx x,
3553 enum aarch64_symbol_context context)
3555 rtx offset;
3557 split_const (x, &x, &offset);
3558 return aarch64_classify_symbol (x, offset, context);
3562 /* Return TRUE if X is a legitimate address for accessing memory in
3563 mode MODE. */
3564 static bool
3565 aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
3567 struct aarch64_address_info addr;
3569 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3572 /* Return TRUE if X is a legitimate address for accessing memory in
3573 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3574 pair operation. */
3575 bool
3576 aarch64_legitimate_address_p (machine_mode mode, rtx x,
3577 RTX_CODE outer_code, bool strict_p)
3579 struct aarch64_address_info addr;
3581 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3584 /* Return TRUE if rtx X is immediate constant 0.0 */
3585 bool
3586 aarch64_float_const_zero_rtx_p (rtx x)
3588 REAL_VALUE_TYPE r;
3590 if (GET_MODE (x) == VOIDmode)
3591 return false;
3593 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3594 if (REAL_VALUE_MINUS_ZERO (r))
3595 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3596 return REAL_VALUES_EQUAL (r, dconst0);
3599 /* Return the fixed registers used for condition codes. */
3601 static bool
3602 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3604 *p1 = CC_REGNUM;
3605 *p2 = INVALID_REGNUM;
3606 return true;
3609 /* Emit call insn with PAT and do aarch64-specific handling. */
3611 void
3612 aarch64_emit_call_insn (rtx pat)
3614 rtx insn = emit_call_insn (pat);
3616 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3617 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3618 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3621 machine_mode
3622 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3624 /* All floating point compares return CCFP if it is an equality
3625 comparison, and CCFPE otherwise. */
3626 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3628 switch (code)
3630 case EQ:
3631 case NE:
3632 case UNORDERED:
3633 case ORDERED:
3634 case UNLT:
3635 case UNLE:
3636 case UNGT:
3637 case UNGE:
3638 case UNEQ:
3639 case LTGT:
3640 return CCFPmode;
3642 case LT:
3643 case LE:
3644 case GT:
3645 case GE:
3646 return CCFPEmode;
3648 default:
3649 gcc_unreachable ();
3653 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3654 && y == const0_rtx
3655 && (code == EQ || code == NE || code == LT || code == GE)
3656 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3657 || GET_CODE (x) == NEG))
3658 return CC_NZmode;
3660 /* A compare with a shifted operand. Because of canonicalization,
3661 the comparison will have to be swapped when we emit the assembly
3662 code. */
3663 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3664 && (REG_P (y) || GET_CODE (y) == SUBREG)
3665 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3666 || GET_CODE (x) == LSHIFTRT
3667 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3668 return CC_SWPmode;
3670 /* Similarly for a negated operand, but we can only do this for
3671 equalities. */
3672 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3673 && (REG_P (y) || GET_CODE (y) == SUBREG)
3674 && (code == EQ || code == NE)
3675 && GET_CODE (x) == NEG)
3676 return CC_Zmode;
3678 /* A compare of a mode narrower than SI mode against zero can be done
3679 by extending the value in the comparison. */
3680 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3681 && y == const0_rtx)
3682 /* Only use sign-extension if we really need it. */
3683 return ((code == GT || code == GE || code == LE || code == LT)
3684 ? CC_SESWPmode : CC_ZESWPmode);
3686 /* For everything else, return CCmode. */
3687 return CCmode;
3690 static int
3691 aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
3694 aarch64_get_condition_code (rtx x)
3696 machine_mode mode = GET_MODE (XEXP (x, 0));
3697 enum rtx_code comp_code = GET_CODE (x);
3699 if (GET_MODE_CLASS (mode) != MODE_CC)
3700 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3701 return aarch64_get_condition_code_1 (mode, comp_code);
3704 static int
3705 aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
3707 int ne = -1, eq = -1;
3708 switch (mode)
3710 case CCFPmode:
3711 case CCFPEmode:
3712 switch (comp_code)
3714 case GE: return AARCH64_GE;
3715 case GT: return AARCH64_GT;
3716 case LE: return AARCH64_LS;
3717 case LT: return AARCH64_MI;
3718 case NE: return AARCH64_NE;
3719 case EQ: return AARCH64_EQ;
3720 case ORDERED: return AARCH64_VC;
3721 case UNORDERED: return AARCH64_VS;
3722 case UNLT: return AARCH64_LT;
3723 case UNLE: return AARCH64_LE;
3724 case UNGT: return AARCH64_HI;
3725 case UNGE: return AARCH64_PL;
3726 default: return -1;
3728 break;
3730 case CC_DNEmode:
3731 ne = AARCH64_NE;
3732 eq = AARCH64_EQ;
3733 break;
3735 case CC_DEQmode:
3736 ne = AARCH64_EQ;
3737 eq = AARCH64_NE;
3738 break;
3740 case CC_DGEmode:
3741 ne = AARCH64_GE;
3742 eq = AARCH64_LT;
3743 break;
3745 case CC_DLTmode:
3746 ne = AARCH64_LT;
3747 eq = AARCH64_GE;
3748 break;
3750 case CC_DGTmode:
3751 ne = AARCH64_GT;
3752 eq = AARCH64_LE;
3753 break;
3755 case CC_DLEmode:
3756 ne = AARCH64_LE;
3757 eq = AARCH64_GT;
3758 break;
3760 case CC_DGEUmode:
3761 ne = AARCH64_CS;
3762 eq = AARCH64_CC;
3763 break;
3765 case CC_DLTUmode:
3766 ne = AARCH64_CC;
3767 eq = AARCH64_CS;
3768 break;
3770 case CC_DGTUmode:
3771 ne = AARCH64_HI;
3772 eq = AARCH64_LS;
3773 break;
3775 case CC_DLEUmode:
3776 ne = AARCH64_LS;
3777 eq = AARCH64_HI;
3778 break;
3780 case CCmode:
3781 switch (comp_code)
3783 case NE: return AARCH64_NE;
3784 case EQ: return AARCH64_EQ;
3785 case GE: return AARCH64_GE;
3786 case GT: return AARCH64_GT;
3787 case LE: return AARCH64_LE;
3788 case LT: return AARCH64_LT;
3789 case GEU: return AARCH64_CS;
3790 case GTU: return AARCH64_HI;
3791 case LEU: return AARCH64_LS;
3792 case LTU: return AARCH64_CC;
3793 default: return -1;
3795 break;
3797 case CC_SWPmode:
3798 case CC_ZESWPmode:
3799 case CC_SESWPmode:
3800 switch (comp_code)
3802 case NE: return AARCH64_NE;
3803 case EQ: return AARCH64_EQ;
3804 case GE: return AARCH64_LE;
3805 case GT: return AARCH64_LT;
3806 case LE: return AARCH64_GE;
3807 case LT: return AARCH64_GT;
3808 case GEU: return AARCH64_LS;
3809 case GTU: return AARCH64_CC;
3810 case LEU: return AARCH64_CS;
3811 case LTU: return AARCH64_HI;
3812 default: return -1;
3814 break;
3816 case CC_NZmode:
3817 switch (comp_code)
3819 case NE: return AARCH64_NE;
3820 case EQ: return AARCH64_EQ;
3821 case GE: return AARCH64_PL;
3822 case LT: return AARCH64_MI;
3823 default: return -1;
3825 break;
3827 case CC_Zmode:
3828 switch (comp_code)
3830 case NE: return AARCH64_NE;
3831 case EQ: return AARCH64_EQ;
3832 default: return -1;
3834 break;
3836 default:
3837 return -1;
3838 break;
3841 if (comp_code == NE)
3842 return ne;
3844 if (comp_code == EQ)
3845 return eq;
3847 return -1;
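/* Illustrative note (added commentary, not part of the original
   source): the CC_SWPmode rows above encode the condition that must be
   used once the compare operands have been swapped.  The standalone
   table below simply restates that mapping.  */
#if 0
#include <stdio.h>

int
main (void)
{
  static const char *const swapped[][2] = {
    { "eq", "eq" }, { "ne", "ne" },
    { "ge", "le" }, { "gt", "lt" }, { "le", "ge" }, { "lt", "gt" },
    { "geu", "ls" }, { "gtu", "cc" }, { "leu", "cs" }, { "ltu", "hi" },
  };

  for (unsigned int i = 0; i < sizeof swapped / sizeof swapped[0]; i++)
    printf ("%-4s -> %s\n", swapped[i][0], swapped[i][1]);
  return 0;
}
#endif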
3850 bool
3851 aarch64_const_vec_all_same_in_range_p (rtx x,
3852 HOST_WIDE_INT minval,
3853 HOST_WIDE_INT maxval)
3855 HOST_WIDE_INT firstval;
3856 int count, i;
3858 if (GET_CODE (x) != CONST_VECTOR
3859 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
3860 return false;
3862 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
3863 if (firstval < minval || firstval > maxval)
3864 return false;
3866 count = CONST_VECTOR_NUNITS (x);
3867 for (i = 1; i < count; i++)
3868 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
3869 return false;
3871 return true;
3874 bool
3875 aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
3877 return aarch64_const_vec_all_same_in_range_p (x, val, val);
3880 static unsigned
3881 bit_count (unsigned HOST_WIDE_INT value)
3883 unsigned count = 0;
3885 while (value)
3887 count++;
3888 value &= value - 1;
3891 return count;
3894 /* N Z C V. */
3895 #define AARCH64_CC_V 1
3896 #define AARCH64_CC_C (1 << 1)
3897 #define AARCH64_CC_Z (1 << 2)
3898 #define AARCH64_CC_N (1 << 3)
3900 /* N Z C V flags for ccmp.  The first value is for the AND case and the
3901    other is for the IOR case.  Indexed by AARCH64_COND_CODE.  */
3902 static const int aarch64_nzcv_codes[][2] =
3904 {AARCH64_CC_Z, 0}, /* EQ, Z == 1. */
3905 {0, AARCH64_CC_Z}, /* NE, Z == 0. */
3906 {AARCH64_CC_C, 0}, /* CS, C == 1. */
3907 {0, AARCH64_CC_C}, /* CC, C == 0. */
3908 {AARCH64_CC_N, 0}, /* MI, N == 1. */
3909 {0, AARCH64_CC_N}, /* PL, N == 0. */
3910 {AARCH64_CC_V, 0}, /* VS, V == 1. */
3911 {0, AARCH64_CC_V}, /* VC, V == 0. */
3912   {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0.  */
3913 {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0). */
3914 {0, AARCH64_CC_V}, /* GE, N == V. */
3915 {AARCH64_CC_V, 0}, /* LT, N != V. */
3916 {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V. */
3917 {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V). */
3918 {0, 0}, /* AL, Any. */
3919 {0, 0}, /* NV, Any. */
3923 aarch64_ccmp_mode_to_code (enum machine_mode mode)
3925 switch (mode)
3927 case CC_DNEmode:
3928 return NE;
3930 case CC_DEQmode:
3931 return EQ;
3933 case CC_DLEmode:
3934 return LE;
3936 case CC_DGTmode:
3937 return GT;
3939 case CC_DLTmode:
3940 return LT;
3942 case CC_DGEmode:
3943 return GE;
3945 case CC_DLEUmode:
3946 return LEU;
3948 case CC_DGTUmode:
3949 return GTU;
3951 case CC_DLTUmode:
3952 return LTU;
3954 case CC_DGEUmode:
3955 return GEU;
3957 default:
3958 gcc_unreachable ();
3963 void
3964 aarch64_print_operand (FILE *f, rtx x, char code)
3966 switch (code)
3968 /* An integer or symbol address without a preceding # sign. */
3969 case 'c':
3970 switch (GET_CODE (x))
3972 case CONST_INT:
3973 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3974 break;
3976 case SYMBOL_REF:
3977 output_addr_const (f, x);
3978 break;
3980 case CONST:
3981 if (GET_CODE (XEXP (x, 0)) == PLUS
3982 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3984 output_addr_const (f, x);
3985 break;
3987 /* Fall through. */
3989 default:
3990 output_operand_lossage ("Unsupported operand for code '%c'", code);
3992 break;
3994 case 'e':
3995 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3997 int n;
3999 if (!CONST_INT_P (x)
4000 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4002 output_operand_lossage ("invalid operand for '%%%c'", code);
4003 return;
4006 switch (n)
4008 case 3:
4009 fputc ('b', f);
4010 break;
4011 case 4:
4012 fputc ('h', f);
4013 break;
4014 case 5:
4015 fputc ('w', f);
4016 break;
4017 default:
4018 output_operand_lossage ("invalid operand for '%%%c'", code);
4019 return;
4022 break;
4024 case 'p':
4026 int n;
4028 /* Print N such that 2^N == X. */
4029 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
4031 output_operand_lossage ("invalid operand for '%%%c'", code);
4032 return;
4035 asm_fprintf (f, "%d", n);
4037 break;
4039 case 'P':
4040 /* Print the number of non-zero bits in X (a const_int). */
4041 if (!CONST_INT_P (x))
4043 output_operand_lossage ("invalid operand for '%%%c'", code);
4044 return;
4047 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
4048 break;
4050 case 'H':
4051 /* Print the higher numbered register of a pair (TImode) of regs. */
4052 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
4054 output_operand_lossage ("invalid operand for '%%%c'", code);
4055 return;
4058 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
4059 break;
4061 case 'm':
4063 int cond_code;
4064 /* Print a condition (eq, ne, etc). */
4066 /* CONST_TRUE_RTX means always -- that's the default. */
4067 if (x == const_true_rtx)
4068 return;
4070 if (!COMPARISON_P (x))
4072 output_operand_lossage ("invalid operand for '%%%c'", code);
4073 return;
4076 cond_code = aarch64_get_condition_code (x);
4077 gcc_assert (cond_code >= 0);
4078 fputs (aarch64_condition_codes[cond_code], f);
4080 break;
4082 case 'M':
4084 int cond_code;
4085 /* Print the inverse of a condition (eq <-> ne, etc). */
4087 /* CONST_TRUE_RTX means never -- that's the default. */
4088 if (x == const_true_rtx)
4090 fputs ("nv", f);
4091 return;
4094 if (!COMPARISON_P (x))
4096 output_operand_lossage ("invalid operand for '%%%c'", code);
4097 return;
4099 cond_code = aarch64_get_condition_code (x);
4100 gcc_assert (cond_code >= 0);
4101 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
4102 (cond_code)], f);
4104 break;
4106 case 'b':
4107 case 'h':
4108 case 's':
4109 case 'd':
4110 case 'q':
4111 /* Print a scalar FP/SIMD register name. */
4112 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4114 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4115 return;
4117 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
4118 break;
4120 case 'S':
4121 case 'T':
4122 case 'U':
4123 case 'V':
4124 /* Print the first FP/SIMD register name in a list. */
4125 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4127 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4128 return;
4130 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
4131 break;
4133 case 'X':
4134 /* Print bottom 16 bits of integer constant in hex. */
4135 if (!CONST_INT_P (x))
4137 output_operand_lossage ("invalid operand for '%%%c'", code);
4138 return;
4140 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
4141 break;
4143 case 'w':
4144 case 'x':
4145 /* Print a general register name or the zero register (32-bit or
4146 64-bit). */
4147 if (x == const0_rtx
4148 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
4150 asm_fprintf (f, "%czr", code);
4151 break;
4154 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
4156 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
4157 break;
4160 if (REG_P (x) && REGNO (x) == SP_REGNUM)
4162 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
4163 break;
4166 /* Fall through */
4168 case 0:
4169       /* Print a normal operand.  If it's a general register, then we
4170 assume DImode. */
4171 if (x == NULL)
4173 output_operand_lossage ("missing operand");
4174 return;
4177 switch (GET_CODE (x))
4179 case REG:
4180 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
4181 break;
4183 case MEM:
4184 aarch64_memory_reference_mode = GET_MODE (x);
4185 output_address (XEXP (x, 0));
4186 break;
4188 case LABEL_REF:
4189 case SYMBOL_REF:
4190 output_addr_const (asm_out_file, x);
4191 break;
4193 case CONST_INT:
4194 asm_fprintf (f, "%wd", INTVAL (x));
4195 break;
4197 case CONST_VECTOR:
4198 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
4200 gcc_assert (
4201 aarch64_const_vec_all_same_in_range_p (x,
4202 HOST_WIDE_INT_MIN,
4203 HOST_WIDE_INT_MAX));
4204 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
4206 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
4208 fputc ('0', f);
4210 else
4211 gcc_unreachable ();
4212 break;
4214 case CONST_DOUBLE:
4215 /* CONST_DOUBLE can represent a double-width integer.
4216 In this case, the mode of x is VOIDmode. */
4217 if (GET_MODE (x) == VOIDmode)
4218 ; /* Do Nothing. */
4219 else if (aarch64_float_const_zero_rtx_p (x))
4221 fputc ('0', f);
4222 break;
4224 else if (aarch64_float_const_representable_p (x))
4226 #define buf_size 20
4227 char float_buf[buf_size] = {'\0'};
4228 REAL_VALUE_TYPE r;
4229 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4230 real_to_decimal_for_mode (float_buf, &r,
4231 buf_size, buf_size,
4232 1, GET_MODE (x));
4233 asm_fprintf (asm_out_file, "%s", float_buf);
4234 break;
4235 #undef buf_size
4237 output_operand_lossage ("invalid constant");
4238 return;
4239 default:
4240 output_operand_lossage ("invalid operand");
4241 return;
4243 break;
4245 case 'A':
4246 if (GET_CODE (x) == HIGH)
4247 x = XEXP (x, 0);
4249 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4251 case SYMBOL_SMALL_GOT:
4252 asm_fprintf (asm_out_file, ":got:");
4253 break;
4255 case SYMBOL_SMALL_TLSGD:
4256 asm_fprintf (asm_out_file, ":tlsgd:");
4257 break;
4259 case SYMBOL_SMALL_TLSDESC:
4260 asm_fprintf (asm_out_file, ":tlsdesc:");
4261 break;
4263 case SYMBOL_SMALL_GOTTPREL:
4264 asm_fprintf (asm_out_file, ":gottprel:");
4265 break;
4267 case SYMBOL_SMALL_TPREL:
4268 asm_fprintf (asm_out_file, ":tprel:");
4269 break;
4271 case SYMBOL_TINY_GOT:
4272 gcc_unreachable ();
4273 break;
4275 default:
4276 break;
4278 output_addr_const (asm_out_file, x);
4279 break;
4281 case 'L':
4282 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4284 case SYMBOL_SMALL_GOT:
4285 asm_fprintf (asm_out_file, ":lo12:");
4286 break;
4288 case SYMBOL_SMALL_TLSGD:
4289 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4290 break;
4292 case SYMBOL_SMALL_TLSDESC:
4293 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4294 break;
4296 case SYMBOL_SMALL_GOTTPREL:
4297 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4298 break;
4300 case SYMBOL_SMALL_TPREL:
4301 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4302 break;
4304 case SYMBOL_TINY_GOT:
4305 asm_fprintf (asm_out_file, ":got:");
4306 break;
4308 default:
4309 break;
4311 output_addr_const (asm_out_file, x);
4312 break;
4314 case 'G':
4316 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4318 case SYMBOL_SMALL_TPREL:
4319 asm_fprintf (asm_out_file, ":tprel_hi12:");
4320 break;
4321 default:
4322 break;
4324 output_addr_const (asm_out_file, x);
4325 break;
4327 case 'K':
4329 int cond_code;
4330 /* Print nzcv. */
4332 if (!COMPARISON_P (x))
4334 output_operand_lossage ("invalid operand for '%%%c'", code);
4335 return;
4338 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4339 gcc_assert (cond_code >= 0);
4340 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
4342 break;
4344 case 'k':
4346 int cond_code;
4347 /* Print nzcv. */
4349 if (!COMPARISON_P (x))
4351 output_operand_lossage ("invalid operand for '%%%c'", code);
4352 return;
4355 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4356 gcc_assert (cond_code >= 0);
4357 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
4359 break;
4361 default:
4362 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4363 return;
4367 void
4368 aarch64_print_operand_address (FILE *f, rtx x)
4370 struct aarch64_address_info addr;
4372 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4373 MEM, true))
4374 switch (addr.type)
4376 case ADDRESS_REG_IMM:
4377 if (addr.offset == const0_rtx)
4378 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
4379 else
4380 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
4381 INTVAL (addr.offset));
4382 return;
4384 case ADDRESS_REG_REG:
4385 if (addr.shift == 0)
4386 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
4387 reg_names [REGNO (addr.offset)]);
4388 else
4389 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
4390 reg_names [REGNO (addr.offset)], addr.shift);
4391 return;
4393 case ADDRESS_REG_UXTW:
4394 if (addr.shift == 0)
4395 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
4396 REGNO (addr.offset) - R0_REGNUM);
4397 else
4398 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
4399 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4400 return;
4402 case ADDRESS_REG_SXTW:
4403 if (addr.shift == 0)
4404 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
4405 REGNO (addr.offset) - R0_REGNUM);
4406 else
4407 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
4408 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4409 return;
4411 case ADDRESS_REG_WB:
4412 switch (GET_CODE (x))
4414 case PRE_INC:
4415 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
4416 GET_MODE_SIZE (aarch64_memory_reference_mode));
4417 return;
4418 case POST_INC:
4419 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4420 GET_MODE_SIZE (aarch64_memory_reference_mode));
4421 return;
4422 case PRE_DEC:
4423 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4424 GET_MODE_SIZE (aarch64_memory_reference_mode));
4425 return;
4426 case POST_DEC:
4427 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4428 GET_MODE_SIZE (aarch64_memory_reference_mode));
4429 return;
4430 case PRE_MODIFY:
4431 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4432 INTVAL (addr.offset));
4433 return;
4434 case POST_MODIFY:
4435 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4436 INTVAL (addr.offset));
4437 return;
4438 default:
4439 break;
4441 break;
4443 case ADDRESS_LO_SUM:
4444 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4445 output_addr_const (f, addr.offset);
4446 asm_fprintf (f, "]");
4447 return;
4449 case ADDRESS_SYMBOLIC:
4450 break;
4453 output_addr_const (f, x);
4456 bool
4457 aarch64_label_mentioned_p (rtx x)
4459 const char *fmt;
4460 int i;
4462 if (GET_CODE (x) == LABEL_REF)
4463 return true;
4465 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4466 referencing instruction, but they are constant offsets, not
4467 symbols. */
4468 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4469 return false;
4471 fmt = GET_RTX_FORMAT (GET_CODE (x));
4472 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4474 if (fmt[i] == 'E')
4476 int j;
4478 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4479 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4480 return true;
4482 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4483 return true;
4486 return false;
4489 /* Implement REGNO_REG_CLASS. */
4491 enum reg_class
4492 aarch64_regno_regclass (unsigned regno)
4494 if (GP_REGNUM_P (regno))
4495 return GENERAL_REGS;
4497 if (regno == SP_REGNUM)
4498 return STACK_REG;
4500 if (regno == FRAME_POINTER_REGNUM
4501 || regno == ARG_POINTER_REGNUM)
4502 return POINTER_REGS;
4504 if (FP_REGNUM_P (regno))
4505 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4507 return NO_REGS;
4510 static rtx
4511 aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
4513 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4514 where mask is selected by alignment and size of the offset.
4515 We try to pick as large a range for the offset as possible to
4516 maximize the chance of a CSE. However, for aligned addresses
4517 we limit the range to 4k so that structures with different sized
4518 elements are likely to use the same base. */
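/* Illustrative example (values chosen for this note, not taken from the
   original sources): for an SImode access at X + 0x13450 the offset is a
   multiple of the access size, so base_offset = 0x13450 & ~0xfff = 0x13000;
   we emit Y = X + 0x13000 and rewrite the address as (plus Y 0x450), so
   that neighbouring accesses can CSE the add. */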
4520 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4522 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4523 HOST_WIDE_INT base_offset;
4525 /* Does it look like we'll need a load/store-pair operation? */
4526 if (GET_MODE_SIZE (mode) > 16
4527 || mode == TImode)
4528 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4529 & ~((128 * GET_MODE_SIZE (mode)) - 1));
4530 /* For offsets that aren't a multiple of the access size, the limit is
4531 -256...255. */
4532 else if (offset & (GET_MODE_SIZE (mode) - 1))
4533 base_offset = (offset + 0x100) & ~0x1ff;
4534 else
4535 base_offset = offset & ~0xfff;
4537 if (base_offset == 0)
4538 return x;
4540 offset -= base_offset;
4541 rtx base_reg = gen_reg_rtx (Pmode);
4542 rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4543 NULL_RTX);
4544 emit_move_insn (base_reg, val);
4545 x = plus_constant (Pmode, base_reg, offset);
4548 return x;
4551 /* Try a machine-dependent way of reloading an illegitimate address
4552 operand. If we find one, push the reload and return the new rtx. */
4555 aarch64_legitimize_reload_address (rtx *x_p,
4556 machine_mode mode,
4557 int opnum, int type,
4558 int ind_levels ATTRIBUTE_UNUSED)
4560 rtx x = *x_p;
4562 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4563 if (aarch64_vect_struct_mode_p (mode)
4564 && GET_CODE (x) == PLUS
4565 && REG_P (XEXP (x, 0))
4566 && CONST_INT_P (XEXP (x, 1)))
4568 rtx orig_rtx = x;
4569 x = copy_rtx (x);
4570 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4571 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4572 opnum, (enum reload_type) type);
4573 return x;
4576 /* We must recognize output that we have already generated ourselves. */
4577 if (GET_CODE (x) == PLUS
4578 && GET_CODE (XEXP (x, 0)) == PLUS
4579 && REG_P (XEXP (XEXP (x, 0), 0))
4580 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4581 && CONST_INT_P (XEXP (x, 1)))
4583 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4584 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4585 opnum, (enum reload_type) type);
4586 return x;
4589 /* We wish to handle large displacements off a base register by splitting
4590 the addend across an add and the mem insn. This can cut the number of
4591 extra insns needed from 3 to 1. It is only useful for load/store of a
4592 single register with 12 bit offset field. */
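/* Illustrative example (not from the original sources): a DImode load at
   base + 0x13008 splits into high = 0x13000 and low = 0x8, so the reload
   becomes "add xN, base, #0x13, lsl #12" with the memory reference using
   [xN, #8], instead of materializing the whole constant separately. */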
4593 if (GET_CODE (x) == PLUS
4594 && REG_P (XEXP (x, 0))
4595 && CONST_INT_P (XEXP (x, 1))
4596 && HARD_REGISTER_P (XEXP (x, 0))
4597 && mode != TImode
4598 && mode != TFmode
4599 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4601 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4602 HOST_WIDE_INT low = val & 0xfff;
4603 HOST_WIDE_INT high = val - low;
4604 HOST_WIDE_INT offs;
4605 rtx cst;
4606 machine_mode xmode = GET_MODE (x);
4608 /* In ILP32, xmode can be either DImode or SImode. */
4609 gcc_assert (xmode == DImode || xmode == SImode);
4611 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4612 BLKmode alignment. */
4613 if (GET_MODE_SIZE (mode) == 0)
4614 return NULL_RTX;
4616 offs = low % GET_MODE_SIZE (mode);
4618 /* Align misaligned offset by adjusting high part to compensate. */
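/* Worked example of this adjustment (illustrative values): for an SImode
   access with val == 0x13002 we get low == 0x2 and offs == 2; since
   0x13000 + 2 is not a valid shifted 12-bit immediate we align up, giving
   low == 4 and high == 0x12ffe, so high + low == val still holds. */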
4619 if (offs != 0)
4621 if (aarch64_uimm12_shift (high + offs))
4623 /* Align down. */
4624 low = low - offs;
4625 high = high + offs;
4627 else
4629 /* Align up. */
4630 offs = GET_MODE_SIZE (mode) - offs;
4631 low = low + offs;
4632 high = high + (low & 0x1000) - offs;
4633 low &= 0xfff;
4637 /* Check for overflow. */
4638 if (high + low != val)
4639 return NULL_RTX;
4641 cst = GEN_INT (high);
4642 if (!aarch64_uimm12_shift (high))
4643 cst = force_const_mem (xmode, cst);
4645 /* Reload high part into base reg, leaving the low part
4646 in the mem instruction.
4647 Note that replacing this gen_rtx_PLUS with plus_constant is
4648 wrong in this case because we rely on the
4649 (plus (plus reg c1) c2) structure being preserved so that
4650 XEXP (*p, 0) in push_reload below uses the correct term. */
4651 x = gen_rtx_PLUS (xmode,
4652 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4653 GEN_INT (low));
4655 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4656 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4657 opnum, (enum reload_type) type);
4658 return x;
4661 return NULL_RTX;
4665 static reg_class_t
4666 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4667 reg_class_t rclass,
4668 machine_mode mode,
4669 secondary_reload_info *sri)
4671 /* Without the TARGET_SIMD instructions we cannot move a Q register
4672 to a Q register directly. We need a scratch. */
4673 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4674 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4675 && reg_class_subset_p (rclass, FP_REGS))
4677 if (mode == TFmode)
4678 sri->icode = CODE_FOR_aarch64_reload_movtf;
4679 else if (mode == TImode)
4680 sri->icode = CODE_FOR_aarch64_reload_movti;
4681 return NO_REGS;
4684 /* A TFmode or TImode memory access should be handled via FP_REGS
4685 because AArch64 has richer addressing modes for LDR/STR instructions
4686 than for LDP/STP instructions. */
4687 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
4688 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4689 return FP_REGS;
4691 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4692 return GENERAL_REGS;
4694 return NO_REGS;
4697 static bool
4698 aarch64_can_eliminate (const int from, const int to)
4700 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4701 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4703 if (frame_pointer_needed)
4705 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4706 return true;
4707 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4708 return false;
4709 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4710 && !cfun->calls_alloca)
4711 return true;
4712 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4713 return true;
4715 return false;
4717 else
4719 /* If we decided that we didn't need a leaf frame pointer but then used
4720 LR in the function, then we'll want a frame pointer after all, so
4721 prevent this elimination to ensure a frame pointer is used. */
4722 if (to == STACK_POINTER_REGNUM
4723 && flag_omit_leaf_frame_pointer
4724 && df_regs_ever_live_p (LR_REGNUM))
4725 return false;
4728 return true;
4731 HOST_WIDE_INT
4732 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4734 aarch64_layout_frame ();
4736 if (to == HARD_FRAME_POINTER_REGNUM)
4738 if (from == ARG_POINTER_REGNUM)
4739 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
4741 if (from == FRAME_POINTER_REGNUM)
4742 return (cfun->machine->frame.hard_fp_offset
4743 - cfun->machine->frame.saved_varargs_size);
4746 if (to == STACK_POINTER_REGNUM)
4748 if (from == FRAME_POINTER_REGNUM)
4749 return (cfun->machine->frame.frame_size
4750 - cfun->machine->frame.saved_varargs_size);
4753 return cfun->machine->frame.frame_size;
4756 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4757 previous frame. */
4760 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4762 if (count != 0)
4763 return const0_rtx;
4764 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4768 static void
4769 aarch64_asm_trampoline_template (FILE *f)
4771 if (TARGET_ILP32)
4773 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4774 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4776 else
4778 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4779 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4781 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4782 assemble_aligned_integer (4, const0_rtx);
4783 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4784 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4787 static void
4788 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4790 rtx fnaddr, mem, a_tramp;
4791 const int tramp_code_sz = 16;
4793 /* Don't need to copy the trailing D-words; we fill those in below. */
4794 emit_block_move (m_tramp, assemble_trampoline_template (),
4795 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4796 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4797 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4798 if (GET_MODE (fnaddr) != ptr_mode)
4799 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4800 emit_move_insn (mem, fnaddr);
4802 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4803 emit_move_insn (mem, chain_value);
4805 /* XXX We should really define a "clear_cache" pattern and use
4806 gen_clear_cache(). */
4807 a_tramp = XEXP (m_tramp, 0);
4808 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4809 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4810 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4811 ptr_mode);
4814 static unsigned char
4815 aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
4817 switch (regclass)
4819 case CALLER_SAVE_REGS:
4820 case POINTER_REGS:
4821 case GENERAL_REGS:
4822 case ALL_REGS:
4823 case FP_REGS:
4824 case FP_LO_REGS:
4825 return
4826 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4827 (GET_MODE_SIZE (mode) + 7) / 8;
4828 case STACK_REG:
4829 return 1;
4831 case NO_REGS:
4832 return 0;
4834 default:
4835 break;
4837 gcc_unreachable ();
4840 static reg_class_t
4841 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4843 if (regclass == POINTER_REGS)
4844 return GENERAL_REGS;
4846 if (regclass == STACK_REG)
4848 if (REG_P(x)
4849 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4850 return regclass;
4852 return NO_REGS;
4855 /* If it's an integer immediate that MOVI can't handle, then
4856 FP_REGS is not an option, so we return NO_REGS instead. */
4857 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4858 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4859 return NO_REGS;
4861 /* Register elimination can result in a request for
4862 SP+constant->FP_REGS. We cannot support such operations, which
4863 use SP as source and an FP_REG as destination, so reject them
4864 right now. */
4865 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4867 rtx lhs = XEXP (x, 0);
4869 /* Look through a possible SUBREG introduced by ILP32. */
4870 if (GET_CODE (lhs) == SUBREG)
4871 lhs = SUBREG_REG (lhs);
4873 gcc_assert (REG_P (lhs));
4874 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4875 POINTER_REGS));
4876 return NO_REGS;
4879 return regclass;
4882 void
4883 aarch64_asm_output_labelref (FILE* f, const char *name)
4885 asm_fprintf (f, "%U%s", name);
4888 static void
4889 aarch64_elf_asm_constructor (rtx symbol, int priority)
4891 if (priority == DEFAULT_INIT_PRIORITY)
4892 default_ctor_section_asm_out_constructor (symbol, priority);
4893 else
4895 section *s;
4896 char buf[18];
4897 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4898 s = get_section (buf, SECTION_WRITE, NULL);
4899 switch_to_section (s);
4900 assemble_align (POINTER_SIZE);
4901 assemble_aligned_integer (POINTER_BYTES, symbol);
4905 static void
4906 aarch64_elf_asm_destructor (rtx symbol, int priority)
4908 if (priority == DEFAULT_INIT_PRIORITY)
4909 default_dtor_section_asm_out_destructor (symbol, priority);
4910 else
4912 section *s;
4913 char buf[18];
4914 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4915 s = get_section (buf, SECTION_WRITE, NULL);
4916 switch_to_section (s);
4917 assemble_align (POINTER_SIZE);
4918 assemble_aligned_integer (POINTER_BYTES, symbol);
4922 const char*
4923 aarch64_output_casesi (rtx *operands)
4925 char buf[100];
4926 char label[100];
4927 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
4928 int index;
4929 static const char *const patterns[4][2] =
4932 "ldrb\t%w3, [%0,%w1,uxtw]",
4933 "add\t%3, %4, %w3, sxtb #2"
4936 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4937 "add\t%3, %4, %w3, sxth #2"
4940 "ldr\t%w3, [%0,%w1,uxtw #2]",
4941 "add\t%3, %4, %w3, sxtw #2"
4943 /* We assume that DImode is only generated when not optimizing and
4944 that we don't really need 64-bit address offsets. That would
4945 imply an object file with 8GB of code in a single function! */
4947 "ldr\t%w3, [%0,%w1,uxtw #2]",
4948 "add\t%3, %4, %w3, sxtw #2"
4952 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4954 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4956 gcc_assert (index >= 0 && index <= 3);
4958 /* Need to implement table size reduction, by changing the code below. */
4959 output_asm_insn (patterns[index][0], operands);
4960 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4961 snprintf (buf, sizeof (buf),
4962 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4963 output_asm_insn (buf, operands);
4964 output_asm_insn (patterns[index][1], operands);
4965 output_asm_insn ("br\t%3", operands);
4966 assemble_label (asm_out_file, label);
4967 return "";
4971 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4972 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4973 operator. */
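/* For example, shift == 1 with mask == 0x1fe describes a byte operand
   scaled by 2 and so returns 8 (UXTB), while shift == 2 with
   mask == 0x3fffc returns 16 (UXTH).  Combinations that do not match a
   UXTB/UXTH/UXTW pattern return 0. */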
4976 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4978 if (shift >= 0 && shift <= 3)
4980 int size;
4981 for (size = 8; size <= 32; size *= 2)
4983 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4984 if (mask == bits << shift)
4985 return size;
4988 return 0;
4991 static bool
4992 aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
4993 const_rtx x ATTRIBUTE_UNUSED)
4995 /* We can't use blocks for constants when we're using a per-function
4996 constant pool. */
4997 return false;
5000 static section *
5001 aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
5002 rtx x ATTRIBUTE_UNUSED,
5003 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
5005 /* Force all constant pool entries into the current function section. */
5006 return function_section (current_function_decl);
5010 /* Costs. */
5012 /* Helper function for rtx cost calculation. Strip a shift expression
5013 from X. Returns the inner operand if successful, or the original
5014 expression on failure. */
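/* For example (illustrative), both (ashift (reg) (const_int 3)) and
   (mult (reg) (const_int 8)) strip to (reg), since a multiply by a power
   of two is printed as a shift. */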
5015 static rtx
5016 aarch64_strip_shift (rtx x)
5018 rtx op = x;
5020 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5021 we can convert both to ROR during final output. */
5022 if ((GET_CODE (op) == ASHIFT
5023 || GET_CODE (op) == ASHIFTRT
5024 || GET_CODE (op) == LSHIFTRT
5025 || GET_CODE (op) == ROTATERT
5026 || GET_CODE (op) == ROTATE)
5027 && CONST_INT_P (XEXP (op, 1)))
5028 return XEXP (op, 0);
5030 if (GET_CODE (op) == MULT
5031 && CONST_INT_P (XEXP (op, 1))
5032 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5033 return XEXP (op, 0);
5035 return x;
5038 /* Helper function for rtx cost calculation. Strip an extend
5039 expression from X. Returns the inner operand if successful, or the
5040 original expression on failure. We deal with a number of possible
5041 canonicalization variations here. */
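/* For example (illustrative), (and (mult (reg) (const_int 4))
   (const_int 0x3fc)) -- a zero-extended byte scaled by 4 -- strips to
   (reg), as does (ashift (zero_extend (reg)) (const_int 2)). */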
5042 static rtx
5043 aarch64_strip_extend (rtx x)
5045 rtx op = x;
5047 /* Zero and sign extraction of a widened value. */
5048 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5049 && XEXP (op, 2) == const0_rtx
5050 && GET_CODE (XEXP (op, 0)) == MULT
5051 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5052 XEXP (op, 1)))
5053 return XEXP (XEXP (op, 0), 0);
5055 /* It can also be represented (for zero-extend) as an AND with an
5056 immediate. */
5057 if (GET_CODE (op) == AND
5058 && GET_CODE (XEXP (op, 0)) == MULT
5059 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5060 && CONST_INT_P (XEXP (op, 1))
5061 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5062 INTVAL (XEXP (op, 1))) != 0)
5063 return XEXP (XEXP (op, 0), 0);
5065 /* Now handle extended register, as this may also have an optional
5066 left shift by 1..4. */
5067 if (GET_CODE (op) == ASHIFT
5068 && CONST_INT_P (XEXP (op, 1))
5069 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5070 op = XEXP (op, 0);
5072 if (GET_CODE (op) == ZERO_EXTEND
5073 || GET_CODE (op) == SIGN_EXTEND)
5074 op = XEXP (op, 0);
5076 if (op != x)
5077 return op;
5079 return x;
5082 /* Helper function for rtx cost calculation. Calculate the cost of
5083 a MULT, which may be part of a multiply-accumulate rtx. Return
5084 the calculated cost of the expression, recursing manually in to
5085 operands where needed. */
5087 static int
5088 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
5090 rtx op0, op1;
5091 const struct cpu_cost_table *extra_cost
5092 = aarch64_tune_params->insn_extra_cost;
5093 int cost = 0;
5094 bool maybe_fma = (outer == PLUS || outer == MINUS);
5095 machine_mode mode = GET_MODE (x);
5097 gcc_checking_assert (code == MULT);
5099 op0 = XEXP (x, 0);
5100 op1 = XEXP (x, 1);
5102 if (VECTOR_MODE_P (mode))
5103 mode = GET_MODE_INNER (mode);
5105 /* Integer multiply/fma. */
5106 if (GET_MODE_CLASS (mode) == MODE_INT)
5108 /* The multiply will be canonicalized as a shift, cost it as such. */
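/* For example, a multiply by 8 is emitted as "lsl x0, x1, #3", or is
   folded into "add x0, x2, x1, lsl #3" when it feeds an addition
   (operands illustrative). */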
5109 if (CONST_INT_P (op1)
5110 && exact_log2 (INTVAL (op1)) > 0)
5112 if (speed)
5114 if (maybe_fma)
5115 /* ADD (shifted register). */
5116 cost += extra_cost->alu.arith_shift;
5117 else
5118 /* LSL (immediate). */
5119 cost += extra_cost->alu.shift;
5122 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
5124 return cost;
5127 /* Integer multiplies or FMAs have zero/sign extending variants. */
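/* For example (illustrative), (mult:DI (sign_extend:DI (reg:SI))
   (sign_extend:DI (reg:SI))) maps onto a single SMULL, or SMADDL when it
   feeds an accumulate. */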
5128 if ((GET_CODE (op0) == ZERO_EXTEND
5129 && GET_CODE (op1) == ZERO_EXTEND)
5130 || (GET_CODE (op0) == SIGN_EXTEND
5131 && GET_CODE (op1) == SIGN_EXTEND))
5133 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
5134 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
5136 if (speed)
5138 if (maybe_fma)
5139 /* MADD/SMADDL/UMADDL. */
5140 cost += extra_cost->mult[0].extend_add;
5141 else
5142 /* MUL/SMULL/UMULL. */
5143 cost += extra_cost->mult[0].extend;
5146 return cost;
5149 /* This is either an integer multiply or an FMA. In both cases
5150 we want to recurse and cost the operands. */
5151 cost += rtx_cost (op0, MULT, 0, speed)
5152 + rtx_cost (op1, MULT, 1, speed);
5154 if (speed)
5156 if (maybe_fma)
5157 /* MADD. */
5158 cost += extra_cost->mult[mode == DImode].add;
5159 else
5160 /* MUL. */
5161 cost += extra_cost->mult[mode == DImode].simple;
5164 return cost;
5166 else
5168 if (speed)
5170 /* Floating-point FMA/FMUL can also support negations of the
5171 operands. */
5172 if (GET_CODE (op0) == NEG)
5173 op0 = XEXP (op0, 0);
5174 if (GET_CODE (op1) == NEG)
5175 op1 = XEXP (op1, 0);
5177 if (maybe_fma)
5178 /* FMADD/FNMADD/FNMSUB/FMSUB. */
5179 cost += extra_cost->fp[mode == DFmode].fma;
5180 else
5181 /* FMUL/FNMUL. */
5182 cost += extra_cost->fp[mode == DFmode].mult;
5185 cost += rtx_cost (op0, MULT, 0, speed)
5186 + rtx_cost (op1, MULT, 1, speed);
5187 return cost;
5191 static int
5192 aarch64_address_cost (rtx x,
5193 machine_mode mode,
5194 addr_space_t as ATTRIBUTE_UNUSED,
5195 bool speed)
5197 enum rtx_code c = GET_CODE (x);
5198 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
5199 struct aarch64_address_info info;
5200 int cost = 0;
5201 info.shift = 0;
5203 if (!aarch64_classify_address (&info, x, mode, c, false))
5205 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5207 /* This is a CONST or SYMBOL ref which will be split
5208 in a different way depending on the code model in use.
5209 Cost it through the generic infrastructure. */
5210 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
5211 /* Divide through by the cost of one instruction to
5212 bring it to the same units as the address costs. */
5213 cost_symbol_ref /= COSTS_N_INSNS (1);
5214 /* The cost is then the cost of preparing the address,
5215 followed by an immediate (possibly 0) offset. */
5216 return cost_symbol_ref + addr_cost->imm_offset;
5218 else
5220 /* This is most likely a jump table from a case
5221 statement. */
5222 return addr_cost->register_offset;
5226 switch (info.type)
5228 case ADDRESS_LO_SUM:
5229 case ADDRESS_SYMBOLIC:
5230 case ADDRESS_REG_IMM:
5231 cost += addr_cost->imm_offset;
5232 break;
5234 case ADDRESS_REG_WB:
5235 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5236 cost += addr_cost->pre_modify;
5237 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5238 cost += addr_cost->post_modify;
5239 else
5240 gcc_unreachable ();
5242 break;
5244 case ADDRESS_REG_REG:
5245 cost += addr_cost->register_offset;
5246 break;
5248 case ADDRESS_REG_UXTW:
5249 case ADDRESS_REG_SXTW:
5250 cost += addr_cost->register_extend;
5251 break;
5253 default:
5254 gcc_unreachable ();
5258 if (info.shift > 0)
5260 /* For the sake of calculating the cost of the shifted register
5261 component, we can treat same sized modes in the same way. */
5262 switch (GET_MODE_BITSIZE (mode))
5264 case 16:
5265 cost += addr_cost->addr_scale_costs.hi;
5266 break;
5268 case 32:
5269 cost += addr_cost->addr_scale_costs.si;
5270 break;
5272 case 64:
5273 cost += addr_cost->addr_scale_costs.di;
5274 break;
5276 /* We can't tell, or this is a 128-bit vector. */
5277 default:
5278 cost += addr_cost->addr_scale_costs.ti;
5279 break;
5283 return cost;
5286 /* Return true if the RTX X in mode MODE is a zero or sign extract
5287 usable in an ADD or SUB (extended register) instruction. */
5288 static bool
5289 aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
5291 /* Catch add with a sign extract.
5292 This is add_<optab><mode>_multp2. */
5293 if (GET_CODE (x) == SIGN_EXTRACT
5294 || GET_CODE (x) == ZERO_EXTRACT)
5296 rtx op0 = XEXP (x, 0);
5297 rtx op1 = XEXP (x, 1);
5298 rtx op2 = XEXP (x, 2);
5300 if (GET_CODE (op0) == MULT
5301 && CONST_INT_P (op1)
5302 && op2 == const0_rtx
5303 && CONST_INT_P (XEXP (op0, 1))
5304 && aarch64_is_extend_from_extract (mode,
5305 XEXP (op0, 1),
5306 op1))
5308 return true;
5312 return false;
5315 static bool
5316 aarch64_frint_unspec_p (unsigned int u)
5318 switch (u)
5320 case UNSPEC_FRINTZ:
5321 case UNSPEC_FRINTP:
5322 case UNSPEC_FRINTM:
5323 case UNSPEC_FRINTA:
5324 case UNSPEC_FRINTN:
5325 case UNSPEC_FRINTX:
5326 case UNSPEC_FRINTI:
5327 return true;
5329 default:
5330 return false;
5334 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5335 storing it in *COST. Result is true if the total cost of the operation
5336 has now been calculated. */
5337 static bool
5338 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5340 rtx inner;
5341 rtx comparator;
5342 enum rtx_code cmpcode;
5344 if (COMPARISON_P (op0))
5346 inner = XEXP (op0, 0);
5347 comparator = XEXP (op0, 1);
5348 cmpcode = GET_CODE (op0);
5350 else
5352 inner = op0;
5353 comparator = const0_rtx;
5354 cmpcode = NE;
5357 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5359 /* Conditional branch. */
5360 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5361 return true;
5362 else
5364 if (cmpcode == NE || cmpcode == EQ)
5366 if (comparator == const0_rtx)
5368 /* TBZ/TBNZ/CBZ/CBNZ. */
5369 if (GET_CODE (inner) == ZERO_EXTRACT)
5370 /* TBZ/TBNZ. */
5371 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5372 0, speed);
5373 else
5374 /* CBZ/CBNZ. */
5375 *cost += rtx_cost (inner, cmpcode, 0, speed);
5377 return true;
5380 else if (cmpcode == LT || cmpcode == GE)
5382 /* TBZ/TBNZ. */
5383 if (comparator == const0_rtx)
5384 return true;
5388 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5390 /* It's a conditional operation based on the status flags,
5391 so it must be some flavor of CSEL. */
5393 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5394 if (GET_CODE (op1) == NEG
5395 || GET_CODE (op1) == NOT
5396 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5397 op1 = XEXP (op1, 0);
5399 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5400 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5401 return true;
5404 /* We don't know what this is, cost all operands. */
5405 return false;
5408 /* Calculate the cost of calculating X, storing it in *COST. Result
5409 is true if the total cost of the operation has now been calculated. */
5410 static bool
5411 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
5412 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5414 rtx op0, op1, op2;
5415 const struct cpu_cost_table *extra_cost
5416 = aarch64_tune_params->insn_extra_cost;
5417 machine_mode mode = GET_MODE (x);
5419 /* By default, assume that everything has equivalent cost to the
5420 cheapest instruction. Any additional costs are applied as a delta
5421 above this default. */
5422 *cost = COSTS_N_INSNS (1);
5424 /* TODO: The cost infrastructure currently does not handle
5425 vector operations. Assume that all vector operations
5426 are equally expensive. */
5427 if (VECTOR_MODE_P (mode))
5429 if (speed)
5430 *cost += extra_cost->vect.alu;
5431 return true;
5434 switch (code)
5436 case SET:
5437 /* The cost depends entirely on the operands to SET. */
5438 *cost = 0;
5439 op0 = SET_DEST (x);
5440 op1 = SET_SRC (x);
5442 switch (GET_CODE (op0))
5444 case MEM:
5445 if (speed)
5447 rtx address = XEXP (op0, 0);
5448 if (GET_MODE_CLASS (mode) == MODE_INT)
5449 *cost += extra_cost->ldst.store;
5450 else if (mode == SFmode)
5451 *cost += extra_cost->ldst.storef;
5452 else if (mode == DFmode)
5453 *cost += extra_cost->ldst.stored;
5455 *cost +=
5456 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5457 0, speed));
5460 *cost += rtx_cost (op1, SET, 1, speed);
5461 return true;
5463 case SUBREG:
5464 if (! REG_P (SUBREG_REG (op0)))
5465 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
5467 /* Fall through. */
5468 case REG:
5469 /* const0_rtx is in general free, but we will use an
5470 instruction to set a register to 0. */
5471 if (REG_P (op1) || op1 == const0_rtx)
5473 /* The cost is 1 per register copied. */
5474 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5475 / UNITS_PER_WORD;
5476 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5478 else
5479 /* Cost is just the cost of the RHS of the set. */
5480 *cost += rtx_cost (op1, SET, 1, speed);
5481 return true;
5483 case ZERO_EXTRACT:
5484 case SIGN_EXTRACT:
5485 /* Bit-field insertion. Strip any redundant widening of
5486 the RHS to meet the width of the target. */
5487 if (GET_CODE (op1) == SUBREG)
5488 op1 = SUBREG_REG (op1);
5489 if ((GET_CODE (op1) == ZERO_EXTEND
5490 || GET_CODE (op1) == SIGN_EXTEND)
5491 && CONST_INT_P (XEXP (op0, 1))
5492 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5493 >= INTVAL (XEXP (op0, 1))))
5494 op1 = XEXP (op1, 0);
5496 if (CONST_INT_P (op1))
5498 /* MOV immediate is assumed to always be cheap. */
5499 *cost = COSTS_N_INSNS (1);
5501 else
5503 /* BFM. */
5504 if (speed)
5505 *cost += extra_cost->alu.bfi;
5506 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5509 return true;
5511 default:
5512 /* We can't make sense of this, assume default cost. */
5513 *cost = COSTS_N_INSNS (1);
5514 return false;
5516 return false;
5518 case CONST_INT:
5519 /* If an instruction can incorporate a constant within the
5520 instruction, the instruction's expression avoids calling
5521 rtx_cost() on the constant. If rtx_cost() is called on a
5522 constant, then it is usually because the constant must be
5523 moved into a register by one or more instructions.
5525 The exception is constant 0, which can be expressed
5526 as XZR/WZR and is therefore free. The exception to this is
5527 if we have (set (reg) (const0_rtx)) in which case we must cost
5528 the move. However, we can catch that when we cost the SET, so
5529 we don't need to consider that here. */
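/* As a rough illustration, a full 64-bit constant such as
   0x1234567812345678 is built with one MOVZ and three MOVKs, so it is
   costed as four instructions. */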
5530 if (x == const0_rtx)
5531 *cost = 0;
5532 else
5534 /* To an approximation, the cost of building any other constant is
5535 proportional to the number of instructions required to build
5536 that constant. This is true whether we are compiling for SPEED
5537 or otherwise. */
5538 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
5539 (NULL_RTX, x, false, mode));
5541 return true;
5543 case CONST_DOUBLE:
5544 if (speed)
5546 /* mov[df,sf]_aarch64. */
5547 if (aarch64_float_const_representable_p (x))
5548 /* FMOV (scalar immediate). */
5549 *cost += extra_cost->fp[mode == DFmode].fpconst;
5550 else if (!aarch64_float_const_zero_rtx_p (x))
5552 /* This will be a load from memory. */
5553 if (mode == DFmode)
5554 *cost += extra_cost->ldst.loadd;
5555 else
5556 *cost += extra_cost->ldst.loadf;
5558 else
5559 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5560 or MOV v0.s[0], wzr - neither of which is modeled by the
5561 cost tables. Just use the default cost. */
5566 return true;
5568 case MEM:
5569 if (speed)
5571 /* For loads we want the base cost of a load, plus an
5572 approximation for the additional cost of the addressing
5573 mode. */
5574 rtx address = XEXP (x, 0);
5575 if (GET_MODE_CLASS (mode) == MODE_INT)
5576 *cost += extra_cost->ldst.load;
5577 else if (mode == SFmode)
5578 *cost += extra_cost->ldst.loadf;
5579 else if (mode == DFmode)
5580 *cost += extra_cost->ldst.loadd;
5582 *cost +=
5583 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5584 0, speed));
5587 return true;
5589 case NEG:
5590 op0 = XEXP (x, 0);
5592 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5594 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5595 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5597 /* CSETM. */
5598 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5599 return true;
5602 /* Cost this as SUB wzr, X. */
5603 op0 = CONST0_RTX (GET_MODE (x));
5604 op1 = XEXP (x, 0);
5605 goto cost_minus;
5608 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5610 /* Support (neg(fma...)) as a single instruction only if
5611 sign of zeros is unimportant. This matches the decision
5612 making in aarch64.md. */
5613 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5615 /* FNMADD. */
5616 *cost = rtx_cost (op0, NEG, 0, speed);
5617 return true;
5619 if (speed)
5620 /* FNEG. */
5621 *cost += extra_cost->fp[mode == DFmode].neg;
5622 return false;
5625 return false;
5627 case CLRSB:
5628 case CLZ:
5629 if (speed)
5630 *cost += extra_cost->alu.clz;
5632 return false;
5634 case COMPARE:
5635 op0 = XEXP (x, 0);
5636 op1 = XEXP (x, 1);
5638 if (op1 == const0_rtx
5639 && GET_CODE (op0) == AND)
5641 x = op0;
5642 goto cost_logic;
5645 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5647 /* TODO: A write to the CC flags possibly costs extra, this
5648 needs encoding in the cost tables. */
5650 /* CC_ZESWPmode supports zero extend for free. */
5651 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5652 op0 = XEXP (op0, 0);
5654 /* ANDS. */
5655 if (GET_CODE (op0) == AND)
5657 x = op0;
5658 goto cost_logic;
5661 if (GET_CODE (op0) == PLUS)
5663 /* ADDS (and CMN alias). */
5664 x = op0;
5665 goto cost_plus;
5668 if (GET_CODE (op0) == MINUS)
5670 /* SUBS. */
5671 x = op0;
5672 goto cost_minus;
5675 if (GET_CODE (op1) == NEG)
5677 /* CMN. */
5678 if (speed)
5679 *cost += extra_cost->alu.arith;
5681 *cost += rtx_cost (op0, COMPARE, 0, speed);
5682 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5683 return true;
5686 /* CMP.
5688 Compare can freely swap the order of operands, and
5689 canonicalization puts the more complex operation first.
5690 But the integer MINUS logic expects the shift/extend
5691 operation in op1. */
5692 if (! (REG_P (op0)
5693 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5695 op0 = XEXP (x, 1);
5696 op1 = XEXP (x, 0);
5698 goto cost_minus;
5701 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5703 /* FCMP. */
5704 if (speed)
5705 *cost += extra_cost->fp[mode == DFmode].compare;
5707 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5709 /* FCMP supports constant 0.0 for no extra cost. */
5710 return true;
5712 return false;
5715 return false;
5717 case MINUS:
5719 op0 = XEXP (x, 0);
5720 op1 = XEXP (x, 1);
5722 cost_minus:
5723 /* Detect valid immediates. */
5724 if ((GET_MODE_CLASS (mode) == MODE_INT
5725 || (GET_MODE_CLASS (mode) == MODE_CC
5726 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5727 && CONST_INT_P (op1)
5728 && aarch64_uimm12_shift (INTVAL (op1)))
5730 *cost += rtx_cost (op0, MINUS, 0, speed);
5732 if (speed)
5733 /* SUB(S) (immediate). */
5734 *cost += extra_cost->alu.arith;
5735 return true;
5739 /* Look for SUB (extended register). */
5740 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5742 if (speed)
5743 *cost += extra_cost->alu.arith_shift;
5745 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5746 (enum rtx_code) GET_CODE (op1),
5747 0, speed);
5748 return true;
5751 rtx new_op1 = aarch64_strip_extend (op1);
5753 /* Cost this as an FMA-alike operation. */
5754 if ((GET_CODE (new_op1) == MULT
5755 || GET_CODE (new_op1) == ASHIFT)
5756 && code != COMPARE)
5758 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5759 (enum rtx_code) code,
5760 speed);
5761 *cost += rtx_cost (op0, MINUS, 0, speed);
5762 return true;
5765 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5767 if (speed)
5769 if (GET_MODE_CLASS (mode) == MODE_INT)
5770 /* SUB(S). */
5771 *cost += extra_cost->alu.arith;
5772 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5773 /* FSUB. */
5774 *cost += extra_cost->fp[mode == DFmode].addsub;
5776 return true;
5779 case PLUS:
5781 rtx new_op0;
5783 op0 = XEXP (x, 0);
5784 op1 = XEXP (x, 1);
5786 cost_plus:
5787 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5788 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5790 /* CSINC. */
5791 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5792 *cost += rtx_cost (op1, PLUS, 1, speed);
5793 return true;
5796 if (GET_MODE_CLASS (mode) == MODE_INT
5797 && CONST_INT_P (op1)
5798 && aarch64_uimm12_shift (INTVAL (op1)))
5800 *cost += rtx_cost (op0, PLUS, 0, speed);
5802 if (speed)
5803 /* ADD (immediate). */
5804 *cost += extra_cost->alu.arith;
5805 return true;
5808 /* Look for ADD (extended register). */
5809 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5811 if (speed)
5812 *cost += extra_cost->alu.arith_shift;
5814 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5815 (enum rtx_code) GET_CODE (op0),
5816 0, speed);
5817 return true;
5820 /* Strip any extend, leave shifts behind as we will
5821 cost them through mult_cost. */
5822 new_op0 = aarch64_strip_extend (op0);
5824 if (GET_CODE (new_op0) == MULT
5825 || GET_CODE (new_op0) == ASHIFT)
5827 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5828 speed);
5829 *cost += rtx_cost (op1, PLUS, 1, speed);
5830 return true;
5833 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5834 + rtx_cost (op1, PLUS, 1, speed));
5836 if (speed)
5838 if (GET_MODE_CLASS (mode) == MODE_INT)
5839 /* ADD. */
5840 *cost += extra_cost->alu.arith;
5841 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5842 /* FADD. */
5843 *cost += extra_cost->fp[mode == DFmode].addsub;
5845 return true;
5848 case BSWAP:
5849 *cost = COSTS_N_INSNS (1);
5851 if (speed)
5852 *cost += extra_cost->alu.rev;
5854 return false;
5856 case IOR:
5857 if (aarch_rev16_p (x))
5859 *cost = COSTS_N_INSNS (1);
5861 if (speed)
5862 *cost += extra_cost->alu.rev;
5864 return true;
5866 /* Fall through. */
5867 case XOR:
5868 case AND:
5869 cost_logic:
5870 op0 = XEXP (x, 0);
5871 op1 = XEXP (x, 1);
5873 if (code == AND
5874 && GET_CODE (op0) == MULT
5875 && CONST_INT_P (XEXP (op0, 1))
5876 && CONST_INT_P (op1)
5877 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5878 INTVAL (op1)) != 0)
5880 /* This is a UBFM/SBFM. */
5881 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5882 if (speed)
5883 *cost += extra_cost->alu.bfx;
5884 return true;
5887 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5889 /* We possibly get the immediate for free, but this is not
5890 modelled. */
5891 if (CONST_INT_P (op1)
5892 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5894 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5896 if (speed)
5897 *cost += extra_cost->alu.logical;
5899 return true;
5901 else
5903 rtx new_op0 = op0;
5905 /* Handle ORN, EON, or BIC. */
5906 if (GET_CODE (op0) == NOT)
5907 op0 = XEXP (op0, 0);
5909 new_op0 = aarch64_strip_shift (op0);
5911 /* If we had a shift on op0 then this is a logical-shift-
5912 by-register/immediate operation. Otherwise, this is just
5913 a logical operation. */
5914 if (speed)
5916 if (new_op0 != op0)
5918 /* Shift by immediate. */
5919 if (CONST_INT_P (XEXP (op0, 1)))
5920 *cost += extra_cost->alu.log_shift;
5921 else
5922 *cost += extra_cost->alu.log_shift_reg;
5924 else
5925 *cost += extra_cost->alu.logical;
5928 /* In both cases we want to cost both operands. */
5929 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5930 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5932 return true;
5935 return false;
5937 case NOT:
5938 /* MVN. */
5939 if (speed)
5940 *cost += extra_cost->alu.logical;
5942 /* The logical instruction could have the shifted register form,
5943 but the cost is the same if the shift is processed as a separate
5944 instruction, so we don't bother with it here. */
5945 return false;
5947 case ZERO_EXTEND:
5949 op0 = XEXP (x, 0);
5950 /* If a value is written in SI mode, then zero extended to DI
5951 mode, the operation will in general be free as a write to
5952 a 'w' register implicitly zeroes the upper bits of an 'x'
5953 register. However, if this is
5955 (set (reg) (zero_extend (reg)))
5957 we must cost the explicit register move. */
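/* For example, (set (reg:DI x0) (zero_extend:DI (reg:SI w1))) is emitted
   as "mov w0, w1", so it is costed as a move rather than being free
   (register names illustrative). */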
5958 if (mode == DImode
5959 && GET_MODE (op0) == SImode
5960 && outer == SET)
5962 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5964 if (!op_cost && speed)
5965 /* MOV. */
5966 *cost += extra_cost->alu.extend;
5967 else
5968 /* Free, the cost is that of the SI mode operation. */
5969 *cost = op_cost;
5971 return true;
5973 else if (MEM_P (XEXP (x, 0)))
5975 /* All loads can zero extend to any size for free. */
5976 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5977 return true;
5980 /* UXTB/UXTH. */
5981 if (speed)
5982 *cost += extra_cost->alu.extend;
5984 return false;
5986 case SIGN_EXTEND:
5987 if (MEM_P (XEXP (x, 0)))
5989 /* LDRSH. */
5990 if (speed)
5992 rtx address = XEXP (XEXP (x, 0), 0);
5993 *cost += extra_cost->ldst.load_sign_extend;
5995 *cost +=
5996 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5997 0, speed));
5999 return true;
6002 if (speed)
6003 *cost += extra_cost->alu.extend;
6004 return false;
6006 case ASHIFT:
6007 op0 = XEXP (x, 0);
6008 op1 = XEXP (x, 1);
6010 if (CONST_INT_P (op1))
6012 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
6013 aliases. */
6014 if (speed)
6015 *cost += extra_cost->alu.shift;
6017 /* We can incorporate zero/sign extend for free. */
6018 if (GET_CODE (op0) == ZERO_EXTEND
6019 || GET_CODE (op0) == SIGN_EXTEND)
6020 op0 = XEXP (op0, 0);
6022 *cost += rtx_cost (op0, ASHIFT, 0, speed);
6023 return true;
6025 else
6027 /* LSLV. */
6028 if (speed)
6029 *cost += extra_cost->alu.shift_reg;
6031 return false; /* All arguments need to be in registers. */
6034 case ROTATE:
6035 case ROTATERT:
6036 case LSHIFTRT:
6037 case ASHIFTRT:
6038 op0 = XEXP (x, 0);
6039 op1 = XEXP (x, 1);
6041 if (CONST_INT_P (op1))
6043 /* ASR (immediate) and friends. */
6044 if (speed)
6045 *cost += extra_cost->alu.shift;
6047 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
6048 return true;
6050 else
6053 /* ASR (register) and friends. */
6054 if (speed)
6055 *cost += extra_cost->alu.shift_reg;
6057 return false; /* All arguments need to be in registers. */
6060 case SYMBOL_REF:
6062 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6064 /* LDR. */
6065 if (speed)
6066 *cost += extra_cost->ldst.load;
6068 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
6069 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
6071 /* ADRP, followed by ADD. */
6072 *cost += COSTS_N_INSNS (1);
6073 if (speed)
6074 *cost += 2 * extra_cost->alu.arith;
6076 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
6077 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
6079 /* ADR. */
6080 if (speed)
6081 *cost += extra_cost->alu.arith;
6084 if (flag_pic)
6086 /* One extra load instruction, after accessing the GOT. */
6087 *cost += COSTS_N_INSNS (1);
6088 if (speed)
6089 *cost += extra_cost->ldst.load;
6091 return true;
6093 case HIGH:
6094 case LO_SUM:
6095 /* ADRP/ADD (immediate). */
6096 if (speed)
6097 *cost += extra_cost->alu.arith;
6098 return true;
6100 case ZERO_EXTRACT:
6101 case SIGN_EXTRACT:
6102 /* UBFX/SBFX. */
6103 if (speed)
6104 *cost += extra_cost->alu.bfx;
6106 /* We can trust that the immediates used will be correct (there
6107 are no by-register forms), so we need only cost op0. */
6108 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
6109 return true;
6111 case MULT:
6112 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
6113 /* aarch64_rtx_mult_cost always handles recursion to its
6114 operands. */
6115 return true;
6117 case MOD:
6118 case UMOD:
6119 if (speed)
6121 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6122 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
6123 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
6124 else if (GET_MODE (x) == DFmode)
6125 *cost += (extra_cost->fp[1].mult
6126 + extra_cost->fp[1].div);
6127 else if (GET_MODE (x) == SFmode)
6128 *cost += (extra_cost->fp[0].mult
6129 + extra_cost->fp[0].div);
6131 return false; /* All arguments need to be in registers. */
6133 case DIV:
6134 case UDIV:
6135 case SQRT:
6136 if (speed)
6138 if (GET_MODE_CLASS (mode) == MODE_INT)
6139 /* There is no integer SQRT, so only DIV and UDIV can get
6140 here. */
6141 *cost += extra_cost->mult[mode == DImode].idiv;
6142 else
6143 *cost += extra_cost->fp[mode == DFmode].div;
6145 return false; /* All arguments need to be in registers. */
6147 case IF_THEN_ELSE:
6148 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
6149 XEXP (x, 2), cost, speed);
6151 case EQ:
6152 case NE:
6153 case GT:
6154 case GTU:
6155 case LT:
6156 case LTU:
6157 case GE:
6158 case GEU:
6159 case LE:
6160 case LEU:
6162 return false; /* All arguments must be in registers. */
6164 case FMA:
6165 op0 = XEXP (x, 0);
6166 op1 = XEXP (x, 1);
6167 op2 = XEXP (x, 2);
6169 if (speed)
6170 *cost += extra_cost->fp[mode == DFmode].fma;
6172 /* FMSUB, FNMADD, and FNMSUB are free. */
6173 if (GET_CODE (op0) == NEG)
6174 op0 = XEXP (op0, 0);
6176 if (GET_CODE (op2) == NEG)
6177 op2 = XEXP (op2, 0);
6179 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
6180 and the by-element operand as operand 0. */
6181 if (GET_CODE (op1) == NEG)
6182 op1 = XEXP (op1, 0);
6184 /* Catch vector-by-element operations. The by-element operand can
6185 either be (vec_duplicate (vec_select (x))) or just
6186 (vec_select (x)), depending on whether we are multiplying by
6187 a vector or a scalar.
6189 Canonicalization is not very good in these cases: FMA4 will put the
6190 by-element operand as operand 0, FNMA4 will have it as operand 1. */
6191 if (GET_CODE (op0) == VEC_DUPLICATE)
6192 op0 = XEXP (op0, 0);
6193 else if (GET_CODE (op1) == VEC_DUPLICATE)
6194 op1 = XEXP (op1, 0);
6196 if (GET_CODE (op0) == VEC_SELECT)
6197 op0 = XEXP (op0, 0);
6198 else if (GET_CODE (op1) == VEC_SELECT)
6199 op1 = XEXP (op1, 0);
6201 /* If the remaining parameters are not registers,
6202 get the cost to put them into registers. */
6203 *cost += rtx_cost (op0, FMA, 0, speed);
6204 *cost += rtx_cost (op1, FMA, 1, speed);
6205 *cost += rtx_cost (op2, FMA, 2, speed);
6206 return true;
6208 case FLOAT_EXTEND:
6209 if (speed)
6210 *cost += extra_cost->fp[mode == DFmode].widen;
6211 return false;
6213 case FLOAT_TRUNCATE:
6214 if (speed)
6215 *cost += extra_cost->fp[mode == DFmode].narrow;
6216 return false;
6218 case FIX:
6219 case UNSIGNED_FIX:
6220 x = XEXP (x, 0);
6221 /* Strip the rounding part. They will all be implemented
6222 by the fcvt* family of instructions anyway. */
6223 if (GET_CODE (x) == UNSPEC)
6225 unsigned int uns_code = XINT (x, 1);
6227 if (uns_code == UNSPEC_FRINTA
6228 || uns_code == UNSPEC_FRINTM
6229 || uns_code == UNSPEC_FRINTN
6230 || uns_code == UNSPEC_FRINTP
6231 || uns_code == UNSPEC_FRINTZ)
6232 x = XVECEXP (x, 0, 0);
6235 if (speed)
6236 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
6238 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
6239 return true;
6241 case ABS:
6242 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6244 /* FABS and FNEG are analogous. */
6245 if (speed)
6246 *cost += extra_cost->fp[mode == DFmode].neg;
6248 else
6250 /* Integer ABS will either be split to
6251 two arithmetic instructions, or will be an ABS
6252 (scalar), which we don't model. */
6253 *cost = COSTS_N_INSNS (2);
6254 if (speed)
6255 *cost += 2 * extra_cost->alu.arith;
6257 return false;
6259 case SMAX:
6260 case SMIN:
6261 if (speed)
6263 /* FMAXNM/FMINNM/FMAX/FMIN.
6264 TODO: This may not be accurate for all implementations, but
6265 we do not model this in the cost tables. */
6266 *cost += extra_cost->fp[mode == DFmode].addsub;
6268 return false;
6270 case UNSPEC:
6271 /* The floating point round to integer frint* instructions. */
6272 if (aarch64_frint_unspec_p (XINT (x, 1)))
6274 if (speed)
6275 *cost += extra_cost->fp[mode == DFmode].roundint;
6277 return false;
6280 if (XINT (x, 1) == UNSPEC_RBIT)
6282 if (speed)
6283 *cost += extra_cost->alu.rev;
6285 return false;
6287 break;
6289 case TRUNCATE:
6291 /* Decompose <su>muldi3_highpart. */
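/* That is, (truncate:DI (lshiftrt:TI (mult:TI (sign_extend:TI x)
   (sign_extend:TI y)) (const_int 64))) -- or the zero_extend equivalent --
   is a single SMULH/UMULH instruction. */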
6292 if (/* (truncate:DI */
6293 mode == DImode
6294 /* (lshiftrt:TI */
6295 && GET_MODE (XEXP (x, 0)) == TImode
6296 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6297 /* (mult:TI */
6298 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6299 /* (ANY_EXTEND:TI (reg:DI))
6300 (ANY_EXTEND:TI (reg:DI))) */
6301 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6302 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
6303 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
6304 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
6305 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
6306 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
6307 /* (const_int 64) */
6308 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6309 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
6311 /* UMULH/SMULH. */
6312 if (speed)
6313 *cost += extra_cost->mult[mode == DImode].extend;
6314 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
6315 MULT, 0, speed);
6316 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
6317 MULT, 1, speed);
6318 return true;
6321 /* Fall through. */
6322 default:
6323 break;
6326 if (dump_file && (dump_flags & TDF_DETAILS))
6327 fprintf (dump_file,
6328 "\nFailed to cost RTX. Assuming default cost.\n");
6330 return true;
6333 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
6334 calculated for X. This cost is stored in *COST. Returns true
6335 if the total cost of X was calculated. */
6336 static bool
6337 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
6338 int param, int *cost, bool speed)
6340 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
6342 if (dump_file && (dump_flags & TDF_DETAILS))
6344 print_rtl_single (dump_file, x);
6345 fprintf (dump_file, "\n%s cost: %d (%s)\n",
6346 speed ? "Hot" : "Cold",
6347 *cost, result ? "final" : "partial");
6350 return result;
6353 static int
6354 aarch64_register_move_cost (machine_mode mode,
6355 reg_class_t from_i, reg_class_t to_i)
6357 enum reg_class from = (enum reg_class) from_i;
6358 enum reg_class to = (enum reg_class) to_i;
6359 const struct cpu_regmove_cost *regmove_cost
6360 = aarch64_tune_params->regmove_cost;
6362 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
6363 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6364 to = GENERAL_REGS;
6366 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6367 from = GENERAL_REGS;
6369 /* Moving between GPR and stack cost is the same as GP2GP. */
6370 if ((from == GENERAL_REGS && to == STACK_REG)
6371 || (to == GENERAL_REGS && from == STACK_REG))
6372 return regmove_cost->GP2GP;
6374 /* To/From the stack register, we move via the gprs. */
6375 if (to == STACK_REG || from == STACK_REG)
6376 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6377 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
6379 if (GET_MODE_SIZE (mode) == 16)
6381 /* 128-bit operations on general registers require 2 instructions. */
6382 if (from == GENERAL_REGS && to == GENERAL_REGS)
6383 return regmove_cost->GP2GP * 2;
6384 else if (from == GENERAL_REGS)
6385 return regmove_cost->GP2FP * 2;
6386 else if (to == GENERAL_REGS)
6387 return regmove_cost->FP2GP * 2;
6389 /* When AdvSIMD instructions are disabled it is not possible to move
6390 a 128-bit value directly between Q registers. This is handled in
6391 secondary reload. A general register is used as a scratch to move
6392 the upper DI value and the lower DI value is moved directly,
6393 hence the cost is the sum of three moves. */
6394 if (! TARGET_SIMD)
6395 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6397 return regmove_cost->FP2FP;
6400 if (from == GENERAL_REGS && to == GENERAL_REGS)
6401 return regmove_cost->GP2GP;
6402 else if (from == GENERAL_REGS)
6403 return regmove_cost->GP2FP;
6404 else if (to == GENERAL_REGS)
6405 return regmove_cost->FP2GP;
6407 return regmove_cost->FP2FP;
6410 static int
6411 aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
6412 reg_class_t rclass ATTRIBUTE_UNUSED,
6413 bool in ATTRIBUTE_UNUSED)
6415 return aarch64_tune_params->memmov_cost;
6418 /* Return the number of instructions that can be issued per cycle. */
6419 static int
6420 aarch64_sched_issue_rate (void)
6422 return aarch64_tune_params->issue_rate;
6425 /* Vectorizer cost model target hooks. */
6427 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6428 static int
6429 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6430 tree vectype,
6431 int misalign ATTRIBUTE_UNUSED)
6433 unsigned elements;
6435 switch (type_of_cost)
6437 case scalar_stmt:
6438 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6440 case scalar_load:
6441 return aarch64_tune_params->vec_costs->scalar_load_cost;
6443 case scalar_store:
6444 return aarch64_tune_params->vec_costs->scalar_store_cost;
6446 case vector_stmt:
6447 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6449 case vector_load:
6450 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6452 case vector_store:
6453 return aarch64_tune_params->vec_costs->vec_store_cost;
6455 case vec_to_scalar:
6456 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6458 case scalar_to_vec:
6459 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6461 case unaligned_load:
6462 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6464 case unaligned_store:
6465 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6467 case cond_branch_taken:
6468 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6470 case cond_branch_not_taken:
6471 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6473 case vec_perm:
6474 case vec_promote_demote:
6475 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6477 case vec_construct:
6478 elements = TYPE_VECTOR_SUBPARTS (vectype);
6479 return elements / 2 + 1;
6481 default:
6482 gcc_unreachable ();
6486 /* Implement targetm.vectorize.add_stmt_cost. */
6487 static unsigned
6488 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6489 struct _stmt_vec_info *stmt_info, int misalign,
6490 enum vect_cost_model_location where)
6492 unsigned *cost = (unsigned *) data;
6493 unsigned retval = 0;
6495 if (flag_vect_cost_model)
6497 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6498 int stmt_cost =
6499 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6501 /* Statements in an inner loop relative to the loop being
6502 vectorized are weighted more heavily. The value here is
6503 a function (linear for now) of the loop nest level. */
6504 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6506 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6507 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6508 unsigned nest_level = loop_depth (loop);
6510 count *= nest_level;
6513 retval = (unsigned) (count * stmt_cost);
6514 cost[where] += retval;
6517 return retval;
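/* Illustrative note (editorial addition, not from the original sources):
   with the linear weighting above, a vector statement of cost 3 counted
   once in an inner loop whose nest level is 2 is accumulated as
   count * nest_level * stmt_cost = 1 * 2 * 3 = 6 into the vect_body slot
   of the cost array.  */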
6520 static void initialize_aarch64_code_model (void);
6522 /* Parse the architecture extension string. */
6524 static void
6525 aarch64_parse_extension (char *str)
6527 /* The extension string is parsed left to right. */
6528 const struct aarch64_option_extension *opt = NULL;
6530 /* Flag to say whether we are adding or removing an extension. */
6531 int adding_ext = -1;
6533 while (str != NULL && *str != 0)
6535 char *ext;
6536 size_t len;
6538 str++;
6539 ext = strchr (str, '+');
6541 if (ext != NULL)
6542 len = ext - str;
6543 else
6544 len = strlen (str);
6546 if (len >= 2 && strncmp (str, "no", 2) == 0)
6548 adding_ext = 0;
6549 len -= 2;
6550 str += 2;
6552 else if (len > 0)
6553 adding_ext = 1;
6555 if (len == 0)
6557 error ("missing feature modifier after %qs", adding_ext ? "+"
6558 : "+no");
6559 return;
6562 /* Scan over the extensions table trying to find an exact match. */
6563 for (opt = all_extensions; opt->name != NULL; opt++)
6565 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6567 /* Add or remove the extension. */
6568 if (adding_ext)
6569 aarch64_isa_flags |= opt->flags_on;
6570 else
6571 aarch64_isa_flags &= ~(opt->flags_off);
6572 break;
6576 if (opt->name == NULL)
6578 /* Extension not found in list. */
6579 error ("unknown feature modifier %qs", str);
6580 return;
6583 str = ext;
6586 return;
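/* Illustrative note (editorial addition, not from the original sources):
   given a hypothetical option such as -march=armv8-a+crc+nofp, the parser
   above receives the substring "+crc+nofp".  It first consumes "crc"
   (adding_ext = 1, so the extension's flags_on bits are OR-ed into
   aarch64_isa_flags) and then "nofp" (the leading "no" sets adding_ext = 0,
   so the extension's flags_off bits are cleared).  */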
6589 /* Parse the ARCH string. */
6591 static void
6592 aarch64_parse_arch (void)
6594 char *ext;
6595 const struct processor *arch;
6596 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6597 size_t len;
6599 strcpy (str, aarch64_arch_string);
6601 ext = strchr (str, '+');
6603 if (ext != NULL)
6604 len = ext - str;
6605 else
6606 len = strlen (str);
6608 if (len == 0)
6610 error ("missing arch name in -march=%qs", str);
6611 return;
6614 /* Loop through the list of supported ARCHs to find a match. */
6615 for (arch = all_architectures; arch->name != NULL; arch++)
6617 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6619 selected_arch = arch;
6620 aarch64_isa_flags = selected_arch->flags;
6622 if (!selected_cpu)
6623 selected_cpu = &all_cores[selected_arch->core];
6625 if (ext != NULL)
6627 /* ARCH string contains at least one extension. */
6628 aarch64_parse_extension (ext);
6631 if (strcmp (selected_arch->arch, selected_cpu->arch))
6633 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6634 selected_cpu->name, selected_arch->name);
6637 return;
6641 /* ARCH name not found in list. */
6642 error ("unknown value %qs for -march", str);
6643 return;
6646 /* Parse the CPU string. */
6648 static void
6649 aarch64_parse_cpu (void)
6651 char *ext;
6652 const struct processor *cpu;
6653 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6654 size_t len;
6656 strcpy (str, aarch64_cpu_string);
6658 ext = strchr (str, '+');
6660 if (ext != NULL)
6661 len = ext - str;
6662 else
6663 len = strlen (str);
6665 if (len == 0)
6667 error ("missing cpu name in -mcpu=%qs", str);
6668 return;
6671 /* Loop through the list of supported CPUs to find a match. */
6672 for (cpu = all_cores; cpu->name != NULL; cpu++)
6674 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6676 selected_cpu = cpu;
6677 aarch64_isa_flags = selected_cpu->flags;
6679 if (ext != NULL)
6681 /* CPU string contains at least one extension. */
6682 aarch64_parse_extension (ext);
6685 return;
6689 /* CPU name not found in list. */
6690 error ("unknown value %qs for -mcpu", str);
6691 return;
6694 /* Parse the TUNE string. */
6696 static void
6697 aarch64_parse_tune (void)
6699 const struct processor *cpu;
6700 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6701 strcpy (str, aarch64_tune_string);
6703 /* Loop through the list of supported CPUs to find a match. */
6704 for (cpu = all_cores; cpu->name != NULL; cpu++)
6706 if (strcmp (cpu->name, str) == 0)
6708 selected_tune = cpu;
6709 return;
6713 /* CPU name not found in list. */
6714 error ("unknown value %qs for -mtune", str);
6715 return;
6719 /* Implement TARGET_OPTION_OVERRIDE. */
6721 static void
6722 aarch64_override_options (void)
6724 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6725 If either of -march or -mtune is given, they override their
6726 respective component of -mcpu.
6728 So, first parse AARCH64_CPU_STRING, then the others.  Be careful
6729 with -march: if -mcpu is not present on the command line, -march
6730 must set a sensible default CPU.  */
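/* Illustrative note (editorial addition, not from the original sources):
   for example, -mcpu=cortex-a57 on its own behaves like
   -march=armv8-a -mtune=cortex-a57, while -mcpu=cortex-a57 combined with
   -mtune=cortex-a53 keeps the cortex-a57 ISA selection but uses the
   cortex-a53 tuning tables.  */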
6731 if (aarch64_cpu_string)
6733 aarch64_parse_cpu ();
6736 if (aarch64_arch_string)
6738 aarch64_parse_arch ();
6741 if (aarch64_tune_string)
6743 aarch64_parse_tune ();
6746 #ifndef HAVE_AS_MABI_OPTION
6747 /* The compiler may have been configured with 2.23.* binutils, which does
6748 not have support for ILP32. */
6749 if (TARGET_ILP32)
6750 error ("Assembler does not support -mabi=ilp32");
6751 #endif
6753 initialize_aarch64_code_model ();
6755 aarch64_build_bitmask_table ();
6757 /* This target defaults to strict volatile bitfields. */
6758 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6759 flag_strict_volatile_bitfields = 1;
6761 /* If the user did not specify a processor, choose the default
6762 one for them. This will be the CPU set during configuration using
6763 --with-cpu, otherwise it is "generic". */
6764 if (!selected_cpu)
6766 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6767 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6770 gcc_assert (selected_cpu);
6772 if (!selected_tune)
6773 selected_tune = selected_cpu;
6775 aarch64_tune_flags = selected_tune->flags;
6776 aarch64_tune = selected_tune->core;
6777 aarch64_tune_params = selected_tune->tune;
6778 aarch64_architecture_version = selected_cpu->architecture_version;
6780 if (aarch64_fix_a53_err835769 == 2)
6782 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
6783 aarch64_fix_a53_err835769 = 1;
6784 #else
6785 aarch64_fix_a53_err835769 = 0;
6786 #endif
6789 /* If not optimizing for size, set the default
6790 alignment to what the target wants.  */
6791 if (!optimize_size)
6793 if (align_loops <= 0)
6794 align_loops = aarch64_tune_params->loop_align;
6795 if (align_jumps <= 0)
6796 align_jumps = aarch64_tune_params->jump_align;
6797 if (align_functions <= 0)
6798 align_functions = aarch64_tune_params->function_align;
6801 aarch64_override_options_after_change ();
6804 /* Implement targetm.override_options_after_change. */
6806 static void
6807 aarch64_override_options_after_change (void)
6809 if (flag_omit_frame_pointer)
6810 flag_omit_leaf_frame_pointer = false;
6811 else if (flag_omit_leaf_frame_pointer)
6812 flag_omit_frame_pointer = true;
6815 static struct machine_function *
6816 aarch64_init_machine_status (void)
6818 struct machine_function *machine;
6819 machine = ggc_cleared_alloc<machine_function> ();
6820 return machine;
6823 void
6824 aarch64_init_expanders (void)
6826 init_machine_status = aarch64_init_machine_status;
6829 /* A checking mechanism for the implementation of the various code models. */
6830 static void
6831 initialize_aarch64_code_model (void)
6833 if (flag_pic)
6835 switch (aarch64_cmodel_var)
6837 case AARCH64_CMODEL_TINY:
6838 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6839 break;
6840 case AARCH64_CMODEL_SMALL:
6841 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6842 break;
6843 case AARCH64_CMODEL_LARGE:
6844 sorry ("code model %qs with -f%s", "large",
6845 flag_pic > 1 ? "PIC" : "pic");
6846 default:
6847 gcc_unreachable ();
6850 else
6851 aarch64_cmodel = aarch64_cmodel_var;
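/* Illustrative note (editorial addition, not from the original sources):
   for example, -mcmodel=small together with -fPIC selects
   AARCH64_CMODEL_SMALL_PIC above, whereas -mcmodel=large with -fPIC is
   rejected via sorry () because no PIC variant of the large model is
   implemented.  */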
6854 /* Return true if SYMBOL_REF X binds locally. */
6856 static bool
6857 aarch64_symbol_binds_local_p (const_rtx x)
6859 return (SYMBOL_REF_DECL (x)
6860 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6861 : SYMBOL_REF_LOCAL_P (x));
6864 /* Return true if SYMBOL_REF X is thread local.  */
6865 static bool
6866 aarch64_tls_symbol_p (rtx x)
6868 if (! TARGET_HAVE_TLS)
6869 return false;
6871 if (GET_CODE (x) != SYMBOL_REF)
6872 return false;
6874 return SYMBOL_REF_TLS_MODEL (x) != 0;
6877 /* Classify a TLS symbol into one of the TLS kinds. */
6878 enum aarch64_symbol_type
6879 aarch64_classify_tls_symbol (rtx x)
6881 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6883 switch (tls_kind)
6885 case TLS_MODEL_GLOBAL_DYNAMIC:
6886 case TLS_MODEL_LOCAL_DYNAMIC:
6887 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6889 case TLS_MODEL_INITIAL_EXEC:
6890 return SYMBOL_SMALL_GOTTPREL;
6892 case TLS_MODEL_LOCAL_EXEC:
6893 return SYMBOL_SMALL_TPREL;
6895 case TLS_MODEL_EMULATED:
6896 case TLS_MODEL_NONE:
6897 return SYMBOL_FORCE_TO_MEM;
6899 default:
6900 gcc_unreachable ();
6904 /* Return the method that should be used to access SYMBOL_REF or
6905 LABEL_REF X in context CONTEXT. */
6907 enum aarch64_symbol_type
6908 aarch64_classify_symbol (rtx x, rtx offset,
6909 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6911 if (GET_CODE (x) == LABEL_REF)
6913 switch (aarch64_cmodel)
6915 case AARCH64_CMODEL_LARGE:
6916 return SYMBOL_FORCE_TO_MEM;
6918 case AARCH64_CMODEL_TINY_PIC:
6919 case AARCH64_CMODEL_TINY:
6920 return SYMBOL_TINY_ABSOLUTE;
6922 case AARCH64_CMODEL_SMALL_PIC:
6923 case AARCH64_CMODEL_SMALL:
6924 return SYMBOL_SMALL_ABSOLUTE;
6926 default:
6927 gcc_unreachable ();
6931 if (GET_CODE (x) == SYMBOL_REF)
6933 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6934 return SYMBOL_FORCE_TO_MEM;
6936 if (aarch64_tls_symbol_p (x))
6937 return aarch64_classify_tls_symbol (x);
6939 switch (aarch64_cmodel)
6941 case AARCH64_CMODEL_TINY:
6942 /* When we retrieve a symbol + offset address, we have to make sure
6943 the offset does not cause overflow of the final address. But
6944 we have no way of knowing the address of the symbol at compile time,
6945 so we can't accurately say whether the distance between the PC and
6946 symbol + offset is outside the addressable range of +/-1M in the
6947 TINY code model. So we rely on images not being greater than
6948 1M, cap the offset at 1M, and require anything beyond 1M to
6949 be loaded using an alternative mechanism. */
6950 if (SYMBOL_REF_WEAK (x)
6951 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
6952 return SYMBOL_FORCE_TO_MEM;
6953 return SYMBOL_TINY_ABSOLUTE;
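/* Illustrative note (editorial addition, not from the original sources):
   for instance, a hypothetical reference to "sym + 0x200000" exceeds the
   1M cap above, so it is classified as SYMBOL_FORCE_TO_MEM and the
   address is loaded from the literal pool rather than being formed with
   a PC-relative ADR.  */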
6955 case AARCH64_CMODEL_SMALL:
6956 /* Same reasoning as the tiny code model, but the offset cap here is
6957 4G. */
6958 if (SYMBOL_REF_WEAK (x)
6959 || INTVAL (offset) < (HOST_WIDE_INT) -4294967263
6960 || INTVAL (offset) > (HOST_WIDE_INT) 4294967264)
6961 return SYMBOL_FORCE_TO_MEM;
6962 return SYMBOL_SMALL_ABSOLUTE;
6964 case AARCH64_CMODEL_TINY_PIC:
6965 if (!aarch64_symbol_binds_local_p (x))
6966 return SYMBOL_TINY_GOT;
6967 return SYMBOL_TINY_ABSOLUTE;
6969 case AARCH64_CMODEL_SMALL_PIC:
6970 if (!aarch64_symbol_binds_local_p (x))
6971 return SYMBOL_SMALL_GOT;
6972 return SYMBOL_SMALL_ABSOLUTE;
6974 default:
6975 gcc_unreachable ();
6979 /* By default push everything into the constant pool. */
6980 return SYMBOL_FORCE_TO_MEM;
6983 bool
6984 aarch64_constant_address_p (rtx x)
6986 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6989 bool
6990 aarch64_legitimate_pic_operand_p (rtx x)
6992 if (GET_CODE (x) == SYMBOL_REF
6993 || (GET_CODE (x) == CONST
6994 && GET_CODE (XEXP (x, 0)) == PLUS
6995 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6996 return false;
6998 return true;
7001 /* Return true if X holds either a quarter-precision or
7002 floating-point +0.0 constant. */
7003 static bool
7004 aarch64_valid_floating_const (machine_mode mode, rtx x)
7006 if (!CONST_DOUBLE_P (x))
7007 return false;
7009 /* TODO: We could handle moving 0.0 to a TFmode register,
7010 but first we would like to refactor the movtf_aarch64
7011 to be more amicable to split moves properly and
7012 correctly gate on TARGET_SIMD. For now, reject all
7013 constants which are not destined for SFmode or DFmode registers. */
7014 if (!(mode == SFmode || mode == DFmode))
7015 return false;
7017 if (aarch64_float_const_zero_rtx_p (x))
7018 return true;
7019 return aarch64_float_const_representable_p (x);
7022 static bool
7023 aarch64_legitimate_constant_p (machine_mode mode, rtx x)
7025 /* Do not allow vector struct mode constants. We could support
7026 0 and -1 easily, but they need support in aarch64-simd.md. */
7027 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
7028 return false;
7030 /* This could probably go away because
7031 we now decompose CONST_INTs according to expand_mov_immediate. */
7032 if ((GET_CODE (x) == CONST_VECTOR
7033 && aarch64_simd_valid_immediate (x, mode, false, NULL))
7034 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
7035 return !targetm.cannot_force_const_mem (mode, x);
7037 if (GET_CODE (x) == HIGH
7038 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7039 return true;
7041 return aarch64_constant_address_p (x);
7045 aarch64_load_tp (rtx target)
7047 if (!target
7048 || GET_MODE (target) != Pmode
7049 || !register_operand (target, Pmode))
7050 target = gen_reg_rtx (Pmode);
7052 /* Can return in any reg. */
7053 emit_insn (gen_aarch64_load_tp_hard (target));
7054 return target;
7057 /* On AAPCS systems, this is the "struct __va_list". */
7058 static GTY(()) tree va_list_type;
7060 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
7061 Return the type to use as __builtin_va_list.
7063 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
7065 struct __va_list
7067 void *__stack;
7068 void *__gr_top;
7069 void *__vr_top;
7070 int __gr_offs;
7071 int __vr_offs;
7072 }; */
7074 static tree
7075 aarch64_build_builtin_va_list (void)
7077 tree va_list_name;
7078 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7080 /* Create the type. */
7081 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
7082 /* Give it the required name. */
7083 va_list_name = build_decl (BUILTINS_LOCATION,
7084 TYPE_DECL,
7085 get_identifier ("__va_list"),
7086 va_list_type);
7087 DECL_ARTIFICIAL (va_list_name) = 1;
7088 TYPE_NAME (va_list_type) = va_list_name;
7089 TYPE_STUB_DECL (va_list_type) = va_list_name;
7091 /* Create the fields. */
7092 f_stack = build_decl (BUILTINS_LOCATION,
7093 FIELD_DECL, get_identifier ("__stack"),
7094 ptr_type_node);
7095 f_grtop = build_decl (BUILTINS_LOCATION,
7096 FIELD_DECL, get_identifier ("__gr_top"),
7097 ptr_type_node);
7098 f_vrtop = build_decl (BUILTINS_LOCATION,
7099 FIELD_DECL, get_identifier ("__vr_top"),
7100 ptr_type_node);
7101 f_groff = build_decl (BUILTINS_LOCATION,
7102 FIELD_DECL, get_identifier ("__gr_offs"),
7103 integer_type_node);
7104 f_vroff = build_decl (BUILTINS_LOCATION,
7105 FIELD_DECL, get_identifier ("__vr_offs"),
7106 integer_type_node);
7108 DECL_ARTIFICIAL (f_stack) = 1;
7109 DECL_ARTIFICIAL (f_grtop) = 1;
7110 DECL_ARTIFICIAL (f_vrtop) = 1;
7111 DECL_ARTIFICIAL (f_groff) = 1;
7112 DECL_ARTIFICIAL (f_vroff) = 1;
7114 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
7115 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
7116 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
7117 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
7118 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
7120 TYPE_FIELDS (va_list_type) = f_stack;
7121 DECL_CHAIN (f_stack) = f_grtop;
7122 DECL_CHAIN (f_grtop) = f_vrtop;
7123 DECL_CHAIN (f_vrtop) = f_groff;
7124 DECL_CHAIN (f_groff) = f_vroff;
7126 /* Compute its layout. */
7127 layout_type (va_list_type);
7129 return va_list_type;
7132 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
7133 static void
7134 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
7136 const CUMULATIVE_ARGS *cum;
7137 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7138 tree stack, grtop, vrtop, groff, vroff;
7139 tree t;
7140 int gr_save_area_size;
7141 int vr_save_area_size;
7142 int vr_offset;
7144 cum = &crtl->args.info;
7145 gr_save_area_size
7146 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
7147 vr_save_area_size
7148 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
7150 if (TARGET_GENERAL_REGS_ONLY)
7152 if (cum->aapcs_nvrn > 0)
7153 sorry ("%qs and floating point or vector arguments",
7154 "-mgeneral-regs-only");
7155 vr_save_area_size = 0;
7158 f_stack = TYPE_FIELDS (va_list_type_node);
7159 f_grtop = DECL_CHAIN (f_stack);
7160 f_vrtop = DECL_CHAIN (f_grtop);
7161 f_groff = DECL_CHAIN (f_vrtop);
7162 f_vroff = DECL_CHAIN (f_groff);
7164 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
7165 NULL_TREE);
7166 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
7167 NULL_TREE);
7168 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
7169 NULL_TREE);
7170 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
7171 NULL_TREE);
7172 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
7173 NULL_TREE);
7175 /* Emit code to initialize STACK, which points to the next varargs stack
7176 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
7177 by named arguments. STACK is 8-byte aligned. */
7178 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
7179 if (cum->aapcs_stack_size > 0)
7180 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
7181 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
7182 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7184 /* Emit code to initialize GRTOP, the top of the GR save area.
7185 virtual_incoming_args_rtx should have been 16 byte aligned. */
7186 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
7187 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
7188 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7190 /* Emit code to initialize VRTOP, the top of the VR save area.
7191 This address is gr_save_area_size bytes below GRTOP, rounded
7192 down to the next 16-byte boundary. */
7193 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
7194 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
7195 STACK_BOUNDARY / BITS_PER_UNIT);
7197 if (vr_offset)
7198 t = fold_build_pointer_plus_hwi (t, -vr_offset);
7199 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
7200 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7202 /* Emit code to initialize GROFF, the offset from GRTOP of the
7203 next GPR argument. */
7204 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
7205 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
7206 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7208 /* Likewise emit code to initialize VROFF, the offset from FTOP
7209 of the next VR argument. */
7210 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
7211 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
7212 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
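/* Illustrative note (editorial addition, not from the original sources):
   for a hypothetical variadic callee with two named integer arguments and
   one named FP argument, the code above records
     __gr_offs = -(8 - 2) * 8  = -48
     __vr_offs = -(8 - 1) * 16 = -112
   with __gr_top pointing just past the GR save area and __vr_top just past
   the VR save area that lies immediately below it.  */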
7215 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
7217 static tree
7218 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7219 gimple_seq *post_p ATTRIBUTE_UNUSED)
7221 tree addr;
7222 bool indirect_p;
7223 bool is_ha; /* is HFA or HVA. */
7224 bool dw_align; /* double-word align. */
7225 machine_mode ag_mode = VOIDmode;
7226 int nregs;
7227 machine_mode mode;
7229 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7230 tree stack, f_top, f_off, off, arg, roundup, on_stack;
7231 HOST_WIDE_INT size, rsize, adjust, align;
7232 tree t, u, cond1, cond2;
7234 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7235 if (indirect_p)
7236 type = build_pointer_type (type);
7238 mode = TYPE_MODE (type);
7240 f_stack = TYPE_FIELDS (va_list_type_node);
7241 f_grtop = DECL_CHAIN (f_stack);
7242 f_vrtop = DECL_CHAIN (f_grtop);
7243 f_groff = DECL_CHAIN (f_vrtop);
7244 f_vroff = DECL_CHAIN (f_groff);
7246 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
7247 f_stack, NULL_TREE);
7248 size = int_size_in_bytes (type);
7249 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
7251 dw_align = false;
7252 adjust = 0;
7253 if (aarch64_vfp_is_call_or_return_candidate (mode,
7254 type,
7255 &ag_mode,
7256 &nregs,
7257 &is_ha))
7259 /* TYPE passed in fp/simd registers. */
7260 if (TARGET_GENERAL_REGS_ONLY)
7261 sorry ("%qs and floating point or vector arguments",
7262 "-mgeneral-regs-only");
7264 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
7265 unshare_expr (valist), f_vrtop, NULL_TREE);
7266 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
7267 unshare_expr (valist), f_vroff, NULL_TREE);
7269 rsize = nregs * UNITS_PER_VREG;
7271 if (is_ha)
7273 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
7274 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
7276 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
7277 && size < UNITS_PER_VREG)
7279 adjust = UNITS_PER_VREG - size;
7282 else
7284 /* TYPE passed in general registers. */
7285 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
7286 unshare_expr (valist), f_grtop, NULL_TREE);
7287 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
7288 unshare_expr (valist), f_groff, NULL_TREE);
7289 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7290 nregs = rsize / UNITS_PER_WORD;
7292 if (align > 8)
7293 dw_align = true;
7295 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7296 && size < UNITS_PER_WORD)
7298 adjust = UNITS_PER_WORD - size;
7302 /* Get a local temporary for the field value. */
7303 off = get_initialized_tmp_var (f_off, pre_p, NULL);
7305 /* Emit code to branch if off >= 0. */
7306 t = build2 (GE_EXPR, boolean_type_node, off,
7307 build_int_cst (TREE_TYPE (off), 0));
7308 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
7310 if (dw_align)
7312 /* Emit: offs = (offs + 15) & -16. */
7313 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7314 build_int_cst (TREE_TYPE (off), 15));
7315 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
7316 build_int_cst (TREE_TYPE (off), -16));
7317 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
7319 else
7320 roundup = NULL;
7322 /* Update ap.__[g|v]r_offs */
7323 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7324 build_int_cst (TREE_TYPE (off), rsize));
7325 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
7327 /* String up. */
7328 if (roundup)
7329 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7331 /* [cond2] if (ap.__[g|v]r_offs > 0) */
7332 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
7333 build_int_cst (TREE_TYPE (f_off), 0));
7334 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
7336 /* String up: make sure the assignment happens before the use. */
7337 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
7338 COND_EXPR_ELSE (cond1) = t;
7340 /* Prepare the trees handling the argument that is passed on the stack;
7341 the top-level node will be stored in ON_STACK. */
7342 arg = get_initialized_tmp_var (stack, pre_p, NULL);
7343 if (align > 8)
7345 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
7346 t = fold_convert (intDI_type_node, arg);
7347 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7348 build_int_cst (TREE_TYPE (t), 15));
7349 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7350 build_int_cst (TREE_TYPE (t), -16));
7351 t = fold_convert (TREE_TYPE (arg), t);
7352 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
7354 else
7355 roundup = NULL;
7356 /* Advance ap.__stack */
7357 t = fold_convert (intDI_type_node, arg);
7358 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7359 build_int_cst (TREE_TYPE (t), size + 7));
7360 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7361 build_int_cst (TREE_TYPE (t), -8));
7362 t = fold_convert (TREE_TYPE (arg), t);
7363 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
7364 /* String up roundup and advance. */
7365 if (roundup)
7366 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7367 /* String up with arg */
7368 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
7369 /* Big-endianness related address adjustment. */
7370 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7371 && size < UNITS_PER_WORD)
7373 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
7374 size_int (UNITS_PER_WORD - size));
7375 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
7378 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
7379 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
7381 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
7382 t = off;
7383 if (adjust)
7384 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
7385 build_int_cst (TREE_TYPE (off), adjust));
7387 t = fold_convert (sizetype, t);
7388 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
7390 if (is_ha)
7392 /* type ha; // treat as "struct {ftype field[n];}"
7393 ... [computing offs]
7394 for (i = 0; i <nregs; ++i, offs += 16)
7395 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
7396 return ha; */
7397 int i;
7398 tree tmp_ha, field_t, field_ptr_t;
7400 /* Declare a local variable. */
7401 tmp_ha = create_tmp_var_raw (type, "ha");
7402 gimple_add_tmp_var (tmp_ha);
7404 /* Establish the base type. */
7405 switch (ag_mode)
7407 case SFmode:
7408 field_t = float_type_node;
7409 field_ptr_t = float_ptr_type_node;
7410 break;
7411 case DFmode:
7412 field_t = double_type_node;
7413 field_ptr_t = double_ptr_type_node;
7414 break;
7415 case TFmode:
7416 field_t = long_double_type_node;
7417 field_ptr_t = long_double_ptr_type_node;
7418 break;
7419 /* Half-precision and quad-precision floats are not fully supported yet.
7420 Enable the following code once that support is complete; we still need
7421 to find the correct type node for __fp16 *. */
7422 #if 0
7423 case HFmode:
7424 field_t = float_type_node;
7425 field_ptr_t = float_ptr_type_node;
7426 break;
7427 #endif
7428 case V2SImode:
7429 case V4SImode:
7431 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
7432 field_t = build_vector_type_for_mode (innertype, ag_mode);
7433 field_ptr_t = build_pointer_type (field_t);
7435 break;
7436 default:
7437 gcc_assert (0);
7440 /* *(field_ptr_t)&ha = *((field_ptr_t) vr_saved_area).  */
7441 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
7442 addr = t;
7443 t = fold_convert (field_ptr_t, addr);
7444 t = build2 (MODIFY_EXPR, field_t,
7445 build1 (INDIRECT_REF, field_t, tmp_ha),
7446 build1 (INDIRECT_REF, field_t, t));
7448 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
7449 for (i = 1; i < nregs; ++i)
7451 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
7452 u = fold_convert (field_ptr_t, addr);
7453 u = build2 (MODIFY_EXPR, field_t,
7454 build2 (MEM_REF, field_t, tmp_ha,
7455 build_int_cst (field_ptr_t,
7456 (i *
7457 int_size_in_bytes (field_t)))),
7458 build1 (INDIRECT_REF, field_t, u));
7459 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7462 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7463 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7466 COND_EXPR_ELSE (cond2) = t;
7467 addr = fold_convert (build_pointer_type (type), cond1);
7468 addr = build_va_arg_indirect_ref (addr);
7470 if (indirect_p)
7471 addr = build_va_arg_indirect_ref (addr);
7473 return addr;
7476 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
7478 static void
7479 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
7480 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7481 int no_rtl)
7483 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7484 CUMULATIVE_ARGS local_cum;
7485 int gr_saved, vr_saved;
7487 /* The caller has advanced CUM up to, but not beyond, the last named
7488 argument. Advance a local copy of CUM past the last "real" named
7489 argument, to find out how many registers are left over. */
7490 local_cum = *cum;
7491 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
7493 /* Find out how many registers we need to save. */
7494 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7495 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7497 if (TARGET_GENERAL_REGS_ONLY)
7499 if (local_cum.aapcs_nvrn > 0)
7500 sorry ("%qs and floating point or vector arguments",
7501 "-mgeneral-regs-only");
7502 vr_saved = 0;
7505 if (!no_rtl)
7507 if (gr_saved > 0)
7509 rtx ptr, mem;
7511 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7512 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7513 - gr_saved * UNITS_PER_WORD);
7514 mem = gen_frame_mem (BLKmode, ptr);
7515 set_mem_alias_set (mem, get_varargs_alias_set ());
7517 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7518 mem, gr_saved);
7520 if (vr_saved > 0)
7522 /* We can't use move_block_from_reg, because it will use
7523 the wrong mode, storing D regs only. */
7524 machine_mode mode = TImode;
7525 int off, i;
7527 /* Set OFF to the offset from virtual_incoming_args_rtx of
7528 the first vector register. The VR save area lies below
7529 the GR one, and is aligned to 16 bytes. */
7530 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7531 STACK_BOUNDARY / BITS_PER_UNIT);
7532 off -= vr_saved * UNITS_PER_VREG;
7534 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7536 rtx ptr, mem;
7538 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7539 mem = gen_frame_mem (mode, ptr);
7540 set_mem_alias_set (mem, get_varargs_alias_set ());
7541 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7542 off += UNITS_PER_VREG;
7547 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7548 any complication of having crtl->args.pretend_args_size changed. */
7549 cfun->machine->frame.saved_varargs_size
7550 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7551 STACK_BOUNDARY / BITS_PER_UNIT)
7552 + vr_saved * UNITS_PER_VREG);
7555 static void
7556 aarch64_conditional_register_usage (void)
7558 int i;
7559 if (!TARGET_FLOAT)
7561 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7563 fixed_regs[i] = 1;
7564 call_used_regs[i] = 1;
7569 /* Walk down the type tree of TYPE counting consecutive base elements.
7570 If *MODEP is VOIDmode, then set it to the first valid floating point
7571 type. If a non-floating point type is found, or if a floating point
7572 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7573 otherwise return the count in the sub-tree. */
7574 static int
7575 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
7577 machine_mode mode;
7578 HOST_WIDE_INT size;
7580 switch (TREE_CODE (type))
7582 case REAL_TYPE:
7583 mode = TYPE_MODE (type);
7584 if (mode != DFmode && mode != SFmode && mode != TFmode)
7585 return -1;
7587 if (*modep == VOIDmode)
7588 *modep = mode;
7590 if (*modep == mode)
7591 return 1;
7593 break;
7595 case COMPLEX_TYPE:
7596 mode = TYPE_MODE (TREE_TYPE (type));
7597 if (mode != DFmode && mode != SFmode && mode != TFmode)
7598 return -1;
7600 if (*modep == VOIDmode)
7601 *modep = mode;
7603 if (*modep == mode)
7604 return 2;
7606 break;
7608 case VECTOR_TYPE:
7609 /* Use V2SImode and V4SImode as representatives of all 64-bit
7610 and 128-bit vector types. */
7611 size = int_size_in_bytes (type);
7612 switch (size)
7614 case 8:
7615 mode = V2SImode;
7616 break;
7617 case 16:
7618 mode = V4SImode;
7619 break;
7620 default:
7621 return -1;
7624 if (*modep == VOIDmode)
7625 *modep = mode;
7627 /* Vector modes are considered to be opaque: two vectors are
7628 equivalent for the purposes of being homogeneous aggregates
7629 if they are the same size. */
7630 if (*modep == mode)
7631 return 1;
7633 break;
7635 case ARRAY_TYPE:
7637 int count;
7638 tree index = TYPE_DOMAIN (type);
7640 /* Can't handle incomplete types nor sizes that are not
7641 fixed. */
7642 if (!COMPLETE_TYPE_P (type)
7643 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7644 return -1;
7646 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7647 if (count == -1
7648 || !index
7649 || !TYPE_MAX_VALUE (index)
7650 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
7651 || !TYPE_MIN_VALUE (index)
7652 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7653 || count < 0)
7654 return -1;
7656 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7657 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7659 /* There must be no padding. */
7660 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7661 return -1;
7663 return count;
7666 case RECORD_TYPE:
7668 int count = 0;
7669 int sub_count;
7670 tree field;
7672 /* Can't handle incomplete types nor sizes that are not
7673 fixed. */
7674 if (!COMPLETE_TYPE_P (type)
7675 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7676 return -1;
7678 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7680 if (TREE_CODE (field) != FIELD_DECL)
7681 continue;
7683 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7684 if (sub_count < 0)
7685 return -1;
7686 count += sub_count;
7689 /* There must be no padding. */
7690 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7691 return -1;
7693 return count;
7696 case UNION_TYPE:
7697 case QUAL_UNION_TYPE:
7699 /* These aren't very interesting except in a degenerate case. */
7700 int count = 0;
7701 int sub_count;
7702 tree field;
7704 /* Can't handle incomplete types nor sizes that are not
7705 fixed. */
7706 if (!COMPLETE_TYPE_P (type)
7707 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7708 return -1;
7710 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7712 if (TREE_CODE (field) != FIELD_DECL)
7713 continue;
7715 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7716 if (sub_count < 0)
7717 return -1;
7718 count = count > sub_count ? count : sub_count;
7721 /* There must be no padding. */
7722 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7723 return -1;
7725 return count;
7728 default:
7729 break;
7732 return -1;
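/* Illustrative note (editorial addition, not from the original sources):
   for a hypothetical type "struct { double x; double y[3]; }" the walk
   above returns 4 with *modep set to DFmode, so the type can qualify as a
   homogeneous floating-point aggregate.  A struct mixing float and double
   fields returns -1 because the second field's mode no longer matches
   *modep.  */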
7735 /* Return true if we use LRA instead of the reload pass. */
7736 static bool
7737 aarch64_lra_p (void)
7739 return aarch64_lra_flag;
7742 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7743 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7744 array types. The C99 floating-point complex types are also considered
7745 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7746 types, which are GCC extensions and outside the scope of AAPCS64, are
7747 treated as composite types here as well.
7749 Note that MODE itself is not sufficient in determining whether a type
7750 is such a composite type or not. This is because
7751 stor-layout.c:compute_record_mode may have already changed the MODE
7752 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7753 structure with only one field may have its MODE set to the mode of the
7754 field. Also an integer mode whose size matches the size of the
7755 RECORD_TYPE type may be used to substitute the original mode
7756 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7757 solely relied on. */
7759 static bool
7760 aarch64_composite_type_p (const_tree type,
7761 machine_mode mode)
7763 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7764 return true;
7766 if (mode == BLKmode
7767 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7768 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7769 return true;
7771 return false;
7774 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7775 type as described in AAPCS64 \S 4.1.2.
7777 See the comment above aarch64_composite_type_p for the notes on MODE. */
7779 static bool
7780 aarch64_short_vector_p (const_tree type,
7781 machine_mode mode)
7783 HOST_WIDE_INT size = -1;
7785 if (type && TREE_CODE (type) == VECTOR_TYPE)
7786 size = int_size_in_bytes (type);
7787 else if (!aarch64_composite_type_p (type, mode)
7788 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7789 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7790 size = GET_MODE_SIZE (mode);
7792 return (size == 8 || size == 16) ? true : false;
7795 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7796 shall be passed or returned in simd/fp register(s) (providing these
7797 parameter passing registers are available).
7799 Upon successful return, *COUNT returns the number of needed registers,
7800 *BASE_MODE returns the mode of the individual register and, when IS_HA
7801 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7802 floating-point aggregate or a homogeneous short-vector aggregate. */
7804 static bool
7805 aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
7806 const_tree type,
7807 machine_mode *base_mode,
7808 int *count,
7809 bool *is_ha)
7811 machine_mode new_mode = VOIDmode;
7812 bool composite_p = aarch64_composite_type_p (type, mode);
7814 if (is_ha != NULL) *is_ha = false;
7816 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7817 || aarch64_short_vector_p (type, mode))
7819 *count = 1;
7820 new_mode = mode;
7822 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7824 if (is_ha != NULL) *is_ha = true;
7825 *count = 2;
7826 new_mode = GET_MODE_INNER (mode);
7828 else if (type && composite_p)
7830 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7832 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7834 if (is_ha != NULL) *is_ha = true;
7835 *count = ag_count;
7837 else
7838 return false;
7840 else
7841 return false;
7843 *base_mode = new_mode;
7844 return true;
7847 /* Implement TARGET_STRUCT_VALUE_RTX. */
7849 static rtx
7850 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7851 int incoming ATTRIBUTE_UNUSED)
7853 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7856 /* Implements target hook vector_mode_supported_p. */
7857 static bool
7858 aarch64_vector_mode_supported_p (machine_mode mode)
7860 if (TARGET_SIMD
7861 && (mode == V4SImode || mode == V8HImode
7862 || mode == V16QImode || mode == V2DImode
7863 || mode == V2SImode || mode == V4HImode
7864 || mode == V8QImode || mode == V2SFmode
7865 || mode == V4SFmode || mode == V2DFmode
7866 || mode == V1DFmode))
7867 return true;
7869 return false;
7872 /* Return appropriate SIMD container
7873 for MODE within a vector of WIDTH bits. */
7874 static machine_mode
7875 aarch64_simd_container_mode (machine_mode mode, unsigned width)
7877 gcc_assert (width == 64 || width == 128);
7878 if (TARGET_SIMD)
7880 if (width == 128)
7881 switch (mode)
7883 case DFmode:
7884 return V2DFmode;
7885 case SFmode:
7886 return V4SFmode;
7887 case SImode:
7888 return V4SImode;
7889 case HImode:
7890 return V8HImode;
7891 case QImode:
7892 return V16QImode;
7893 case DImode:
7894 return V2DImode;
7895 default:
7896 break;
7898 else
7899 switch (mode)
7901 case SFmode:
7902 return V2SFmode;
7903 case SImode:
7904 return V2SImode;
7905 case HImode:
7906 return V4HImode;
7907 case QImode:
7908 return V8QImode;
7909 default:
7910 break;
7913 return word_mode;
7916 /* Return 128-bit container as the preferred SIMD mode for MODE. */
7917 static machine_mode
7918 aarch64_preferred_simd_mode (machine_mode mode)
7920 return aarch64_simd_container_mode (mode, 128);
7923 /* Return the bitmask of possible vector sizes for the vectorizer
7924 to iterate over. */
7925 static unsigned int
7926 aarch64_autovectorize_vector_sizes (void)
7928 return (16 | 8);
7931 /* Implement TARGET_MANGLE_TYPE. */
7933 static const char *
7934 aarch64_mangle_type (const_tree type)
7936 /* The AArch64 ABI documents say that "__va_list" has to be
7937 mangled as if it is in the "std" namespace. */
7938 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7939 return "St9__va_list";
7941 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
7942 builtin types. */
7943 if (TYPE_NAME (type) != NULL)
7944 return aarch64_mangle_builtin_type (type);
7946 /* Use the default mangling. */
7947 return NULL;
7951 /* Return true if the rtx_insn contains a MEM RTX somewhere
7952 in it. */
7954 static bool
7955 has_memory_op (rtx_insn *mem_insn)
7957 subrtx_iterator::array_type array;
7958 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
7959 if (MEM_P (*iter))
7960 return true;
7962 return false;
7965 /* Find the first rtx_insn before insn that will generate an assembly
7966 instruction. */
7968 static rtx_insn *
7969 aarch64_prev_real_insn (rtx_insn *insn)
7971 if (!insn)
7972 return NULL;
7976 insn = prev_real_insn (insn);
7978 while (insn && recog_memoized (insn) < 0);
7980 return insn;
7983 static bool
7984 is_madd_op (enum attr_type t1)
7986 unsigned int i;
7987 /* A number of these may be AArch32 only. */
7988 enum attr_type mlatypes[] = {
7989 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
7990 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
7991 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
7994 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
7996 if (t1 == mlatypes[i])
7997 return true;
8000 return false;
8003 /* Check if there is a register dependency between a load and the insn
8004 for which we hold recog_data. */
8006 static bool
8007 dep_between_memop_and_curr (rtx memop)
8009 rtx load_reg;
8010 int opno;
8012 gcc_assert (GET_CODE (memop) == SET);
8014 if (!REG_P (SET_DEST (memop)))
8015 return false;
8017 load_reg = SET_DEST (memop);
8018 for (opno = 1; opno < recog_data.n_operands; opno++)
8020 rtx operand = recog_data.operand[opno];
8021 if (REG_P (operand)
8022 && reg_overlap_mentioned_p (load_reg, operand))
8023 return true;
8026 return false;
8030 /* When working around the Cortex-A53 erratum 835769,
8031 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
8032 instruction and has a preceding memory instruction such that a NOP
8033 should be inserted between them. */
8035 bool
8036 aarch64_madd_needs_nop (rtx_insn* insn)
8038 enum attr_type attr_type;
8039 rtx_insn *prev;
8040 rtx body;
8042 if (!aarch64_fix_a53_err835769)
8043 return false;
8045 if (recog_memoized (insn) < 0)
8046 return false;
8048 attr_type = get_attr_type (insn);
8049 if (!is_madd_op (attr_type))
8050 return false;
8052 prev = aarch64_prev_real_insn (insn);
8053 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
8054 Restore recog state to INSN to avoid state corruption. */
8055 extract_constrain_insn_cached (insn);
8057 if (!prev || !has_memory_op (prev))
8058 return false;
8060 body = single_set (prev);
8062 /* If the previous insn is a memory op and there is no dependency between
8063 it and the DImode madd, emit a NOP between them. If body is NULL then we
8064 have a complex memory operation, probably a load/store pair.
8065 Be conservative for now and emit a NOP. */
8066 if (GET_MODE (recog_data.operand[0]) == DImode
8067 && (!body || !dep_between_memop_and_curr (body)))
8068 return true;
8070 return false;
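/* Illustrative note (editorial addition, not from the original sources):
   the erratum 835769 workaround above targets sequences along the lines of
       ldr  x3, [x2]
       madd x0, x1, x4, x5
   where a 64-bit multiply-accumulate directly follows a memory operation;
   when such a pair is found and the loaded register does not feed the
   multiply-accumulate, FINAL_PRESCAN_INSN below emits a NOP between the
   two instructions.  */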
8075 /* Implement FINAL_PRESCAN_INSN. */
8077 void
8078 aarch64_final_prescan_insn (rtx_insn *insn)
8080 if (aarch64_madd_needs_nop (insn))
8081 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
8085 /* Return the equivalent letter for size. */
8086 static char
8087 sizetochar (int size)
8089 switch (size)
8091 case 64: return 'd';
8092 case 32: return 's';
8093 case 16: return 'h';
8094 case 8 : return 'b';
8095 default: gcc_unreachable ();
8099 /* Return true iff x is a uniform vector of floating-point
8100 constants, and the constant can be represented in
8101 quarter-precision form. Note, as aarch64_float_const_representable_p
8102 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
8103 static bool
8104 aarch64_vect_float_const_representable_p (rtx x)
8106 int i = 0;
8107 REAL_VALUE_TYPE r0, ri;
8108 rtx x0, xi;
8110 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
8111 return false;
8113 x0 = CONST_VECTOR_ELT (x, 0);
8114 if (!CONST_DOUBLE_P (x0))
8115 return false;
8117 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
8119 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
8121 xi = CONST_VECTOR_ELT (x, i);
8122 if (!CONST_DOUBLE_P (xi))
8123 return false;
8125 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
8126 if (!REAL_VALUES_EQUAL (r0, ri))
8127 return false;
8130 return aarch64_float_const_representable_p (x0);
8133 /* Return true for valid and false for invalid. */
8134 bool
8135 aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
8136 struct simd_immediate_info *info)
8138 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
8139 matches = 1; \
8140 for (i = 0; i < idx; i += (STRIDE)) \
8141 if (!(TEST)) \
8142 matches = 0; \
8143 if (matches) \
8145 immtype = (CLASS); \
8146 elsize = (ELSIZE); \
8147 eshift = (SHIFT); \
8148 emvn = (NEG); \
8149 break; \
8152 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8153 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8154 unsigned char bytes[16];
8155 int immtype = -1, matches;
8156 unsigned int invmask = inverse ? 0xff : 0;
8157 int eshift, emvn;
8159 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8161 if (! (aarch64_simd_imm_zero_p (op, mode)
8162 || aarch64_vect_float_const_representable_p (op)))
8163 return false;
8165 if (info)
8167 info->value = CONST_VECTOR_ELT (op, 0);
8168 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
8169 info->mvn = false;
8170 info->shift = 0;
8173 return true;
8176 /* Splat vector constant out into a byte vector. */
8177 for (i = 0; i < n_elts; i++)
8179 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
8180 it must be laid out in the vector register in reverse order. */
8181 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
8182 unsigned HOST_WIDE_INT elpart;
8183 unsigned int part, parts;
8185 if (CONST_INT_P (el))
8187 elpart = INTVAL (el);
8188 parts = 1;
8190 else if (GET_CODE (el) == CONST_DOUBLE)
8192 elpart = CONST_DOUBLE_LOW (el);
8193 parts = 2;
8195 else
8196 gcc_unreachable ();
8198 for (part = 0; part < parts; part++)
8200 unsigned int byte;
8201 for (byte = 0; byte < innersize; byte++)
8203 bytes[idx++] = (elpart & 0xff) ^ invmask;
8204 elpart >>= BITS_PER_UNIT;
8206 if (GET_CODE (el) == CONST_DOUBLE)
8207 elpart = CONST_DOUBLE_HIGH (el);
8211 /* Sanity check. */
8212 gcc_assert (idx == GET_MODE_SIZE (mode));
8216 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8217 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
8219 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8220 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8222 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8223 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8225 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8226 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
8228 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
8230 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
8232 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8233 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
8235 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8236 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8238 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8239 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8241 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8242 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
8244 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
8246 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
8248 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8249 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8251 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8252 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8254 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8255 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8257 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8258 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8260 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
8262 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8263 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
8265 while (0);
8267 if (immtype == -1)
8268 return false;
8270 if (info)
8272 info->element_width = elsize;
8273 info->mvn = emvn != 0;
8274 info->shift = eshift;
8276 unsigned HOST_WIDE_INT imm = 0;
8278 if (immtype >= 12 && immtype <= 15)
8279 info->msl = true;
8281 /* Un-invert bytes of recognized vector, if necessary. */
8282 if (invmask != 0)
8283 for (i = 0; i < idx; i++)
8284 bytes[i] ^= invmask;
8286 if (immtype == 17)
8288 /* FIXME: Broken on 32-bit H_W_I hosts. */
8289 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8291 for (i = 0; i < 8; i++)
8292 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8293 << (i * BITS_PER_UNIT);
8296 info->value = GEN_INT (imm);
8298 else
8300 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8301 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8303 /* Construct 'abcdefgh' because the assembler cannot handle
8304 generic constants. */
8305 if (info->mvn)
8306 imm = ~imm;
8307 imm = (imm >> info->shift) & 0xff;
8308 info->value = GEN_INT (imm);
8312 return true;
8313 #undef CHECK
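/* Illustrative note (editorial addition, not from the original sources):
   as an example of the matching above, a hypothetical V4SImode constant
   whose elements are all 0x00ab0000 splats to the byte pattern
   { 00, 00, ab, 00, ... } and matches immtype 2 (element size 32, shift 16,
   no inversion), so the returned info describes the 8-bit value 0xab
   shifted left by 16, as used by MOVI.  */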
8316 /* Check if immediate shift constants are within range. */
8317 bool
8318 aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
8320 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
8321 if (left)
8322 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
8323 else
8324 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
8327 /* Return true if X is a uniform vector where all elements
8328 are either the floating-point constant 0.0 or the
8329 integer constant 0. */
8330 bool
8331 aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
8333 return x == CONST0_RTX (mode);
8336 bool
8337 aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
8339 HOST_WIDE_INT imm = INTVAL (x);
8340 int i;
8342 for (i = 0; i < 8; i++)
8344 unsigned int byte = imm & 0xff;
8345 if (byte != 0xff && byte != 0)
8346 return false;
8347 imm >>= 8;
8350 return true;
8353 bool
8354 aarch64_mov_operand_p (rtx x,
8355 enum aarch64_symbol_context context,
8356 machine_mode mode)
8358 if (GET_CODE (x) == HIGH
8359 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
8360 return true;
8362 if (CONST_INT_P (x))
8363 return true;
8365 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
8366 return true;
8368 return aarch64_classify_symbolic_expression (x, context)
8369 == SYMBOL_TINY_ABSOLUTE;
8372 /* Return a const_int vector of VAL. */
8374 aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
8376 int nunits = GET_MODE_NUNITS (mode);
8377 rtvec v = rtvec_alloc (nunits);
8378 int i;
8380 for (i=0; i < nunits; i++)
8381 RTVEC_ELT (v, i) = GEN_INT (val);
8383 return gen_rtx_CONST_VECTOR (mode, v);
8386 /* Check OP is a legal scalar immediate for the MOVI instruction. */
8388 bool
8389 aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
8391 machine_mode vmode;
8393 gcc_assert (!VECTOR_MODE_P (mode));
8394 vmode = aarch64_preferred_simd_mode (mode);
8395 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
8396 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
8399 /* Construct and return a PARALLEL RTX vector with elements numbering the
8400 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8401 the vector - from the perspective of the architecture. This does not
8402 line up with GCC's perspective on lane numbers, so we end up with
8403 different masks depending on our target endian-ness. The diagram
8404 below may help. We must draw the distinction when building masks
8405 which select one half of the vector. An instruction selecting
8406 architectural low-lanes for a big-endian target, must be described using
8407 a mask selecting GCC high-lanes.
8409 Big-Endian Little-Endian
8411 GCC 0 1 2 3 3 2 1 0
8412 | x | x | x | x | | x | x | x | x |
8413 Architecture 3 2 1 0 3 2 1 0
8415 Low Mask: { 2, 3 } { 0, 1 }
8416 High Mask: { 0, 1 } { 2, 3 }
8420 aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
8422 int nunits = GET_MODE_NUNITS (mode);
8423 rtvec v = rtvec_alloc (nunits / 2);
8424 int high_base = nunits / 2;
8425 int low_base = 0;
8426 int base;
8427 rtx t1;
8428 int i;
8430 if (BYTES_BIG_ENDIAN)
8431 base = high ? low_base : high_base;
8432 else
8433 base = high ? high_base : low_base;
8435 for (i = 0; i < nunits / 2; i++)
8436 RTVEC_ELT (v, i) = GEN_INT (base + i);
8438 t1 = gen_rtx_PARALLEL (mode, v);
8439 return t1;
8442 /* Check OP for validity as a PARALLEL RTX vector with elements
8443 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8444 from the perspective of the architecture. See the diagram above
8445 aarch64_simd_vect_par_cnst_half for more details. */
8447 bool
8448 aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
8449 bool high)
8451 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
8452 HOST_WIDE_INT count_op = XVECLEN (op, 0);
8453 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
8454 int i = 0;
8456 if (!VECTOR_MODE_P (mode))
8457 return false;
8459 if (count_op != count_ideal)
8460 return false;
8462 for (i = 0; i < count_ideal; i++)
8464 rtx elt_op = XVECEXP (op, 0, i);
8465 rtx elt_ideal = XVECEXP (ideal, 0, i);
8467 if (!CONST_INT_P (elt_op)
8468 || INTVAL (elt_ideal) != INTVAL (elt_op))
8469 return false;
8471 return true;
8474 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
8475 HIGH (exclusive). */
8476 void
8477 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8478 const_tree exp)
8480 HOST_WIDE_INT lane;
8481 gcc_assert (CONST_INT_P (operand));
8482 lane = INTVAL (operand);
8484 if (lane < low || lane >= high)
8486 if (exp)
8487 error ("%Klane %ld out of range %ld - %ld", exp, lane, low, high - 1);
8488 else
8489 error ("lane %ld out of range %ld - %ld", lane, low, high - 1);
8493 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
8494 registers). */
8495 void
8496 aarch64_simd_emit_pair_result_insn (machine_mode mode,
8497 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
8498 rtx op1)
8500 rtx mem = gen_rtx_MEM (mode, destaddr);
8501 rtx tmp1 = gen_reg_rtx (mode);
8502 rtx tmp2 = gen_reg_rtx (mode);
8504 emit_insn (intfn (tmp1, op1, tmp2));
8506 emit_move_insn (mem, tmp1);
8507 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
8508 emit_move_insn (mem, tmp2);
8511 /* Return TRUE if OP is a valid vector addressing mode. */
8512 bool
8513 aarch64_simd_mem_operand_p (rtx op)
8515 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
8516 || REG_P (XEXP (op, 0)));
8519 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
8520 not to early-clobber SRC registers in the process.
8522 We assume that the operands described by SRC and DEST represent a
8523 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
8524 number of components into which the copy has been decomposed. */
8525 void
8526 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
8527 rtx *src, unsigned int count)
8529 unsigned int i;
8531 if (!reg_overlap_mentioned_p (operands[0], operands[1])
8532 || REGNO (operands[0]) < REGNO (operands[1]))
8534 for (i = 0; i < count; i++)
8536 operands[2 * i] = dest[i];
8537 operands[2 * i + 1] = src[i];
8540 else
8542 for (i = 0; i < count; i++)
8544 operands[2 * i] = dest[count - i - 1];
8545 operands[2 * i + 1] = src[count - i - 1];
8550 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8551 one of VSTRUCT modes: OI, CI or XI. */
8552 int
8553 aarch64_simd_attr_length_move (rtx_insn *insn)
8555 machine_mode mode;
8557 extract_insn_cached (insn);
8559 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8561 mode = GET_MODE (recog_data.operand[0]);
8562 switch (mode)
8564 case OImode:
8565 return 8;
8566 case CImode:
8567 return 12;
8568 case XImode:
8569 return 16;
8570 default:
8571 gcc_unreachable ();
8574 return 4;
8577 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8578 alignment of a vector to 128 bits. */
8579 static HOST_WIDE_INT
8580 aarch64_simd_vector_alignment (const_tree type)
8582 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
8583 return MIN (align, 128);
8586 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8587 static bool
8588 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8590 if (is_packed)
8591 return false;
8593 /* We guarantee alignment for vectors up to 128-bits. */
8594 if (tree_int_cst_compare (TYPE_SIZE (type),
8595 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8596 return false;
8598 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8599 return true;
8602 /* If VALS is a vector constant that can be loaded into a register
8603 using DUP, generate instructions to do so and return an RTX to
8604 assign to the register. Otherwise return NULL_RTX. */
8605 static rtx
8606 aarch64_simd_dup_constant (rtx vals)
8608 machine_mode mode = GET_MODE (vals);
8609 machine_mode inner_mode = GET_MODE_INNER (mode);
8610 int n_elts = GET_MODE_NUNITS (mode);
8611 bool all_same = true;
8612 rtx x;
8613 int i;
8615 if (GET_CODE (vals) != CONST_VECTOR)
8616 return NULL_RTX;
8618 for (i = 1; i < n_elts; ++i)
8620 x = CONST_VECTOR_ELT (vals, i);
8621 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8622 all_same = false;
8625 if (!all_same)
8626 return NULL_RTX;
8628 /* We can load this constant by using DUP and a constant in a
8629 single ARM register. This will be cheaper than a vector
8630 load. */
8631 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8632 return gen_rtx_VEC_DUPLICATE (mode, x);
8636 /* Generate code to load VALS, which is a PARALLEL containing only
8637 constants (for vec_init) or CONST_VECTOR, efficiently into a
8638 register. Returns an RTX to copy into the register, or NULL_RTX
8639 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8640 static rtx
8641 aarch64_simd_make_constant (rtx vals)
8643 machine_mode mode = GET_MODE (vals);
8644 rtx const_dup;
8645 rtx const_vec = NULL_RTX;
8646 int n_elts = GET_MODE_NUNITS (mode);
8647 int n_const = 0;
8648 int i;
8650 if (GET_CODE (vals) == CONST_VECTOR)
8651 const_vec = vals;
8652 else if (GET_CODE (vals) == PARALLEL)
8654 /* A CONST_VECTOR must contain only CONST_INTs and
8655 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8656 Only store valid constants in a CONST_VECTOR. */
8657 for (i = 0; i < n_elts; ++i)
8659 rtx x = XVECEXP (vals, 0, i);
8660 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8661 n_const++;
8663 if (n_const == n_elts)
8664 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8666 else
8667 gcc_unreachable ();
8669 if (const_vec != NULL_RTX
8670 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
8671 /* Load using MOVI/MVNI. */
8672 return const_vec;
8673 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8674 /* Loaded using DUP. */
8675 return const_dup;
8676 else if (const_vec != NULL_RTX)
8677 /* Load from constant pool. We can not take advantage of single-cycle
8678 LD1 because we need a PC-relative addressing mode. */
8679 return const_vec;
8680 else
8681 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8682 We can not construct an initializer. */
8683 return NULL_RTX;
8686 void
8687 aarch64_expand_vector_init (rtx target, rtx vals)
8689 machine_mode mode = GET_MODE (target);
8690 machine_mode inner_mode = GET_MODE_INNER (mode);
8691 int n_elts = GET_MODE_NUNITS (mode);
8692 int n_var = 0, one_var = -1;
8693 bool all_same = true;
8694 rtx x, mem;
8695 int i;
8697 x = XVECEXP (vals, 0, 0);
8698 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8699 n_var = 1, one_var = 0;
8701 for (i = 1; i < n_elts; ++i)
8703 x = XVECEXP (vals, 0, i);
8704 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8705 ++n_var, one_var = i;
8707 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8708 all_same = false;
8711 if (n_var == 0)
8713 rtx constant = aarch64_simd_make_constant (vals);
8714 if (constant != NULL_RTX)
8716 emit_move_insn (target, constant);
8717 return;
8721 /* Splat a single non-constant element if we can. */
8722 if (all_same)
8724 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8725 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8726 return;
8729 /* One field is non-constant. Load constant then overwrite varying
8730 field. This is more efficient than using the stack. */
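/* For example, a V4SImode init from { x, 1, 2, 3 } first loads the
   constant vector { 1, 1, 2, 3 } (the varying lane temporarily takes its
   neighbour's value) and then inserts x into lane 0 via the vec_set
   pattern. */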
8731 if (n_var == 1)
8733 rtx copy = copy_rtx (vals);
8734 rtx index = GEN_INT (one_var);
8735 enum insn_code icode;
8737 /* Load constant part of vector, substitute neighboring value for
8738 varying element. */
8739 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8740 aarch64_expand_vector_init (target, copy);
8742 /* Insert variable. */
8743 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8744 icode = optab_handler (vec_set_optab, mode);
8745 gcc_assert (icode != CODE_FOR_nothing);
8746 emit_insn (GEN_FCN (icode) (target, x, index));
8747 return;
8750 /* Construct the vector in memory one field at a time
8751 and load the whole vector. */
8752 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8753 for (i = 0; i < n_elts; i++)
8754 emit_move_insn (adjust_address_nv (mem, inner_mode,
8755 i * GET_MODE_SIZE (inner_mode)),
8756 XVECEXP (vals, 0, i));
8757 emit_move_insn (target, mem);
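/* Implement TARGET_SHIFT_TRUNCATION_MASK.  Return 0 (no truncation
   guaranteed) for vector and vector-structure modes, otherwise the mode
   bitsize minus one. */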
8761 static unsigned HOST_WIDE_INT
8762 aarch64_shift_truncation_mask (machine_mode mode)
8764 return
8765 (aarch64_vector_mode_supported_p (mode)
8766 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8769 #ifndef TLS_SECTION_ASM_FLAG
8770 #define TLS_SECTION_ASM_FLAG 'T'
8771 #endif
8773 void
8774 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8775 tree decl ATTRIBUTE_UNUSED)
8777 char flagchars[10], *f = flagchars;
8779 /* If we have already declared this section, we can use an
8780 abbreviated form to switch back to it -- unless this section is
8781 part of a COMDAT group, in which case GAS requires the full
8782 declaration every time. */
8783 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8784 && (flags & SECTION_DECLARED))
8786 fprintf (asm_out_file, "\t.section\t%s\n", name);
8787 return;
8790 if (!(flags & SECTION_DEBUG))
8791 *f++ = 'a';
8792 if (flags & SECTION_WRITE)
8793 *f++ = 'w';
8794 if (flags & SECTION_CODE)
8795 *f++ = 'x';
8796 if (flags & SECTION_SMALL)
8797 *f++ = 's';
8798 if (flags & SECTION_MERGE)
8799 *f++ = 'M';
8800 if (flags & SECTION_STRINGS)
8801 *f++ = 'S';
8802 if (flags & SECTION_TLS)
8803 *f++ = TLS_SECTION_ASM_FLAG;
8804 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8805 *f++ = 'G';
8806 *f = '\0';
8808 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8810 if (!(flags & SECTION_NOTYPE))
8812 const char *type;
8813 const char *format;
8815 if (flags & SECTION_BSS)
8816 type = "nobits";
8817 else
8818 type = "progbits";
8820 #ifdef TYPE_OPERAND_FMT
8821 format = "," TYPE_OPERAND_FMT;
8822 #else
8823 format = ",@%s";
8824 #endif
8826 fprintf (asm_out_file, format, type);
8828 if (flags & SECTION_ENTSIZE)
8829 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8830 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8832 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8833 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8834 else
8835 fprintf (asm_out_file, ",%s,comdat",
8836 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8840 putc ('\n', asm_out_file);
8843 /* Select a format to encode pointers in exception handling data. */
8844 int
8845 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8847 int type;
8848 switch (aarch64_cmodel)
8850 case AARCH64_CMODEL_TINY:
8851 case AARCH64_CMODEL_TINY_PIC:
8852 case AARCH64_CMODEL_SMALL:
8853 case AARCH64_CMODEL_SMALL_PIC:
8854 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8855 for everything. */
8856 type = DW_EH_PE_sdata4;
8857 break;
8858 default:
8859 /* No assumptions here. 8-byte relocs required. */
8860 type = DW_EH_PE_sdata8;
8861 break;
8863 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8866 /* Emit load exclusive. */
8868 static void
8869 aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
8870 rtx mem, rtx model_rtx)
8872 rtx (*gen) (rtx, rtx, rtx);
8874 switch (mode)
8876 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8877 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8878 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8879 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8880 default:
8881 gcc_unreachable ();
8884 emit_insn (gen (rval, mem, model_rtx));
8887 /* Emit store exclusive. */
8889 static void
8890 aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
8891 rtx rval, rtx mem, rtx model_rtx)
8893 rtx (*gen) (rtx, rtx, rtx, rtx);
8895 switch (mode)
8897 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8898 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8899 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8900 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8901 default:
8902 gcc_unreachable ();
8905 emit_insn (gen (bval, rval, mem, model_rtx));
8908 /* Emit jump pattern INSN and mark the branch as very unlikely to be taken. */
8910 static void
8911 aarch64_emit_unlikely_jump (rtx insn)
8913 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8915 insn = emit_jump_insn (insn);
8916 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8919 /* Expand a compare and swap pattern. */
8921 void
8922 aarch64_expand_compare_and_swap (rtx operands[])
8924 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8925 machine_mode mode, cmp_mode;
8926 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8928 bval = operands[0];
8929 rval = operands[1];
8930 mem = operands[2];
8931 oldval = operands[3];
8932 newval = operands[4];
8933 is_weak = operands[5];
8934 mod_s = operands[6];
8935 mod_f = operands[7];
8936 mode = GET_MODE (mem);
8937 cmp_mode = mode;
8939 /* Normally the succ memory model must be stronger than fail, but in the
8940 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8941 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8943 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8944 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8945 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8947 switch (mode)
8949 case QImode:
8950 case HImode:
8951 /* For short modes, we're going to perform the comparison in SImode,
8952 so do the zero-extension now. */
8953 cmp_mode = SImode;
8954 rval = gen_reg_rtx (SImode);
8955 oldval = convert_modes (SImode, mode, oldval, true);
8956 /* Fall through. */
8958 case SImode:
8959 case DImode:
8960 /* Force the value into a register if needed. */
8961 if (!aarch64_plus_operand (oldval, mode))
8962 oldval = force_reg (cmp_mode, oldval);
8963 break;
8965 default:
8966 gcc_unreachable ();
8969 switch (mode)
8971 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8972 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8973 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8974 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8975 default:
8976 gcc_unreachable ();
8979 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8981 if (mode == QImode || mode == HImode)
8982 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8984 x = gen_rtx_REG (CCmode, CC_REGNUM);
8985 x = gen_rtx_EQ (SImode, x, const0_rtx);
8986 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8989 /* Split a compare and swap pattern. */
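/* A rough sketch of the sequence emitted below, assuming the strong
   (looping) variant; the exact exclusive/acquire/release forms depend on
   the memory model operand:

	1:	ldxr	rval, [mem]
		cmp	rval, oldval
		b.ne	2f
		stxr	scratch, newval, [mem]
		cbnz	scratch, 1b
	2:

   The weak variant omits the retry branch and instead leaves the
   store-exclusive status in the condition flags. */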
8991 void
8992 aarch64_split_compare_and_swap (rtx operands[])
8994 rtx rval, mem, oldval, newval, scratch;
8995 machine_mode mode;
8996 bool is_weak;
8997 rtx_code_label *label1, *label2;
8998 rtx x, cond;
9000 rval = operands[0];
9001 mem = operands[1];
9002 oldval = operands[2];
9003 newval = operands[3];
9004 is_weak = (operands[4] != const0_rtx);
9005 scratch = operands[7];
9006 mode = GET_MODE (mem);
9008 label1 = NULL;
9009 if (!is_weak)
9011 label1 = gen_label_rtx ();
9012 emit_label (label1);
9014 label2 = gen_label_rtx ();
9016 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
9018 cond = aarch64_gen_compare_reg (NE, rval, oldval);
9019 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9020 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9021 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
9022 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9024 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
9026 if (!is_weak)
9028 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
9029 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9030 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
9031 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9033 else
9035 cond = gen_rtx_REG (CCmode, CC_REGNUM);
9036 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
9037 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
9040 emit_label (label2);
9043 /* Split an atomic operation. */
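/* A rough sketch of the emitted retry loop; the exact load/store-exclusive
   variants depend on MODEL_RTX:

	1:	ldxr	old, [mem]
		<op>	new, old, value
		stxr	cond, new, [mem]
		cbnz	cond, 1b  */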
9045 void
9046 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9047 rtx value, rtx model_rtx, rtx cond)
9049 machine_mode mode = GET_MODE (mem);
9050 machine_mode wmode = (mode == DImode ? DImode : SImode);
9051 rtx_code_label *label;
9052 rtx x;
9054 label = gen_label_rtx ();
9055 emit_label (label);
9057 if (new_out)
9058 new_out = gen_lowpart (wmode, new_out);
9059 if (old_out)
9060 old_out = gen_lowpart (wmode, old_out);
9061 else
9062 old_out = new_out;
9063 value = simplify_gen_subreg (wmode, value, mode, 0);
9065 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
9067 switch (code)
9069 case SET:
9070 new_out = value;
9071 break;
9073 case NOT:
9074 x = gen_rtx_AND (wmode, old_out, value);
9075 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9076 x = gen_rtx_NOT (wmode, new_out);
9077 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9078 break;
9080 case MINUS:
9081 if (CONST_INT_P (value))
9083 value = GEN_INT (-INTVAL (value));
9084 code = PLUS;
9086 /* Fall through. */
9088 default:
9089 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
9090 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9091 break;
9094 aarch64_emit_store_exclusive (mode, cond, mem,
9095 gen_lowpart (mode, new_out), model_rtx);
9097 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9098 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9099 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
9100 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
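/* Print "+<ext>" for every architecture extension whose flags are all
   enabled in aarch64_isa_flags, then a newline.  Used as a suffix to the
   .arch/.cpu directives emitted below. */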
9103 static void
9104 aarch64_print_extension (void)
9106 const struct aarch64_option_extension *opt = NULL;
9108 for (opt = all_extensions; opt->name != NULL; opt++)
9109 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
9110 asm_fprintf (asm_out_file, "+%s", opt->name);
9112 asm_fprintf (asm_out_file, "\n");
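/* Implement TARGET_ASM_FILE_START.  Emit a .arch or .cpu directive
   describing the selected architecture or CPU (plus any extensions),
   followed by the default file prologue. */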
9115 static void
9116 aarch64_start_file (void)
9118 if (selected_arch)
9120 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
9121 aarch64_print_extension ();
9123 else if (selected_cpu)
9125 const char *truncated_name
9126 = aarch64_rewrite_selected_cpu (selected_cpu->name);
9127 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
9128 aarch64_print_extension ();
9130 default_file_start();
9133 /* Target hook for c_mode_for_suffix. */
9134 static machine_mode
9135 aarch64_c_mode_for_suffix (char suffix)
9137 if (suffix == 'q')
9138 return TFmode;
9140 return VOIDmode;
9143 /* We can only represent floating point constants which will fit in
9144 "quarter-precision" values. These values are characterised by
9145 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
9148 (-1)^s * (n/16) * 2^r
9150 Where:
9151 's' is the sign bit.
9152 'n' is an integer in the range 16 <= n <= 31.
9153 'r' is an integer in the range -3 <= r <= 4. */
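/* For example, 2.5 = (-1)^0 * (20/16) * 2^1 is representable (s = 0,
   n = 20, r = 1), whereas 0.1 has no encoding of this form.  The
   representable magnitudes therefore range from (16/16) * 2^-3 = 0.125
   up to (31/16) * 2^4 = 31.0. */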
9155 /* Return true iff X can be represented by a quarter-precision
9156 floating point immediate operand. Note, we cannot represent 0.0. */
9157 bool
9158 aarch64_float_const_representable_p (rtx x)
9160 /* This represents our current view of how many bits
9161 make up the mantissa. */
9162 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
9163 int exponent;
9164 unsigned HOST_WIDE_INT mantissa, mask;
9165 REAL_VALUE_TYPE r, m;
9166 bool fail;
9168 if (!CONST_DOUBLE_P (x))
9169 return false;
9171 if (GET_MODE (x) == VOIDmode)
9172 return false;
9174 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9176 /* We cannot represent infinities, NaNs or +/-zero. We won't
9177 know if we have +zero until we analyse the mantissa, but we
9178 can reject the other invalid values. */
9179 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
9180 || REAL_VALUE_MINUS_ZERO (r))
9181 return false;
9183 /* Extract exponent. */
9184 r = real_value_abs (&r);
9185 exponent = REAL_EXP (&r);
9187 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9188 highest (sign) bit, with a fixed binary point at bit point_pos.
9189 m1 holds the low part of the mantissa, m2 the high part.
9190 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
9191 bits for the mantissa, this can fail (low bits will be lost). */
9192 real_ldexp (&m, &r, point_pos - exponent);
9193 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
9195 /* If the low part of the mantissa has bits set we cannot represent
9196 the value. */
9197 if (w.elt (0) != 0)
9198 return false;
9199 /* We have rejected the lower HOST_WIDE_INT, so update our
9200 understanding of how many bits lie in the mantissa and
9201 look only at the high HOST_WIDE_INT. */
9202 mantissa = w.elt (1);
9203 point_pos -= HOST_BITS_PER_WIDE_INT;
9205 /* We can only represent values with a mantissa of the form 1.xxxx. */
9206 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9207 if ((mantissa & mask) != 0)
9208 return false;
9210 /* Having filtered unrepresentable values, we may now remove all
9211 but the highest 5 bits. */
9212 mantissa >>= point_pos - 5;
9214 /* We cannot represent the value 0.0, so reject it. This is handled
9215 elsewhere. */
9216 if (mantissa == 0)
9217 return false;
9219 /* Then, as bit 4 is always set, we can mask it off, leaving
9220 the mantissa in the range [0, 15]. */
9221 mantissa &= ~(1 << 4);
9222 gcc_assert (mantissa <= 15);
9224 /* GCC internally does not use IEEE754-like encoding (where normalized
9225 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
9226 Our mantissa values are shifted 4 places to the left relative to
9227 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
9228 by 5 places to correct for GCC's representation. */
9229 exponent = 5 - exponent;
9231 return (exponent >= 0 && exponent <= 7);
9234 char*
9235 aarch64_output_simd_mov_immediate (rtx const_vector,
9236 machine_mode mode,
9237 unsigned width)
9239 bool is_valid;
9240 static char templ[40];
9241 const char *mnemonic;
9242 const char *shift_op;
9243 unsigned int lane_count = 0;
9244 char element_char;
9246 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
9248 /* This will return true to show const_vector is legal for use as either
9249 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
9250 also update INFO to show how the immediate should be generated. */
9251 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
9252 gcc_assert (is_valid);
9254 element_char = sizetochar (info.element_width);
9255 lane_count = width / info.element_width;
9257 mode = GET_MODE_INNER (mode);
9258 if (mode == SFmode || mode == DFmode)
9260 gcc_assert (info.shift == 0 && ! info.mvn);
9261 if (aarch64_float_const_zero_rtx_p (info.value))
9262 info.value = GEN_INT (0);
9263 else
9265 #define buf_size 20
9266 REAL_VALUE_TYPE r;
9267 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
9268 char float_buf[buf_size] = {'\0'};
9269 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
9270 #undef buf_size
9272 if (lane_count == 1)
9273 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
9274 else
9275 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
9276 lane_count, element_char, float_buf);
9277 return templ;
9281 mnemonic = info.mvn ? "mvni" : "movi";
9282 shift_op = info.msl ? "msl" : "lsl";
9284 if (lane_count == 1)
9285 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
9286 mnemonic, UINTVAL (info.value));
9287 else if (info.shift)
9288 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
9289 ", %s %d", mnemonic, lane_count, element_char,
9290 UINTVAL (info.value), shift_op, info.shift);
9291 else
9292 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
9293 mnemonic, lane_count, element_char, UINTVAL (info.value));
9294 return templ;
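/* The templates returned above look like, e.g., "movi\t%d0, 0x1" for a
   single lane or "movi\t%0.4s, 0x1, lsl 8" for a shifted vector form;
   operand 0 is substituted later by the usual output machinery. */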
9297 char*
9298 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
9299 machine_mode mode)
9301 machine_mode vmode;
9303 gcc_assert (!VECTOR_MODE_P (mode));
9304 vmode = aarch64_simd_container_mode (mode, 64);
9305 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
9306 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
9309 /* Split operands into moves from op[1] + op[2] into op[0]. */
9311 void
9312 aarch64_split_combinev16qi (rtx operands[3])
9314 unsigned int dest = REGNO (operands[0]);
9315 unsigned int src1 = REGNO (operands[1]);
9316 unsigned int src2 = REGNO (operands[2]);
9317 machine_mode halfmode = GET_MODE (operands[1]);
9318 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
9319 rtx destlo, desthi;
9321 gcc_assert (halfmode == V16QImode);
9323 if (src1 == dest && src2 == dest + halfregs)
9325 /* No-op move. Can't split to nothing; emit something. */
9326 emit_note (NOTE_INSN_DELETED);
9327 return;
9330 /* Preserve register attributes for variable tracking. */
9331 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
9332 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
9333 GET_MODE_SIZE (halfmode));
9335 /* Special case of reversed high/low parts. */
9336 if (reg_overlap_mentioned_p (operands[2], destlo)
9337 && reg_overlap_mentioned_p (operands[1], desthi))
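/* Swap the two halves in place with three EORs, avoiding the need
   for a scratch register. */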
9339 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9340 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
9341 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9343 else if (!reg_overlap_mentioned_p (operands[2], destlo))
9345 /* Try to avoid unnecessary moves if part of the result
9346 is in the right place already. */
9347 if (src1 != dest)
9348 emit_move_insn (destlo, operands[1]);
9349 if (src2 != dest + halfregs)
9350 emit_move_insn (desthi, operands[2]);
9352 else
9354 if (src2 != dest + halfregs)
9355 emit_move_insn (desthi, operands[2]);
9356 if (src1 != dest)
9357 emit_move_insn (destlo, operands[1]);
9361 /* vec_perm support. */
9363 #define MAX_VECT_LEN 16
9365 struct expand_vec_perm_d
9367 rtx target, op0, op1;
9368 unsigned char perm[MAX_VECT_LEN];
9369 machine_mode vmode;
9370 unsigned char nelt;
9371 bool one_vector_p;
9372 bool testing_p;
9375 /* Generate a variable permutation. */
9377 static void
9378 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
9380 machine_mode vmode = GET_MODE (target);
9381 bool one_vector_p = rtx_equal_p (op0, op1);
9383 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
9384 gcc_checking_assert (GET_MODE (op0) == vmode);
9385 gcc_checking_assert (GET_MODE (op1) == vmode);
9386 gcc_checking_assert (GET_MODE (sel) == vmode);
9387 gcc_checking_assert (TARGET_SIMD);
9389 if (one_vector_p)
9391 if (vmode == V8QImode)
9393 /* Expand the argument to a V16QI mode by duplicating it. */
9394 rtx pair = gen_reg_rtx (V16QImode);
9395 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
9396 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9398 else
9400 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
9403 else
9405 rtx pair;
9407 if (vmode == V8QImode)
9409 pair = gen_reg_rtx (V16QImode);
9410 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
9411 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9413 else
9415 pair = gen_reg_rtx (OImode);
9416 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
9417 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
9422 void
9423 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
9425 machine_mode vmode = GET_MODE (target);
9426 unsigned int nelt = GET_MODE_NUNITS (vmode);
9427 bool one_vector_p = rtx_equal_p (op0, op1);
9428 rtx mask;
9430 /* The TBL instruction does not use a modulo index, so we must take care
9431 of that ourselves. */
9432 mask = aarch64_simd_gen_const_vector_dup (vmode,
9433 one_vector_p ? nelt - 1 : 2 * nelt - 1);
9434 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
9436 /* For big-endian, we also need to reverse the index within the vector
9437 (but not which vector). */
9438 if (BYTES_BIG_ENDIAN)
9440 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
9441 if (!one_vector_p)
9442 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
9443 sel = expand_simple_binop (vmode, XOR, sel, mask,
9444 NULL, 0, OPTAB_LIB_WIDEN);
9446 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
9449 /* Recognize patterns suitable for the TRN instructions. */
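/* For example, on V4SI with two input vectors, TRN1 corresponds to the
   permutation { 0, 4, 2, 6 } and TRN2 to { 1, 5, 3, 7 } (little-endian
   lane numbering). */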
9450 static bool
9451 aarch64_evpc_trn (struct expand_vec_perm_d *d)
9453 unsigned int i, odd, mask, nelt = d->nelt;
9454 rtx out, in0, in1, x;
9455 rtx (*gen) (rtx, rtx, rtx);
9456 machine_mode vmode = d->vmode;
9458 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9459 return false;
9461 /* Note that these are little-endian tests.
9462 We correct for big-endian later. */
9463 if (d->perm[0] == 0)
9464 odd = 0;
9465 else if (d->perm[0] == 1)
9466 odd = 1;
9467 else
9468 return false;
9469 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9471 for (i = 0; i < nelt; i += 2)
9473 if (d->perm[i] != i + odd)
9474 return false;
9475 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
9476 return false;
9479 /* Success! */
9480 if (d->testing_p)
9481 return true;
9483 in0 = d->op0;
9484 in1 = d->op1;
9485 if (BYTES_BIG_ENDIAN)
9487 x = in0, in0 = in1, in1 = x;
9488 odd = !odd;
9490 out = d->target;
9492 if (odd)
9494 switch (vmode)
9496 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
9497 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
9498 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
9499 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
9500 case V4SImode: gen = gen_aarch64_trn2v4si; break;
9501 case V2SImode: gen = gen_aarch64_trn2v2si; break;
9502 case V2DImode: gen = gen_aarch64_trn2v2di; break;
9503 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
9504 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
9505 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
9506 default:
9507 return false;
9510 else
9512 switch (vmode)
9514 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
9515 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
9516 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
9517 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
9518 case V4SImode: gen = gen_aarch64_trn1v4si; break;
9519 case V2SImode: gen = gen_aarch64_trn1v2si; break;
9520 case V2DImode: gen = gen_aarch64_trn1v2di; break;
9521 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
9522 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
9523 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
9524 default:
9525 return false;
9529 emit_insn (gen (out, in0, in1));
9530 return true;
9533 /* Recognize patterns suitable for the UZP instructions. */
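/* For example, on V4SI with two input vectors, UZP1 corresponds to the
   permutation { 0, 2, 4, 6 } and UZP2 to { 1, 3, 5, 7 } (little-endian
   lane numbering). */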
9534 static bool
9535 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
9537 unsigned int i, odd, mask, nelt = d->nelt;
9538 rtx out, in0, in1, x;
9539 rtx (*gen) (rtx, rtx, rtx);
9540 machine_mode vmode = d->vmode;
9542 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9543 return false;
9545 /* Note that these are little-endian tests.
9546 We correct for big-endian later. */
9547 if (d->perm[0] == 0)
9548 odd = 0;
9549 else if (d->perm[0] == 1)
9550 odd = 1;
9551 else
9552 return false;
9553 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9555 for (i = 0; i < nelt; i++)
9557 unsigned elt = (i * 2 + odd) & mask;
9558 if (d->perm[i] != elt)
9559 return false;
9562 /* Success! */
9563 if (d->testing_p)
9564 return true;
9566 in0 = d->op0;
9567 in1 = d->op1;
9568 if (BYTES_BIG_ENDIAN)
9570 x = in0, in0 = in1, in1 = x;
9571 odd = !odd;
9573 out = d->target;
9575 if (odd)
9577 switch (vmode)
9579 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9580 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9581 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9582 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9583 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9584 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9585 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9586 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9587 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9588 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9589 default:
9590 return false;
9593 else
9595 switch (vmode)
9597 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9598 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9599 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9600 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9601 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9602 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9603 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9604 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9605 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9606 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9607 default:
9608 return false;
9612 emit_insn (gen (out, in0, in1));
9613 return true;
9616 /* Recognize patterns suitable for the ZIP instructions. */
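/* For example, on V4SI with two input vectors, ZIP1 corresponds to the
   permutation { 0, 4, 1, 5 } and ZIP2 to { 2, 6, 3, 7 } (little-endian
   lane numbering). */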
9617 static bool
9618 aarch64_evpc_zip (struct expand_vec_perm_d *d)
9620 unsigned int i, high, mask, nelt = d->nelt;
9621 rtx out, in0, in1, x;
9622 rtx (*gen) (rtx, rtx, rtx);
9623 machine_mode vmode = d->vmode;
9625 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9626 return false;
9628 /* Note that these are little-endian tests.
9629 We correct for big-endian later. */
9630 high = nelt / 2;
9631 if (d->perm[0] == high)
9632 /* Do Nothing. */
9634 else if (d->perm[0] == 0)
9635 high = 0;
9636 else
9637 return false;
9638 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9640 for (i = 0; i < nelt / 2; i++)
9642 unsigned elt = (i + high) & mask;
9643 if (d->perm[i * 2] != elt)
9644 return false;
9645 elt = (elt + nelt) & mask;
9646 if (d->perm[i * 2 + 1] != elt)
9647 return false;
9650 /* Success! */
9651 if (d->testing_p)
9652 return true;
9654 in0 = d->op0;
9655 in1 = d->op1;
9656 if (BYTES_BIG_ENDIAN)
9658 x = in0, in0 = in1, in1 = x;
9659 high = !high;
9661 out = d->target;
9663 if (high)
9665 switch (vmode)
9667 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9668 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9669 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9670 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9671 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9672 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9673 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9674 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9675 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9676 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9677 default:
9678 return false;
9681 else
9683 switch (vmode)
9685 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9686 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9687 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9688 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9689 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9690 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9691 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9692 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9693 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9694 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9695 default:
9696 return false;
9700 emit_insn (gen (out, in0, in1));
9701 return true;
9704 /* Recognize patterns for the EXT insn. */
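/* The permutation must select consecutive elements starting at d->perm[0];
   for example { 3, 4, 5, 6 } on V4SI with two input vectors maps to EXT
   with an offset of 3. */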
9706 static bool
9707 aarch64_evpc_ext (struct expand_vec_perm_d *d)
9709 unsigned int i, nelt = d->nelt;
9710 rtx (*gen) (rtx, rtx, rtx, rtx);
9711 rtx offset;
9713 unsigned int location = d->perm[0]; /* Always < nelt. */
9715 /* Check if the extracted indices are increasing by one. */
9716 for (i = 1; i < nelt; i++)
9718 unsigned int required = location + i;
9719 if (d->one_vector_p)
9721 /* We'll pass the same vector in twice, so allow indices to wrap. */
9722 required &= (nelt - 1);
9724 if (d->perm[i] != required)
9725 return false;
9728 switch (d->vmode)
9730 case V16QImode: gen = gen_aarch64_extv16qi; break;
9731 case V8QImode: gen = gen_aarch64_extv8qi; break;
9732 case V4HImode: gen = gen_aarch64_extv4hi; break;
9733 case V8HImode: gen = gen_aarch64_extv8hi; break;
9734 case V2SImode: gen = gen_aarch64_extv2si; break;
9735 case V4SImode: gen = gen_aarch64_extv4si; break;
9736 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9737 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9738 case V2DImode: gen = gen_aarch64_extv2di; break;
9739 case V2DFmode: gen = gen_aarch64_extv2df; break;
9740 default:
9741 return false;
9744 /* Success! */
9745 if (d->testing_p)
9746 return true;
9748 /* The case where (location == 0) is a no-op for both big- and little-endian,
9749 and is removed by the mid-end at optimization levels -O1 and higher. */
9751 if (BYTES_BIG_ENDIAN && (location != 0))
9753 /* After setup, we want the high elements of the first vector (stored
9754 at the LSB end of the register), and the low elements of the second
9755 vector (stored at the MSB end of the register). So swap. */
9756 rtx temp = d->op0;
9757 d->op0 = d->op1;
9758 d->op1 = temp;
9759 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9760 location = nelt - location;
9763 offset = GEN_INT (location);
9764 emit_insn (gen (d->target, d->op0, d->op1, offset));
9765 return true;
9768 /* Recognize patterns for the REV insns. */
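/* For example, on V8QI the permutation { 7, 6, 5, 4, 3, 2, 1, 0 } selects
   REV64, { 3, 2, 1, 0, 7, 6, 5, 4 } selects REV32 and
   { 1, 0, 3, 2, 5, 4, 7, 6 } selects REV16. */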
9770 static bool
9771 aarch64_evpc_rev (struct expand_vec_perm_d *d)
9773 unsigned int i, j, diff, nelt = d->nelt;
9774 rtx (*gen) (rtx, rtx);
9776 if (!d->one_vector_p)
9777 return false;
9779 diff = d->perm[0];
9780 switch (diff)
9782 case 7:
9783 switch (d->vmode)
9785 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9786 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9787 default:
9788 return false;
9790 break;
9791 case 3:
9792 switch (d->vmode)
9794 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9795 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9796 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9797 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9798 default:
9799 return false;
9801 break;
9802 case 1:
9803 switch (d->vmode)
9805 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9806 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9807 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9808 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9809 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9810 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9811 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9812 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9813 default:
9814 return false;
9816 break;
9817 default:
9818 return false;
9821 for (i = 0; i < nelt ; i += diff + 1)
9822 for (j = 0; j <= diff; j += 1)
9824 /* This is guaranteed to be true as the value of diff
9825 is 7, 3 or 1 and we should have enough elements in the
9826 queue to generate this. Getting a vector mask with a
9827 value of diff other than these values implies that
9828 something is wrong by the time we get here. */
9829 gcc_assert (i + j < nelt);
9830 if (d->perm[i + j] != i + diff - j)
9831 return false;
9834 /* Success! */
9835 if (d->testing_p)
9836 return true;
9838 emit_insn (gen (d->target, d->op0));
9839 return true;
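/* Recognize the permutation that broadcasts a single lane across the whole
   vector, e.g. { 2, 2, 2, 2 } on V4SI, which maps onto DUP (element). */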
9842 static bool
9843 aarch64_evpc_dup (struct expand_vec_perm_d *d)
9845 rtx (*gen) (rtx, rtx, rtx);
9846 rtx out = d->target;
9847 rtx in0;
9848 machine_mode vmode = d->vmode;
9849 unsigned int i, elt, nelt = d->nelt;
9850 rtx lane;
9852 elt = d->perm[0];
9853 for (i = 1; i < nelt; i++)
9855 if (elt != d->perm[i])
9856 return false;
9859 /* The generic preparation in aarch64_expand_vec_perm_const_1
9860 swaps the operand order and the permute indices if it finds
9861 d->perm[0] to be in the second operand. Thus, we can always
9862 use d->op0 and need not do any extra arithmetic to get the
9863 correct lane number. */
9864 in0 = d->op0;
9865 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
9867 switch (vmode)
9869 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9870 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9871 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9872 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9873 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9874 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9875 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9876 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9877 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9878 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9879 default:
9880 return false;
9883 emit_insn (gen (out, in0, lane));
9884 return true;
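/* Fall back to a general permutation using the TBL instructions, with the
   permutation indices materialised as a constant vector of byte indices. */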
9887 static bool
9888 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9890 rtx rperm[MAX_VECT_LEN], sel;
9891 machine_mode vmode = d->vmode;
9892 unsigned int i, nelt = d->nelt;
9894 if (d->testing_p)
9895 return true;
9897 /* Generic code will try constant permutation twice. Once with the
9898 original mode and again with the elements lowered to QImode.
9899 So wait and don't do the selector expansion ourselves. */
9900 if (vmode != V8QImode && vmode != V16QImode)
9901 return false;
9903 for (i = 0; i < nelt; ++i)
9905 int nunits = GET_MODE_NUNITS (vmode);
9907 /* If big-endian and two vectors we end up with a weird mixed-endian
9908 mode on NEON. Reverse the index within each word but not the word
9909 itself. */
9910 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9911 : d->perm[i]);
9913 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9914 sel = force_reg (vmode, sel);
9916 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9917 return true;
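/* Try each of the single-instruction expanders above in turn (REV, EXT,
   DUP, ZIP, UZP, TRN), falling back to a TBL-based permute. */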
9920 static bool
9921 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9923 /* The pattern matching functions above are written to look for a small
9924 number to begin the sequence (0, 1, N/2). If we begin with an index
9925 from the second operand, we can swap the operands. */
9926 if (d->perm[0] >= d->nelt)
9928 unsigned i, nelt = d->nelt;
9929 rtx x;
9931 gcc_assert (nelt == (nelt & -nelt));
9932 for (i = 0; i < nelt; ++i)
9933 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
9935 x = d->op0;
9936 d->op0 = d->op1;
9937 d->op1 = x;
9940 if (TARGET_SIMD)
9942 if (aarch64_evpc_rev (d))
9943 return true;
9944 else if (aarch64_evpc_ext (d))
9945 return true;
9946 else if (aarch64_evpc_dup (d))
9947 return true;
9948 else if (aarch64_evpc_zip (d))
9949 return true;
9950 else if (aarch64_evpc_uzp (d))
9951 return true;
9952 else if (aarch64_evpc_trn (d))
9953 return true;
9954 return aarch64_evpc_tbl (d);
9956 return false;
9959 /* Expand a vec_perm_const pattern. */
9961 bool
9962 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9964 struct expand_vec_perm_d d;
9965 int i, nelt, which;
9967 d.target = target;
9968 d.op0 = op0;
9969 d.op1 = op1;
9971 d.vmode = GET_MODE (target);
9972 gcc_assert (VECTOR_MODE_P (d.vmode));
9973 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9974 d.testing_p = false;
9976 for (i = which = 0; i < nelt; ++i)
9978 rtx e = XVECEXP (sel, 0, i);
9979 int ei = INTVAL (e) & (2 * nelt - 1);
9980 which |= (ei < nelt ? 1 : 2);
9981 d.perm[i] = ei;
9984 switch (which)
9986 default:
9987 gcc_unreachable ();
9989 case 3:
9990 d.one_vector_p = false;
9991 if (!rtx_equal_p (op0, op1))
9992 break;
9994 /* The elements of PERM do not suggest that only the first operand
9995 is used, but both operands are identical. Allow easier matching
9996 of the permutation by folding the permutation into the single
9997 input vector. */
9998 /* Fall Through. */
9999 case 2:
10000 for (i = 0; i < nelt; ++i)
10001 d.perm[i] &= nelt - 1;
10002 d.op0 = op1;
10003 d.one_vector_p = true;
10004 break;
10006 case 1:
10007 d.op1 = op0;
10008 d.one_vector_p = true;
10009 break;
10012 return aarch64_expand_vec_perm_const_1 (&d);
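/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  Decide, without emitting
   any code, whether the constant permutation SEL is supported for VMODE by
   running the expanders in testing mode inside a discarded sequence. */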
10015 static bool
10016 aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
10017 const unsigned char *sel)
10019 struct expand_vec_perm_d d;
10020 unsigned int i, nelt, which;
10021 bool ret;
10023 d.vmode = vmode;
10024 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
10025 d.testing_p = true;
10026 memcpy (d.perm, sel, nelt);
10028 /* Calculate whether all elements are in one vector. */
10029 for (i = which = 0; i < nelt; ++i)
10031 unsigned char e = d.perm[i];
10032 gcc_assert (e < 2 * nelt);
10033 which |= (e < nelt ? 1 : 2);
10036 /* If all elements are from the second vector, reindex as if from the
10037 first vector. */
10038 if (which == 2)
10039 for (i = 0; i < nelt; ++i)
10040 d.perm[i] -= nelt;
10042 /* Check whether the mask can be applied to a single vector. */
10043 d.one_vector_p = (which != 3);
10045 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
10046 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
10047 if (!d.one_vector_p)
10048 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
10050 start_sequence ();
10051 ret = aarch64_expand_vec_perm_const_1 (&d);
10052 end_sequence ();
10054 return ret;
10057 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
10058 bool
10059 aarch64_cannot_change_mode_class (machine_mode from,
10060 machine_mode to,
10061 enum reg_class rclass)
10063 /* Full-reg subregs are allowed on general regs or any class if they are
10064 the same size. */
10065 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
10066 || !reg_classes_intersect_p (FP_REGS, rclass))
10067 return false;
10069 /* Limited combinations of subregs are safe on FPREGs. Particularly,
10070 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
10071 2. Scalar to Scalar for integer modes or same size float modes.
10072 3. Vector to Vector modes.
10073 4. On little-endian only, Vector-Structure to Vector modes. */
10074 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
10076 if (aarch64_vector_mode_supported_p (from)
10077 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
10078 return false;
10080 if (GET_MODE_NUNITS (from) == 1
10081 && GET_MODE_NUNITS (to) == 1
10082 && (GET_MODE_CLASS (from) == MODE_INT
10083 || from == to))
10084 return false;
10086 if (aarch64_vector_mode_supported_p (from)
10087 && aarch64_vector_mode_supported_p (to))
10088 return false;
10090 /* Within a vector structure straddling multiple vector registers
10091 we are in a mixed-endian representation. As such, we can't
10092 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
10093 switch between vectors and vector structures cheaply. */
10094 if (!BYTES_BIG_ENDIAN)
10095 if ((aarch64_vector_mode_supported_p (from)
10096 && aarch64_vect_struct_mode_p (to))
10097 || (aarch64_vector_mode_supported_p (to)
10098 && aarch64_vect_struct_mode_p (from)))
10099 return false;
10102 return true;
10105 /* Implement MODES_TIEABLE_P. */
10107 bool
10108 aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10110 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
10111 return true;
10113 /* We specifically want to allow elements of "structure" modes to
10114 be tieable to the structure. This more general condition allows
10115 other rarer situations too. */
10116 if (TARGET_SIMD
10117 && aarch64_vector_mode_p (mode1)
10118 && aarch64_vector_mode_p (mode2))
10119 return true;
10121 return false;
10124 /* Return a new RTX holding the result of moving POINTER forward by
10125 AMOUNT bytes. */
10127 static rtx
10128 aarch64_move_pointer (rtx pointer, int amount)
10130 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
10132 return adjust_automodify_address (pointer, GET_MODE (pointer),
10133 next, amount);
10136 /* Return a new RTX holding the result of moving POINTER forward by the
10137 size of the mode it points to. */
10139 static rtx
10140 aarch64_progress_pointer (rtx pointer)
10142 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
10144 return aarch64_move_pointer (pointer, amount);
10147 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
10148 MODE bytes. */
10150 static void
10151 aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
10152 machine_mode mode)
10154 rtx reg = gen_reg_rtx (mode);
10156 /* "Cast" the pointers to the correct mode. */
10157 *src = adjust_address (*src, mode, 0);
10158 *dst = adjust_address (*dst, mode, 0);
10159 /* Emit the memcpy. */
10160 emit_move_insn (reg, *src);
10161 emit_move_insn (*dst, reg);
10162 /* Move the pointers forward. */
10163 *src = aarch64_progress_pointer (*src);
10164 *dst = aarch64_progress_pointer (*dst);
10167 /* Expand movmem, as if from a __builtin_memcpy. Return true if
10168 we succeed, otherwise return false. */
10170 bool
10171 aarch64_expand_movmem (rtx *operands)
10173 unsigned int n;
10174 rtx dst = operands[0];
10175 rtx src = operands[1];
10176 rtx base;
10177 bool speed_p = !optimize_function_for_size_p (cfun);
10179 /* When optimizing for size, give a better estimate of the length of a
10180 memcpy call, but use the default otherwise. */
10181 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
10183 /* We can't do anything smart if the amount to copy is not constant. */
10184 if (!CONST_INT_P (operands[2]))
10185 return false;
10187 n = UINTVAL (operands[2]);
10189 /* Try to keep the number of instructions low. For cases below 16 bytes we
10190 need to make at most two moves. For cases above 16 bytes it will be one
10191 move for each 16 byte chunk, then at most two additional moves. */
10192 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
10193 return false;
10195 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10196 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
10198 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
10199 src = adjust_automodify_address (src, VOIDmode, base, 0);
10201 /* Simple cases. Copy 0-3 bytes: first (if applicable) a 2-byte chunk,
10202 then a 1-byte chunk. */
10203 if (n < 4)
10205 if (n >= 2)
10207 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10208 n -= 2;
10211 if (n == 1)
10212 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10214 return true;
10217 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
10218 4-byte chunk, partially overlapping with the previously copied chunk. */
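/* For instance, a 7-byte copy becomes a copy of bytes 0-3 followed by a
   copy of bytes 3-6, the second access overlapping the first by one byte
   rather than falling back to narrower moves. */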
10219 if (n < 8)
10221 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10222 n -= 4;
10223 if (n > 0)
10225 int move = n - 4;
10227 src = aarch64_move_pointer (src, move);
10228 dst = aarch64_move_pointer (dst, move);
10229 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10231 return true;
10234 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
10235 them, then (if applicable) an 8-byte chunk. */
10236 while (n >= 8)
10238 if (n / 16)
10240 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
10241 n -= 16;
10243 else
10245 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10246 n -= 8;
10250 /* Finish the final bytes of the copy. We can always do this in one
10251 instruction. We either copy the exact amount we need, or partially
10252 overlap with the previous chunk we copied and copy 8 bytes. */
10253 if (n == 0)
10254 return true;
10255 else if (n == 1)
10256 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10257 else if (n == 2)
10258 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10259 else if (n == 4)
10260 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10261 else
10263 if (n == 3)
10265 src = aarch64_move_pointer (src, -1);
10266 dst = aarch64_move_pointer (dst, -1);
10267 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10269 else
10271 int move = n - 8;
10273 src = aarch64_move_pointer (src, move);
10274 dst = aarch64_move_pointer (dst, move);
10275 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10279 return true;
10282 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
10284 static unsigned HOST_WIDE_INT
10285 aarch64_asan_shadow_offset (void)
10287 return (HOST_WIDE_INT_1 << 36);
10290 static bool
10291 aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
10292 unsigned int align,
10293 enum by_pieces_operation op,
10294 bool speed_p)
10296 /* STORE_BY_PIECES can be used when copying a constant string, but
10297 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
10298 For now we always fail this and let the move_by_pieces code copy
10299 the string from read-only memory. */
10300 if (op == STORE_BY_PIECES)
10301 return false;
10303 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
10306 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
10307 instruction fusion of some sort. */
10309 static bool
10310 aarch64_macro_fusion_p (void)
10312 return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
10316 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
10317 should be kept together during scheduling. */
10319 static bool
10320 aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
10322 rtx set_dest;
10323 rtx prev_set = single_set (prev);
10324 rtx curr_set = single_set (curr);
10325 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
10326 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
10328 if (!aarch64_macro_fusion_p ())
10329 return false;
10331 if (simple_sets_p
10332 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
10334 /* We are trying to match:
10335 prev (mov) == (set (reg r0) (const_int imm16))
10336 curr (movk) == (set (zero_extract (reg r0)
10337 (const_int 16)
10338 (const_int 16))
10339 (const_int imm16_1)) */
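/* In assembly terms this is, for example (register and immediate values
   are illustrative):
	mov	w0, #0x1234
	movk	w0, #0x5678, lsl #16  */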
10341 set_dest = SET_DEST (curr_set);
10343 if (GET_CODE (set_dest) == ZERO_EXTRACT
10344 && CONST_INT_P (SET_SRC (curr_set))
10345 && CONST_INT_P (SET_SRC (prev_set))
10346 && CONST_INT_P (XEXP (set_dest, 2))
10347 && INTVAL (XEXP (set_dest, 2)) == 16
10348 && REG_P (XEXP (set_dest, 0))
10349 && REG_P (SET_DEST (prev_set))
10350 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
10352 return true;
10356 if (simple_sets_p
10357 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
10360 /* We're trying to match:
10361 prev (adrp) == (set (reg r1)
10362 (high (symbol_ref ("SYM"))))
10363 curr (add) == (set (reg r0)
10364 (lo_sum (reg r1)
10365 (symbol_ref ("SYM"))))
10366 Note that r0 need not necessarily be the same as r1, especially
10367 during pre-regalloc scheduling. */
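/* In assembly terms this is, for example (names are illustrative):
	adrp	x1, sym
	add	x0, x1, :lo12:sym  */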
10369 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10370 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10372 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
10373 && REG_P (XEXP (SET_SRC (curr_set), 0))
10374 && REGNO (XEXP (SET_SRC (curr_set), 0))
10375 == REGNO (SET_DEST (prev_set))
10376 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
10377 XEXP (SET_SRC (curr_set), 1)))
10378 return true;
10382 if (simple_sets_p
10383 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
10386 /* We're trying to match:
10387 prev (movk) == (set (zero_extract (reg r0)
10388 (const_int 16)
10389 (const_int 32))
10390 (const_int imm16_1))
10391 curr (movk) == (set (zero_extract (reg r0)
10392 (const_int 16)
10393 (const_int 48))
10394 (const_int imm16_2)) */
10396 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
10397 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
10398 && REG_P (XEXP (SET_DEST (prev_set), 0))
10399 && REG_P (XEXP (SET_DEST (curr_set), 0))
10400 && REGNO (XEXP (SET_DEST (prev_set), 0))
10401 == REGNO (XEXP (SET_DEST (curr_set), 0))
10402 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
10403 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
10404 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
10405 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
10406 && CONST_INT_P (SET_SRC (prev_set))
10407 && CONST_INT_P (SET_SRC (curr_set)))
10408 return true;
10411 if (simple_sets_p
10412 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR))
10414 /* We're trying to match:
10415 prev (adrp) == (set (reg r0)
10416 (high (symbol_ref ("SYM"))))
10417 curr (ldr) == (set (reg r1)
10418 (mem (lo_sum (reg r0)
10419 (symbol_ref ("SYM")))))
10421 curr (ldr) == (set (reg r1)
10422 (zero_extend (mem
10423 (lo_sum (reg r0)
10424 (symbol_ref ("SYM")))))) */
10425 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10426 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10428 rtx curr_src = SET_SRC (curr_set);
10430 if (GET_CODE (curr_src) == ZERO_EXTEND)
10431 curr_src = XEXP (curr_src, 0);
10433 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
10434 && REG_P (XEXP (XEXP (curr_src, 0), 0))
10435 && REGNO (XEXP (XEXP (curr_src, 0), 0))
10436 == REGNO (SET_DEST (prev_set))
10437 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
10438 XEXP (SET_SRC (prev_set), 0)))
10439 return true;
10443 if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
10444 && any_condjump_p (curr))
10446 enum attr_type prev_type = get_attr_type (prev);
10448 /* FIXME: this misses some instructions which are considered simple
10449 arithmetic for ThunderX. Simple shifts are missed here. */
10450 if (prev_type == TYPE_ALUS_SREG
10451 || prev_type == TYPE_ALUS_IMM
10452 || prev_type == TYPE_LOGICS_REG
10453 || prev_type == TYPE_LOGICS_IMM)
10454 return true;
10457 return false;
10460 /* If MEM is in the form of [base+offset], extract the two parts
10461 of address and set to BASE and OFFSET, otherwise return false
10462 after clearing BASE and OFFSET. */
10464 bool
10465 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
10467 rtx addr;
10469 gcc_assert (MEM_P (mem));
10471 addr = XEXP (mem, 0);
10473 if (REG_P (addr))
10475 *base = addr;
10476 *offset = const0_rtx;
10477 return true;
10480 if (GET_CODE (addr) == PLUS
10481 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
10483 *base = XEXP (addr, 0);
10484 *offset = XEXP (addr, 1);
10485 return true;
10488 *base = NULL_RTX;
10489 *offset = NULL_RTX;
10491 return false;
10494 /* Types for scheduling fusion. */
10495 enum sched_fusion_type
10497 SCHED_FUSION_NONE = 0,
10498 SCHED_FUSION_LD_SIGN_EXTEND,
10499 SCHED_FUSION_LD_ZERO_EXTEND,
10500 SCHED_FUSION_LD,
10501 SCHED_FUSION_ST,
10502 SCHED_FUSION_NUM
10505 /* If INSN is a load or store with an address in the form of [base+offset],
10506 extract the two parts into BASE and OFFSET.  Return the scheduling
10507 fusion type of this INSN.  */
10509 static enum sched_fusion_type
10510 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
10512 rtx x, dest, src;
10513 enum sched_fusion_type fusion = SCHED_FUSION_LD;
10515 gcc_assert (INSN_P (insn));
10516 x = PATTERN (insn);
10517 if (GET_CODE (x) != SET)
10518 return SCHED_FUSION_NONE;
10520 src = SET_SRC (x);
10521 dest = SET_DEST (x);
10523 if (GET_MODE (src) != SImode && GET_MODE (src) != DImode
10524 && GET_MODE (src) != SFmode && GET_MODE (src) != DFmode)
10525 return SCHED_FUSION_NONE;
10527 if (GET_CODE (src) == SIGN_EXTEND)
10529 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
10530 src = XEXP (src, 0);
10531 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10532 return SCHED_FUSION_NONE;
10534 else if (GET_CODE (src) == ZERO_EXTEND)
10536 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
10537 src = XEXP (src, 0);
10538 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10539 return SCHED_FUSION_NONE;
10542 if (GET_CODE (src) == MEM && REG_P (dest))
10543 extract_base_offset_in_addr (src, base, offset);
10544 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
10546 fusion = SCHED_FUSION_ST;
10547 extract_base_offset_in_addr (dest, base, offset);
10549 else
10550 return SCHED_FUSION_NONE;
10552 if (*base == NULL_RTX || *offset == NULL_RTX)
10553 fusion = SCHED_FUSION_NONE;
10555 return fusion;
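/* For example (illustrative): "ldr w0, [x1, 8]" classifies as SCHED_FUSION_LD,
   "ldrsw x0, [x1, 8]" as SCHED_FUSION_LD_SIGN_EXTEND and "str wzr, [x1, 8]"
   as SCHED_FUSION_ST, while an insn whose address is not of the form
   [base+offset] comes back as SCHED_FUSION_NONE.  */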
10558 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
10560 Currently we only support fusing ldr and str instructions, so FUSION_PRI
10561 and PRI are only calculated for these instructions.  For other instructions,
10562 FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the future, other
10563 types of instruction fusion can be added by returning different priorities.
10565 It's important that irrelevant instructions get the largest FUSION_PRI. */
10567 static void
10568 aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
10569 int *fusion_pri, int *pri)
10571 int tmp, off_val;
10572 rtx base, offset;
10573 enum sched_fusion_type fusion;
10575 gcc_assert (INSN_P (insn));
10577 tmp = max_pri - 1;
10578 fusion = fusion_load_store (insn, &base, &offset);
10579 if (fusion == SCHED_FUSION_NONE)
10581 *pri = tmp;
10582 *fusion_pri = tmp;
10583 return;
10586 /* Set FUSION_PRI according to fusion type and base register. */
10587 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
10589 /* Calculate PRI. */
10590 tmp /= 2;
10592 /* The insn with the smaller offset goes first.  */
10593 off_val = (int)(INTVAL (offset));
10594 if (off_val >= 0)
10595 tmp -= (off_val & 0xfffff);
10596 else
10597 tmp += ((- off_val) & 0xfffff);
10599 *pri = tmp;
10600 return;
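/* As a worked example (values are illustrative): with MAX_PRI of 100,
   two SImode loads from the same base register with offsets 4 and 8 both
   get the same FUSION_PRI (99 - SCHED_FUSION_LD * FIRST_PSEUDO_REGISTER
   - REGNO (base)), while their PRI values are 49 - 4 and 49 - 8
   respectively, so the load with the smaller offset is scheduled first
   and the two can later be combined into an ldp.  */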
10603 /* Given OPERANDS of two consecutive load/store instructions, check if we
10604 can merge them into an ldp/stp.  LOAD is true if they are load
10605 instructions.  MODE is the mode of the memory operands.  */
10607 bool
10608 aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
10609 enum machine_mode mode)
10611 HOST_WIDE_INT offval_1, offval_2, msize;
10612 enum reg_class rclass_1, rclass_2;
10613 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
10615 if (load)
10617 mem_1 = operands[1];
10618 mem_2 = operands[3];
10619 reg_1 = operands[0];
10620 reg_2 = operands[2];
10621 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
10622 if (REGNO (reg_1) == REGNO (reg_2))
10623 return false;
10625 else
10627 mem_1 = operands[0];
10628 mem_2 = operands[2];
10629 reg_1 = operands[1];
10630 reg_2 = operands[3];
10633 /* The mems cannot be volatile. */
10634 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
10635 return false;
10637 /* Check if the addresses are in the form of [base+offset]. */
10638 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
10639 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
10640 return false;
10641 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
10642 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
10643 return false;
10645 /* Check if the bases are the same.  */
10646 if (!rtx_equal_p (base_1, base_2))
10647 return false;
10649 offval_1 = INTVAL (offset_1);
10650 offval_2 = INTVAL (offset_2);
10651 msize = GET_MODE_SIZE (mode);
10652 /* Check if the offsets are consecutive. */
10653 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
10654 return false;
10656 /* Check if the addresses are clobbered by the loads.  */
10657 if (load)
10659 if (reg_mentioned_p (reg_1, mem_1))
10660 return false;
10662 /* In increasing order, the last load can clobber the address. */
10663 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
10664 return false;
10667 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
10668 rclass_1 = FP_REGS;
10669 else
10670 rclass_1 = GENERAL_REGS;
10672 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
10673 rclass_2 = FP_REGS;
10674 else
10675 rclass_2 = GENERAL_REGS;
10677 /* Check if the registers are of the same class.  */
10678 if (rclass_1 != rclass_2)
10679 return false;
10681 return true;
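/* For example (illustrative), the checks above allow

     ldr	w0, [x2]
     ldr	w1, [x2, 4]

   to be merged into "ldp w0, w1, [x2]", but reject pairs that use
   different base registers, mix general and FP destination registers,
   or load the same destination register twice.  */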
10684 /* Given OPERANDS of consecutive load/store instructions, check if we can
10685 merge them into an ldp/stp by adjusting the offset.  LOAD is true if
10686 they are load instructions.  MODE is the mode of the memory operands.
10688 Given the following consecutive stores:
10690 str w1, [xb, 0x100]
10691 str w1, [xb, 0x104]
10692 str w1, [xb, 0x108]
10693 str w1, [xb, 0x10c]
10695 Though the offsets are out of the range supported by stp, we can
10696 still pair them after adjusting the offset, like:
10698 add scratch, xb, 0x100
10699 stp w1, w1, [scratch]
10700 stp w1, w1, [scratch, 0x8]
10702 The peephole patterns that detect this opportunity should guarantee
10703 that the scratch register is available.  */
10705 bool
10706 aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
10707 enum machine_mode mode)
10709 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
10710 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
10711 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
10712 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
10714 if (load)
10716 reg_1 = operands[0];
10717 mem_1 = operands[1];
10718 reg_2 = operands[2];
10719 mem_2 = operands[3];
10720 reg_3 = operands[4];
10721 mem_3 = operands[5];
10722 reg_4 = operands[6];
10723 mem_4 = operands[7];
10724 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
10725 && REG_P (reg_3) && REG_P (reg_4));
10726 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
10727 return false;
10729 else
10731 mem_1 = operands[0];
10732 reg_1 = operands[1];
10733 mem_2 = operands[2];
10734 reg_2 = operands[3];
10735 mem_3 = operands[4];
10736 reg_3 = operands[5];
10737 mem_4 = operands[6];
10738 reg_4 = operands[7];
10740 /* Skip if the memory operand is by itself valid for ldp/stp.  */
10741 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
10742 return false;
10744 /* The mems cannot be volatile. */
10745 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
10746 || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
10747 return false;
10749 /* Check if the addresses are in the form of [base+offset]. */
10750 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
10751 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
10752 return false;
10753 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
10754 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
10755 return false;
10756 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
10757 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
10758 return false;
10759 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
10760 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
10761 return false;
10763 /* Check if the bases are the same.  */
10764 if (!rtx_equal_p (base_1, base_2)
10765 || !rtx_equal_p (base_2, base_3)
10766 || !rtx_equal_p (base_3, base_4))
10767 return false;
10769 offval_1 = INTVAL (offset_1);
10770 offval_2 = INTVAL (offset_2);
10771 offval_3 = INTVAL (offset_3);
10772 offval_4 = INTVAL (offset_4);
10773 msize = GET_MODE_SIZE (mode);
10774 /* Check if the offsets are consecutive. */
10775 if ((offval_1 != (offval_2 + msize)
10776 || offval_1 != (offval_3 + msize * 2)
10777 || offval_1 != (offval_4 + msize * 3))
10778 && (offval_4 != (offval_3 + msize)
10779 || offval_4 != (offval_2 + msize * 2)
10780 || offval_4 != (offval_1 + msize * 3)))
10781 return false;
10783 /* Check if the addresses are clobbered by the loads.  */
10784 if (load)
10786 if (reg_mentioned_p (reg_1, mem_1)
10787 || reg_mentioned_p (reg_2, mem_2)
10788 || reg_mentioned_p (reg_3, mem_3))
10789 return false;
10791 /* In increasing order, the last load can clobber the address. */
10792 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
10793 return false;
10796 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
10797 rclass_1 = FP_REGS;
10798 else
10799 rclass_1 = GENERAL_REGS;
10801 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
10802 rclass_2 = FP_REGS;
10803 else
10804 rclass_2 = GENERAL_REGS;
10806 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
10807 rclass_3 = FP_REGS;
10808 else
10809 rclass_3 = GENERAL_REGS;
10811 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
10812 rclass_4 = FP_REGS;
10813 else
10814 rclass_4 = GENERAL_REGS;
10816 /* Check if the registers are of the same class.  */
10817 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
10818 return false;
10820 return true;
10823 /* Given OPERANDS of consecutive load/store instructions, this function
10824 pairs them into ldp/stp after adjusting the offset.  It relies on the
10825 fact that the addresses of the load/store instructions are in
10826 increasing order.  MODE is the mode of the memory operands.  CODE is
10827 the rtl operator that should be applied to all memory operands; it is
10828 SIGN_EXTEND, ZERO_EXTEND or UNKNOWN.  */
10830 bool
10831 aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
10832 enum machine_mode mode, RTX_CODE code)
10834 rtx base, offset, t1, t2;
10835 rtx mem_1, mem_2, mem_3, mem_4;
10836 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
10838 if (load)
10840 mem_1 = operands[1];
10841 mem_2 = operands[3];
10842 mem_3 = operands[5];
10843 mem_4 = operands[7];
10845 else
10847 mem_1 = operands[0];
10848 mem_2 = operands[2];
10849 mem_3 = operands[4];
10850 mem_4 = operands[6];
10851 gcc_assert (code == UNKNOWN);
10854 extract_base_offset_in_addr (mem_1, &base, &offset);
10855 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
10857 /* Adjust the offset so that it can fit in an ldp/stp instruction.  */
10858 msize = GET_MODE_SIZE (mode);
10859 stp_off_limit = msize * 0x40;
10860 off_val = INTVAL (offset);
10861 abs_off = (off_val < 0) ? -off_val : off_val;
10862 new_off = abs_off % stp_off_limit;
10863 adj_off = abs_off - new_off;
10865 /* Further adjust to make sure all offsets are OK. */
10866 if ((new_off + msize * 2) >= stp_off_limit)
10868 adj_off += stp_off_limit;
10869 new_off -= stp_off_limit;
10872 /* Make sure the adjustment can be done with ADD/SUB instructions. */
10873 if (adj_off >= 0x1000)
10874 return false;
10876 if (off_val < 0)
10878 adj_off = -adj_off;
10879 new_off = -new_off;
10882 /* Create new memory references. */
10883 mem_1 = change_address (mem_1, VOIDmode,
10884 plus_constant (DImode, operands[8], new_off));
10886 /* Check if the adjusted address is OK for ldp/stp. */
10887 if (!aarch64_mem_pair_operand (mem_1, mode))
10888 return false;
10890 msize = GET_MODE_SIZE (mode);
10891 mem_2 = change_address (mem_2, VOIDmode,
10892 plus_constant (DImode,
10893 operands[8],
10894 new_off + msize));
10895 mem_3 = change_address (mem_3, VOIDmode,
10896 plus_constant (DImode,
10897 operands[8],
10898 new_off + msize * 2));
10899 mem_4 = change_address (mem_4, VOIDmode,
10900 plus_constant (DImode,
10901 operands[8],
10902 new_off + msize * 3));
10904 if (code == ZERO_EXTEND)
10906 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
10907 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
10908 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
10909 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
10911 else if (code == SIGN_EXTEND)
10913 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
10914 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
10915 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
10916 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
10919 if (load)
10921 operands[1] = mem_1;
10922 operands[3] = mem_2;
10923 operands[5] = mem_3;
10924 operands[7] = mem_4;
10926 else
10928 operands[0] = mem_1;
10929 operands[2] = mem_2;
10930 operands[4] = mem_3;
10931 operands[6] = mem_4;
10934 /* Emit the adjusting instruction.  */
10935 emit_insn (gen_rtx_SET (VOIDmode, operands[8],
10936 plus_constant (DImode, base, adj_off)));
10937 /* Emit ldp/stp instructions. */
10938 t1 = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
10939 t2 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
10940 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
10941 t1 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
10942 t2 = gen_rtx_SET (VOIDmode, operands[6], operands[7]);
10943 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
10944 return true;
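/* As a worked example of the adjustment above (SImode, so msize == 4 and
   stp_off_limit == 0x100): for the stores at xb + 0x100 .. xb + 0x10c,
   off_val == 0x100, so new_off == 0 and adj_off == 0x100, giving

     add	scratch, xb, 0x100
     stp	w1, w1, [scratch]
     stp	w1, w1, [scratch, 8]

   exactly as described in the comment before
   aarch64_operands_adjust_ok_for_ldpstp.  */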
10947 #undef TARGET_ADDRESS_COST
10948 #define TARGET_ADDRESS_COST aarch64_address_cost
10950 /* This hook determines whether unnamed bitfields affect the alignment
10951 of the containing structure. The hook returns true if the structure
10952 should inherit the alignment requirements of an unnamed bitfield's
10953 type. */
10954 #undef TARGET_ALIGN_ANON_BITFIELD
10955 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
10957 #undef TARGET_ASM_ALIGNED_DI_OP
10958 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
10960 #undef TARGET_ASM_ALIGNED_HI_OP
10961 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
10963 #undef TARGET_ASM_ALIGNED_SI_OP
10964 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
10966 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10967 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
10968 hook_bool_const_tree_hwi_hwi_const_tree_true
10970 #undef TARGET_ASM_FILE_START
10971 #define TARGET_ASM_FILE_START aarch64_start_file
10973 #undef TARGET_ASM_OUTPUT_MI_THUNK
10974 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
10976 #undef TARGET_ASM_SELECT_RTX_SECTION
10977 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
10979 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
10980 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
10982 #undef TARGET_BUILD_BUILTIN_VA_LIST
10983 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
10985 #undef TARGET_CALLEE_COPIES
10986 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
10988 #undef TARGET_CAN_ELIMINATE
10989 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
10991 #undef TARGET_CANNOT_FORCE_CONST_MEM
10992 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
10994 #undef TARGET_CONDITIONAL_REGISTER_USAGE
10995 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
10997 /* Only the least significant bit is used for initialization guard
10998 variables. */
10999 #undef TARGET_CXX_GUARD_MASK_BIT
11000 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
11002 #undef TARGET_C_MODE_FOR_SUFFIX
11003 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
11005 #ifdef TARGET_BIG_ENDIAN_DEFAULT
11006 #undef TARGET_DEFAULT_TARGET_FLAGS
11007 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
11008 #endif
11010 #undef TARGET_CLASS_MAX_NREGS
11011 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
11013 #undef TARGET_BUILTIN_DECL
11014 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
11016 #undef TARGET_EXPAND_BUILTIN
11017 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
11019 #undef TARGET_EXPAND_BUILTIN_VA_START
11020 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
11022 #undef TARGET_FOLD_BUILTIN
11023 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
11025 #undef TARGET_FUNCTION_ARG
11026 #define TARGET_FUNCTION_ARG aarch64_function_arg
11028 #undef TARGET_FUNCTION_ARG_ADVANCE
11029 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
11031 #undef TARGET_FUNCTION_ARG_BOUNDARY
11032 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
11034 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
11035 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
11037 #undef TARGET_FUNCTION_VALUE
11038 #define TARGET_FUNCTION_VALUE aarch64_function_value
11040 #undef TARGET_FUNCTION_VALUE_REGNO_P
11041 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
11043 #undef TARGET_FRAME_POINTER_REQUIRED
11044 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
11046 #undef TARGET_GIMPLE_FOLD_BUILTIN
11047 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
11049 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
11050 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
11052 #undef TARGET_INIT_BUILTINS
11053 #define TARGET_INIT_BUILTINS aarch64_init_builtins
11055 #undef TARGET_LEGITIMATE_ADDRESS_P
11056 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
11058 #undef TARGET_LEGITIMATE_CONSTANT_P
11059 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
11061 #undef TARGET_LIBGCC_CMP_RETURN_MODE
11062 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
11064 #undef TARGET_LRA_P
11065 #define TARGET_LRA_P aarch64_lra_p
11067 #undef TARGET_MANGLE_TYPE
11068 #define TARGET_MANGLE_TYPE aarch64_mangle_type
11070 #undef TARGET_MEMORY_MOVE_COST
11071 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
11073 #undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
11074 #define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
11076 #undef TARGET_MUST_PASS_IN_STACK
11077 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
11079 /* This target hook should return true if accesses to volatile bitfields
11080 should use the narrowest mode possible. It should return false if these
11081 accesses should use the bitfield container type. */
11082 #undef TARGET_NARROW_VOLATILE_BITFIELD
11083 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
11085 #undef TARGET_OPTION_OVERRIDE
11086 #define TARGET_OPTION_OVERRIDE aarch64_override_options
11088 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
11089 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
11090 aarch64_override_options_after_change
11092 #undef TARGET_PASS_BY_REFERENCE
11093 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
11095 #undef TARGET_PREFERRED_RELOAD_CLASS
11096 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
11098 #undef TARGET_SCHED_REASSOCIATION_WIDTH
11099 #define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
11101 #undef TARGET_SECONDARY_RELOAD
11102 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
11104 #undef TARGET_SHIFT_TRUNCATION_MASK
11105 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
11107 #undef TARGET_SETUP_INCOMING_VARARGS
11108 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
11110 #undef TARGET_STRUCT_VALUE_RTX
11111 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
11113 #undef TARGET_REGISTER_MOVE_COST
11114 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
11116 #undef TARGET_RETURN_IN_MEMORY
11117 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
11119 #undef TARGET_RETURN_IN_MSB
11120 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
11122 #undef TARGET_RTX_COSTS
11123 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
11125 #undef TARGET_SCHED_ISSUE_RATE
11126 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
11128 #undef TARGET_TRAMPOLINE_INIT
11129 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
11131 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
11132 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
11134 #undef TARGET_VECTOR_MODE_SUPPORTED_P
11135 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
11137 #undef TARGET_ARRAY_MODE_SUPPORTED_P
11138 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
11140 #undef TARGET_VECTORIZE_ADD_STMT_COST
11141 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
11143 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
11144 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
11145 aarch64_builtin_vectorization_cost
11147 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
11148 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
11150 #undef TARGET_VECTORIZE_BUILTINS
11151 #define TARGET_VECTORIZE_BUILTINS
11153 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
11154 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
11155 aarch64_builtin_vectorized_function
11157 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
11158 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
11159 aarch64_autovectorize_vector_sizes
11161 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
11162 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
11163 aarch64_atomic_assign_expand_fenv
11165 /* Section anchor support. */
11167 #undef TARGET_MIN_ANCHOR_OFFSET
11168 #define TARGET_MIN_ANCHOR_OFFSET -256
11170 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
11171 byte offset; we can do much more for larger data types, but have no way
11172 to determine the size of the access. We assume accesses are aligned. */
11173 #undef TARGET_MAX_ANCHOR_OFFSET
11174 #define TARGET_MAX_ANCHOR_OFFSET 4095
11176 #undef TARGET_VECTOR_ALIGNMENT
11177 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
11179 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
11180 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
11181 aarch64_simd_vector_alignment_reachable
11183 /* vec_perm support. */
11185 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
11186 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
11187 aarch64_vectorize_vec_perm_const_ok
11190 #undef TARGET_FIXED_CONDITION_CODE_REGS
11191 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
11193 #undef TARGET_FLAGS_REGNUM
11194 #define TARGET_FLAGS_REGNUM CC_REGNUM
11196 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
11197 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
11199 #undef TARGET_ASAN_SHADOW_OFFSET
11200 #define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
11202 #undef TARGET_LEGITIMIZE_ADDRESS
11203 #define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
11205 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
11206 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
11207 aarch64_use_by_pieces_infrastructure_p
11209 #undef TARGET_CAN_USE_DOLOOP_P
11210 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
11212 #undef TARGET_SCHED_MACRO_FUSION_P
11213 #define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
11215 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
11216 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
11218 #undef TARGET_SCHED_FUSION_PRIORITY
11219 #define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
11221 struct gcc_target targetm = TARGET_INITIALIZER;
11223 #include "gt-aarch64.h"