1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2015 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "hash-set.h"
29 #include "machmode.h"
30 #include "vec.h"
31 #include "double-int.h"
32 #include "input.h"
33 #include "alias.h"
34 #include "symtab.h"
35 #include "wide-int.h"
36 #include "inchash.h"
37 #include "tree.h"
38 #include "fold-const.h"
39 #include "stringpool.h"
40 #include "stor-layout.h"
41 #include "calls.h"
42 #include "varasm.h"
43 #include "regs.h"
44 #include "dominance.h"
45 #include "cfg.h"
46 #include "cfgrtl.h"
47 #include "cfganal.h"
48 #include "lcm.h"
49 #include "cfgbuild.h"
50 #include "cfgcleanup.h"
51 #include "predict.h"
52 #include "basic-block.h"
53 #include "df.h"
54 #include "hard-reg-set.h"
55 #include "output.h"
56 #include "hashtab.h"
57 #include "function.h"
58 #include "flags.h"
59 #include "statistics.h"
60 #include "real.h"
61 #include "fixed-value.h"
62 #include "insn-config.h"
63 #include "expmed.h"
64 #include "dojump.h"
65 #include "explow.h"
66 #include "emit-rtl.h"
67 #include "stmt.h"
68 #include "expr.h"
69 #include "reload.h"
70 #include "toplev.h"
71 #include "target.h"
72 #include "target-def.h"
73 #include "targhooks.h"
74 #include "ggc.h"
75 #include "tm_p.h"
76 #include "recog.h"
77 #include "langhooks.h"
78 #include "diagnostic-core.h"
79 #include "hash-table.h"
80 #include "tree-ssa-alias.h"
81 #include "internal-fn.h"
82 #include "gimple-fold.h"
83 #include "tree-eh.h"
84 #include "gimple-expr.h"
85 #include "is-a.h"
86 #include "gimple.h"
87 #include "gimplify.h"
88 #include "optabs.h"
89 #include "dwarf2.h"
90 #include "cfgloop.h"
91 #include "tree-vectorizer.h"
92 #include "aarch64-cost-tables.h"
93 #include "dumpfile.h"
94 #include "builtins.h"
95 #include "rtl-iter.h"
96 #include "tm-constrs.h"
97 #include "sched-int.h"
99 /* Defined for convenience. */
100 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
102 /* Classifies an address.
104 ADDRESS_REG_IMM
105 A simple base register plus immediate offset.
107 ADDRESS_REG_WB
108 A base register indexed by immediate offset with writeback.
110 ADDRESS_REG_REG
111 A base register indexed by (optionally scaled) register.
113 ADDRESS_REG_UXTW
114 A base register indexed by (optionally scaled) zero-extended register.
116 ADDRESS_REG_SXTW
117 A base register indexed by (optionally scaled) sign-extended register.
119 ADDRESS_LO_SUM
120 A LO_SUM rtx with a base register and "LO12" symbol relocation.
122 ADDRESS_SYMBOLIC:
123 A constant symbolic address, in pc-relative literal pool. */
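/* As a rough illustration in AArch64 assembler syntax (register numbers
   here are only examples), the classes above cover operands such as:
     ADDRESS_REG_IMM    [x1, #16]
     ADDRESS_REG_WB     [x1, #16]!  or  [x1], #16
     ADDRESS_REG_REG    [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   [x1, w2, uxtw #2]
     ADDRESS_REG_SXTW   [x1, w2, sxtw #2]
     ADDRESS_LO_SUM     [x1, #:lo12:symbol]
     ADDRESS_SYMBOLIC   a pc-relative literal-pool load (ldr xN, <label>).  */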
125 enum aarch64_address_type {
126 ADDRESS_REG_IMM,
127 ADDRESS_REG_WB,
128 ADDRESS_REG_REG,
129 ADDRESS_REG_UXTW,
130 ADDRESS_REG_SXTW,
131 ADDRESS_LO_SUM,
132 ADDRESS_SYMBOLIC
135 struct aarch64_address_info {
136 enum aarch64_address_type type;
137 rtx base;
138 rtx offset;
139 int shift;
140 enum aarch64_symbol_type symbol_type;
143 struct simd_immediate_info
145 rtx value;
146 int shift;
147 int element_width;
148 bool mvn;
149 bool msl;
152 /* The current code model. */
153 enum aarch64_code_model aarch64_cmodel;
155 #ifdef HAVE_AS_TLS
156 #undef TARGET_HAVE_TLS
157 #define TARGET_HAVE_TLS 1
158 #endif
160 static bool aarch64_composite_type_p (const_tree, machine_mode);
161 static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
162 const_tree,
163 machine_mode *, int *,
164 bool *);
165 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
166 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
167 static void aarch64_override_options_after_change (void);
168 static bool aarch64_vector_mode_supported_p (machine_mode);
169 static unsigned bit_count (unsigned HOST_WIDE_INT);
170 static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
171 const unsigned char *sel);
172 static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
174 /* Major revision number of the ARM Architecture implemented by the target. */
175 unsigned aarch64_architecture_version;
177 /* The processor for which instructions should be scheduled. */
178 enum aarch64_processor aarch64_tune = cortexa53;
180 /* The current tuning set. */
181 const struct tune_params *aarch64_tune_params;
183 /* Mask to specify which instructions we are allowed to generate. */
184 unsigned long aarch64_isa_flags = 0;
186 /* Mask to specify which instruction scheduling options should be used. */
187 unsigned long aarch64_tune_flags = 0;
189 /* Tuning parameters. */
191 static const struct cpu_addrcost_table generic_addrcost_table =
194 0, /* hi */
195 0, /* si */
196 0, /* di */
197 0, /* ti */
199 0, /* pre_modify */
200 0, /* post_modify */
201 0, /* register_offset */
202 0, /* register_extend */
203 0 /* imm_offset */
206 static const struct cpu_addrcost_table cortexa57_addrcost_table =
209 1, /* hi */
210 0, /* si */
211 0, /* di */
212 1, /* ti */
214 0, /* pre_modify */
215 0, /* post_modify */
216 0, /* register_offset */
217 0, /* register_extend */
218 0, /* imm_offset */
221 static const struct cpu_addrcost_table xgene1_addrcost_table =
224 1, /* hi */
225 0, /* si */
226 0, /* di */
227 1, /* ti */
229 1, /* pre_modify */
230 0, /* post_modify */
231 0, /* register_offset */
232 1, /* register_extend */
233 0, /* imm_offset */
236 static const struct cpu_regmove_cost generic_regmove_cost =
238 1, /* GP2GP */
239 /* Avoid the use of slow int<->fp moves for spilling by setting
240 their cost higher than memmov_cost. */
241 5, /* GP2FP */
242 5, /* FP2GP */
243 2 /* FP2FP */
246 static const struct cpu_regmove_cost cortexa57_regmove_cost =
248 1, /* GP2GP */
249 /* Avoid the use of slow int<->fp moves for spilling by setting
250 their cost higher than memmov_cost. */
251 5, /* GP2FP */
252 5, /* FP2GP */
253 2 /* FP2FP */
256 static const struct cpu_regmove_cost cortexa53_regmove_cost =
258 1, /* GP2GP */
259 /* Avoid the use of slow int<->fp moves for spilling by setting
260 their cost higher than memmov_cost. */
261 5, /* GP2FP */
262 5, /* FP2GP */
263 2 /* FP2FP */
266 static const struct cpu_regmove_cost thunderx_regmove_cost =
268 2, /* GP2GP */
269 2, /* GP2FP */
270 6, /* FP2GP */
271 4 /* FP2FP */
274 static const struct cpu_regmove_cost xgene1_regmove_cost =
276 1, /* GP2GP */
277 /* Avoid the use of slow int<->fp moves for spilling by setting
278 their cost higher than memmov_cost. */
279 8, /* GP2FP */
280 8, /* FP2GP */
281 2 /* FP2FP */
284 /* Generic costs for vector insn classes. */
285 static const struct cpu_vector_cost generic_vector_cost =
287 1, /* scalar_stmt_cost */
288 1, /* scalar_load_cost */
289 1, /* scalar_store_cost */
290 1, /* vec_stmt_cost */
291 1, /* vec_to_scalar_cost */
292 1, /* scalar_to_vec_cost */
293 1, /* vec_align_load_cost */
294 1, /* vec_unalign_load_cost */
295 1, /* vec_unalign_store_cost */
296 1, /* vec_store_cost */
297 3, /* cond_taken_branch_cost */
298 1 /* cond_not_taken_branch_cost */
301 /* Generic costs for vector insn classes. */
302 static const struct cpu_vector_cost cortexa57_vector_cost =
304 1, /* scalar_stmt_cost */
305 4, /* scalar_load_cost */
306 1, /* scalar_store_cost */
307 3, /* vec_stmt_cost */
308 8, /* vec_to_scalar_cost */
309 8, /* scalar_to_vec_cost */
310 5, /* vec_align_load_cost */
311 5, /* vec_unalign_load_cost */
312 1, /* vec_unalign_store_cost */
313 1, /* vec_store_cost */
314 1, /* cond_taken_branch_cost */
315 1 /* cond_not_taken_branch_cost */
318 /* Generic costs for vector insn classes. */
319 static const struct cpu_vector_cost xgene1_vector_cost =
321 1, /* scalar_stmt_cost */
322 5, /* scalar_load_cost */
323 1, /* scalar_store_cost */
324 2, /* vec_stmt_cost */
325 4, /* vec_to_scalar_cost */
326 4, /* scalar_to_vec_cost */
327 10, /* vec_align_load_cost */
328 10, /* vec_unalign_load_cost */
329 2, /* vec_unalign_store_cost */
330 2, /* vec_store_cost */
331 2, /* cond_taken_branch_cost */
332 1 /* cond_not_taken_branch_cost */
335 #define AARCH64_FUSE_NOTHING (0)
336 #define AARCH64_FUSE_MOV_MOVK (1 << 0)
337 #define AARCH64_FUSE_ADRP_ADD (1 << 1)
338 #define AARCH64_FUSE_MOVK_MOVK (1 << 2)
339 #define AARCH64_FUSE_ADRP_LDR (1 << 3)
340 #define AARCH64_FUSE_CMP_BRANCH (1 << 4)
342 static const struct tune_params generic_tunings =
344 &cortexa57_extra_costs,
345 &generic_addrcost_table,
346 &generic_regmove_cost,
347 &generic_vector_cost,
348 4, /* memmov_cost */
349 2, /* issue_rate */
350 AARCH64_FUSE_NOTHING, /* fuseable_ops */
351 8, /* function_align. */
352 8, /* jump_align. */
353 4, /* loop_align. */
354 2, /* int_reassoc_width. */
355 4, /* fp_reassoc_width. */
356 1 /* vec_reassoc_width. */
359 static const struct tune_params cortexa53_tunings =
361 &cortexa53_extra_costs,
362 &generic_addrcost_table,
363 &cortexa53_regmove_cost,
364 &generic_vector_cost,
365 4, /* memmov_cost */
366 2, /* issue_rate */
367 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
368 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fuseable_ops */
369 8, /* function_align. */
370 8, /* jump_align. */
371 4, /* loop_align. */
372 2, /* int_reassoc_width. */
373 4, /* fp_reassoc_width. */
374 1 /* vec_reassoc_width. */
377 static const struct tune_params cortexa57_tunings =
379 &cortexa57_extra_costs,
380 &cortexa57_addrcost_table,
381 &cortexa57_regmove_cost,
382 &cortexa57_vector_cost,
383 4, /* memmov_cost */
384 3, /* issue_rate */
385 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
386 | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */
387 16, /* function_align. */
388 8, /* jump_align. */
389 4, /* loop_align. */
390 2, /* int_reassoc_width. */
391 4, /* fp_reassoc_width. */
392 1 /* vec_reassoc_width. */
395 static const struct tune_params thunderx_tunings =
397 &thunderx_extra_costs,
398 &generic_addrcost_table,
399 &thunderx_regmove_cost,
400 &generic_vector_cost,
401 6, /* memmov_cost */
402 2, /* issue_rate */
403 AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops */
404 8, /* function_align. */
405 8, /* jump_align. */
406 8, /* loop_align. */
407 2, /* int_reassoc_width. */
408 4, /* fp_reassoc_width. */
409 1 /* vec_reassoc_width. */
412 static const struct tune_params xgene1_tunings =
414 &xgene1_extra_costs,
415 &xgene1_addrcost_table,
416 &xgene1_regmove_cost,
417 &xgene1_vector_cost,
418 6, /* memmov_cost */
419 4, /* issue_rate */
420 AARCH64_FUSE_NOTHING, /* fuseable_ops */
421 16, /* function_align. */
422 8, /* jump_align. */
423 16, /* loop_align. */
424 2, /* int_reassoc_width. */
425 4, /* fp_reassoc_width. */
426 1 /* vec_reassoc_width. */
429 /* A processor implementing AArch64. */
430 struct processor
432 const char *const name;
433 enum aarch64_processor core;
434 const char *arch;
435 unsigned architecture_version;
436 const unsigned long flags;
437 const struct tune_params *const tune;
440 /* Processor cores implementing AArch64. */
441 static const struct processor all_cores[] =
443 #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \
444 {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
445 #include "aarch64-cores.def"
446 #undef AARCH64_CORE
447 {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
448 {NULL, aarch64_none, NULL, 0, 0, NULL}
451 /* Architectures implementing AArch64. */
452 static const struct processor all_architectures[] =
454 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
455 {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
456 #include "aarch64-arches.def"
457 #undef AARCH64_ARCH
458 {NULL, aarch64_none, NULL, 0, 0, NULL}
461 /* Target specification. These are populated as commandline arguments
462 are processed, or NULL if not specified. */
463 static const struct processor *selected_arch;
464 static const struct processor *selected_cpu;
465 static const struct processor *selected_tune;
467 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
469 /* An ISA extension in the co-processor and main instruction set space. */
470 struct aarch64_option_extension
472 const char *const name;
473 const unsigned long flags_on;
474 const unsigned long flags_off;
477 /* ISA extensions in AArch64. */
478 static const struct aarch64_option_extension all_extensions[] =
480 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
481 {NAME, FLAGS_ON, FLAGS_OFF},
482 #include "aarch64-option-extensions.def"
483 #undef AARCH64_OPT_EXTENSION
484 {NULL, 0, 0}
487 /* Used to track the size of an address when generating a pre/post
488 increment address. */
489 static machine_mode aarch64_memory_reference_mode;
491 /* A table of valid AArch64 "bitmask immediate" values for
492 logical instructions. */
494 #define AARCH64_NUM_BITMASKS 5334
495 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
497 typedef enum aarch64_cond_code
499 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
500 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
501 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
503 aarch64_cc;
505 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
507 /* The condition codes of the processor, and the inverse function. */
508 static const char * const aarch64_condition_codes[] =
510 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
511 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
514 static unsigned int
515 aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
517 return 2;
520 static int
521 aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
522 enum machine_mode mode)
524 if (VECTOR_MODE_P (mode))
525 return aarch64_tune_params->vec_reassoc_width;
526 if (INTEGRAL_MODE_P (mode))
527 return aarch64_tune_params->int_reassoc_width;
528 if (FLOAT_MODE_P (mode))
529 return aarch64_tune_params->fp_reassoc_width;
530 return 1;
533 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
534 unsigned
535 aarch64_dbx_register_number (unsigned regno)
537 if (GP_REGNUM_P (regno))
538 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
539 else if (regno == SP_REGNUM)
540 return AARCH64_DWARF_SP;
541 else if (FP_REGNUM_P (regno))
542 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
544 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
545 equivalent DWARF register. */
546 return DWARF_FRAME_REGISTERS;
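/* For example, with the AArch64 DWARF numbering (AARCH64_DWARF_R0 == 0,
   AARCH64_DWARF_SP == 31, AARCH64_DWARF_V0 == 64), x0-x30 map to 0-30,
   sp maps to 31 and v0-v31 map to 64-95; anything else (e.g. the CC
   register) has no DWARF equivalent and yields DWARF_FRAME_REGISTERS.  */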
549 /* Return TRUE if MODE is any of the large INT modes. */
550 static bool
551 aarch64_vect_struct_mode_p (machine_mode mode)
553 return mode == OImode || mode == CImode || mode == XImode;
556 /* Return TRUE if MODE is any of the vector modes. */
557 static bool
558 aarch64_vector_mode_p (machine_mode mode)
560 return aarch64_vector_mode_supported_p (mode)
561 || aarch64_vect_struct_mode_p (mode);
564 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
565 static bool
566 aarch64_array_mode_supported_p (machine_mode mode,
567 unsigned HOST_WIDE_INT nelems)
569 if (TARGET_SIMD
570 && AARCH64_VALID_SIMD_QREG_MODE (mode)
571 && (nelems >= 2 && nelems <= 4))
572 return true;
574 return false;
577 /* Implement HARD_REGNO_NREGS. */
580 aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
582 switch (aarch64_regno_regclass (regno))
584 case FP_REGS:
585 case FP_LO_REGS:
586 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
587 default:
588 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
590 gcc_unreachable ();
593 /* Implement HARD_REGNO_MODE_OK. */
596 aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
598 if (GET_MODE_CLASS (mode) == MODE_CC)
599 return regno == CC_REGNUM;
601 if (regno == SP_REGNUM)
602 /* The purpose of comparing with ptr_mode is to support the
603 global register variable associated with the stack pointer
604 register via the syntax of asm ("wsp") in ILP32. */
605 return mode == Pmode || mode == ptr_mode;
607 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
608 return mode == Pmode;
610 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
611 return 1;
613 if (FP_REGNUM_P (regno))
615 if (aarch64_vect_struct_mode_p (mode))
616 return
617 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
618 else
619 return 1;
622 return 0;
625 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
626 machine_mode
627 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
628 machine_mode mode)
630 /* Handle modes that fit within single registers. */
631 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
633 if (GET_MODE_SIZE (mode) >= 4)
634 return mode;
635 else
636 return SImode;
638 /* Fall back to generic for multi-reg and very large modes. */
639 else
640 return choose_hard_reg_mode (regno, nregs, false);
643 /* Return true if calls to DECL should be treated as
644 long-calls (i.e. called via a register). */
645 static bool
646 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
648 return false;
651 /* Return true if calls to symbol-ref SYM should be treated as
652 long-calls (i.e. called via a register). */
653 bool
654 aarch64_is_long_call_p (rtx sym)
656 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
659 /* Return true if the offsets to a zero/sign-extract operation
660 represent an expression that matches an extend operation. The
661 operands represent the parameters from
663 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
664 bool
665 aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
666 rtx extract_imm)
668 HOST_WIDE_INT mult_val, extract_val;
670 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
671 return false;
673 mult_val = INTVAL (mult_imm);
674 extract_val = INTVAL (extract_imm);
676 if (extract_val > 8
677 && extract_val < GET_MODE_BITSIZE (mode)
678 && exact_log2 (extract_val & ~7) > 0
679 && (extract_val & 7) <= 4
680 && mult_val == (1 << (extract_val & 7)))
681 return true;
683 return false;
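/* For example, mult_imm == 4 and extract_imm == 34 in DImode satisfy the
   checks above: 34 & ~7 == 32 is a power of two, 34 & 7 == 2, and
   4 == 1 << 2.  Such an extract takes the low 34 bits of (reg * 4),
   i.e. a zero-extended 32-bit register shifted left by 2, which is the
   operand form of an "uxtw #2" extend.  */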
686 /* Emit an insn that's a simple single-set. Both the operands must be
687 known to be valid. */
688 inline static rtx
689 emit_set_insn (rtx x, rtx y)
691 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
694 /* X and Y are two things to compare using CODE. Emit the compare insn and
695 return the rtx for register 0 in the proper mode. */
697 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
699 machine_mode mode = SELECT_CC_MODE (code, x, y);
700 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
702 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
703 return cc_reg;
706 /* Build the SYMBOL_REF for __tls_get_addr. */
708 static GTY(()) rtx tls_get_addr_libfunc;
711 aarch64_tls_get_addr (void)
713 if (!tls_get_addr_libfunc)
714 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
715 return tls_get_addr_libfunc;
718 /* Return the TLS model to use for ADDR. */
720 static enum tls_model
721 tls_symbolic_operand_type (rtx addr)
723 enum tls_model tls_kind = TLS_MODEL_NONE;
724 rtx sym, addend;
726 if (GET_CODE (addr) == CONST)
728 split_const (addr, &sym, &addend);
729 if (GET_CODE (sym) == SYMBOL_REF)
730 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
732 else if (GET_CODE (addr) == SYMBOL_REF)
733 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
735 return tls_kind;
738 /* We allow lo_sum expressions in our legitimate addresses
739 so that combine can take care of combining addresses where
740 necessary, but for generation purposes, we generate the address
741 as:
742 RTL Absolute
743 tmp = hi (symbol_ref); adrp x1, foo
744 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo12:foo
747 PIC TLS
748 adrp x1, :got:foo adrp tmp, :tlsgd:foo
749 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
750 bl __tls_get_addr
753 Load TLS symbol, depending on TLS mechanism and TLS access model.
755 Global Dynamic - Traditional TLS:
756 adrp tmp, :tlsgd:imm
757 add dest, tmp, #:tlsgd_lo12:imm
758 bl __tls_get_addr
760 Global Dynamic - TLS Descriptors:
761 adrp dest, :tlsdesc:imm
762 ldr tmp, [dest, #:tlsdesc_lo12:imm]
763 add dest, dest, #:tlsdesc_lo12:imm
764 blr tmp
765 mrs tp, tpidr_el0
766 add dest, dest, tp
768 Initial Exec:
769 mrs tp, tpidr_el0
770 adrp tmp, :gottprel:imm
771 ldr dest, [tmp, #:gottprel_lo12:imm]
772 add dest, dest, tp
774 Local Exec:
775 mrs tp, tpidr_el0
776 add t0, tp, #:tprel_hi12:imm, lsl #12
777 add t0, t0, #:tprel_lo12_nc:imm
780 static void
781 aarch64_load_symref_appropriately (rtx dest, rtx imm,
782 enum aarch64_symbol_type type)
784 switch (type)
786 case SYMBOL_SMALL_ABSOLUTE:
788 /* In ILP32, the mode of dest can be either SImode or DImode. */
789 rtx tmp_reg = dest;
790 machine_mode mode = GET_MODE (dest);
792 gcc_assert (mode == Pmode || mode == ptr_mode);
794 if (can_create_pseudo_p ())
795 tmp_reg = gen_reg_rtx (mode);
797 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
798 emit_insn (gen_add_losym (dest, tmp_reg, imm));
799 return;
802 case SYMBOL_TINY_ABSOLUTE:
803 emit_insn (gen_rtx_SET (Pmode, dest, imm));
804 return;
806 case SYMBOL_SMALL_GOT:
808 /* In ILP32, the mode of dest can be either SImode or DImode,
809 while the got entry is always of SImode size. The mode of
810 dest depends on how dest is used: if dest is assigned to a
811 pointer (e.g. in the memory), it has SImode; it may have
812 DImode if dest is dereferenced to access the memory.
813 This is why we have to handle three different ldr_got_small
814 patterns here (two patterns for ILP32). */
815 rtx tmp_reg = dest;
816 machine_mode mode = GET_MODE (dest);
818 if (can_create_pseudo_p ())
819 tmp_reg = gen_reg_rtx (mode);
821 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
822 if (mode == ptr_mode)
824 if (mode == DImode)
825 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
826 else
827 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
829 else
831 gcc_assert (mode == Pmode);
832 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
835 return;
838 case SYMBOL_SMALL_TLSGD:
840 rtx_insn *insns;
841 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
843 start_sequence ();
844 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
845 insns = get_insns ();
846 end_sequence ();
848 RTL_CONST_CALL_P (insns) = 1;
849 emit_libcall_block (insns, dest, result, imm);
850 return;
853 case SYMBOL_SMALL_TLSDESC:
855 machine_mode mode = GET_MODE (dest);
856 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
857 rtx tp;
859 gcc_assert (mode == Pmode || mode == ptr_mode);
861 /* In ILP32, the got entry is always of SImode size. Unlike
862 small GOT, the dest is fixed at reg 0. */
863 if (TARGET_ILP32)
864 emit_insn (gen_tlsdesc_small_si (imm));
865 else
866 emit_insn (gen_tlsdesc_small_di (imm));
867 tp = aarch64_load_tp (NULL);
869 if (mode != Pmode)
870 tp = gen_lowpart (mode, tp);
872 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
873 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
874 return;
877 case SYMBOL_SMALL_GOTTPREL:
879 /* In ILP32, the mode of dest can be either SImode or DImode,
880 while the got entry is always of SImode size. The mode of
881 dest depends on how dest is used: if dest is assigned to a
882 pointer (e.g. in the memory), it has SImode; it may have
883 DImode if dest is dereferenced to access the memory.
884 This is why we have to handle three different tlsie_small
885 patterns here (two patterns for ILP32). */
886 machine_mode mode = GET_MODE (dest);
887 rtx tmp_reg = gen_reg_rtx (mode);
888 rtx tp = aarch64_load_tp (NULL);
890 if (mode == ptr_mode)
892 if (mode == DImode)
893 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
894 else
896 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
897 tp = gen_lowpart (mode, tp);
900 else
902 gcc_assert (mode == Pmode);
903 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
906 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
907 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
908 return;
911 case SYMBOL_SMALL_TPREL:
913 rtx tp = aarch64_load_tp (NULL);
915 if (GET_MODE (dest) != Pmode)
916 tp = gen_lowpart (GET_MODE (dest), tp);
918 emit_insn (gen_tlsle_small (dest, tp, imm));
919 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
920 return;
923 case SYMBOL_TINY_GOT:
924 emit_insn (gen_ldr_got_tiny (dest, imm));
925 return;
927 default:
928 gcc_unreachable ();
932 /* Emit a move from SRC to DEST. Assume that the move expanders can
933 handle all moves if !can_create_pseudo_p (). The distinction is
934 important because, unlike emit_move_insn, the move expanders know
935 how to force Pmode objects into the constant pool even when the
936 constant pool address is not itself legitimate. */
937 static rtx
938 aarch64_emit_move (rtx dest, rtx src)
940 return (can_create_pseudo_p ()
941 ? emit_move_insn (dest, src)
942 : emit_move_insn_1 (dest, src));
945 /* Split a 128-bit move operation into two 64-bit move operations,
946 taking care to handle partial overlap of register to register
947 copies. Special cases are needed when moving between GP regs and
948 FP regs. SRC can be a register, constant or memory; DST a register
949 or memory. If either operand is memory it must not have any side
950 effects. */
951 void
952 aarch64_split_128bit_move (rtx dst, rtx src)
954 rtx dst_lo, dst_hi;
955 rtx src_lo, src_hi;
957 machine_mode mode = GET_MODE (dst);
959 gcc_assert (mode == TImode || mode == TFmode);
960 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
961 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
963 if (REG_P (dst) && REG_P (src))
965 int src_regno = REGNO (src);
966 int dst_regno = REGNO (dst);
968 /* Handle FP <-> GP regs. */
969 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
971 src_lo = gen_lowpart (word_mode, src);
972 src_hi = gen_highpart (word_mode, src);
974 if (mode == TImode)
976 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
977 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
979 else
981 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
982 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
984 return;
986 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
988 dst_lo = gen_lowpart (word_mode, dst);
989 dst_hi = gen_highpart (word_mode, dst);
991 if (mode == TImode)
993 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
994 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
996 else
998 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
999 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
1001 return;
1005 dst_lo = gen_lowpart (word_mode, dst);
1006 dst_hi = gen_highpart (word_mode, dst);
1007 src_lo = gen_lowpart (word_mode, src);
1008 src_hi = gen_highpart_mode (word_mode, mode, src);
1010 /* At most one pairing may overlap. */
1011 if (reg_overlap_mentioned_p (dst_lo, src_hi))
1013 aarch64_emit_move (dst_hi, src_hi);
1014 aarch64_emit_move (dst_lo, src_lo);
1016 else
1018 aarch64_emit_move (dst_lo, src_lo);
1019 aarch64_emit_move (dst_hi, src_hi);
1023 bool
1024 aarch64_split_128bit_move_p (rtx dst, rtx src)
1026 return (! REG_P (src)
1027 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
1030 /* Split a complex SIMD combine. */
1032 void
1033 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
1035 machine_mode src_mode = GET_MODE (src1);
1036 machine_mode dst_mode = GET_MODE (dst);
1038 gcc_assert (VECTOR_MODE_P (dst_mode));
1040 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
1042 rtx (*gen) (rtx, rtx, rtx);
1044 switch (src_mode)
1046 case V8QImode:
1047 gen = gen_aarch64_simd_combinev8qi;
1048 break;
1049 case V4HImode:
1050 gen = gen_aarch64_simd_combinev4hi;
1051 break;
1052 case V2SImode:
1053 gen = gen_aarch64_simd_combinev2si;
1054 break;
1055 case V2SFmode:
1056 gen = gen_aarch64_simd_combinev2sf;
1057 break;
1058 case DImode:
1059 gen = gen_aarch64_simd_combinedi;
1060 break;
1061 case DFmode:
1062 gen = gen_aarch64_simd_combinedf;
1063 break;
1064 default:
1065 gcc_unreachable ();
1068 emit_insn (gen (dst, src1, src2));
1069 return;
1073 /* Split a complex SIMD move. */
1075 void
1076 aarch64_split_simd_move (rtx dst, rtx src)
1078 machine_mode src_mode = GET_MODE (src);
1079 machine_mode dst_mode = GET_MODE (dst);
1081 gcc_assert (VECTOR_MODE_P (dst_mode));
1083 if (REG_P (dst) && REG_P (src))
1085 rtx (*gen) (rtx, rtx);
1087 gcc_assert (VECTOR_MODE_P (src_mode));
1089 switch (src_mode)
1091 case V16QImode:
1092 gen = gen_aarch64_split_simd_movv16qi;
1093 break;
1094 case V8HImode:
1095 gen = gen_aarch64_split_simd_movv8hi;
1096 break;
1097 case V4SImode:
1098 gen = gen_aarch64_split_simd_movv4si;
1099 break;
1100 case V2DImode:
1101 gen = gen_aarch64_split_simd_movv2di;
1102 break;
1103 case V4SFmode:
1104 gen = gen_aarch64_split_simd_movv4sf;
1105 break;
1106 case V2DFmode:
1107 gen = gen_aarch64_split_simd_movv2df;
1108 break;
1109 default:
1110 gcc_unreachable ();
1113 emit_insn (gen (dst, src));
1114 return;
1118 static rtx
1119 aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
1121 if (can_create_pseudo_p ())
1122 return force_reg (mode, value);
1123 else
1125 x = aarch64_emit_move (x, value);
1126 return x;
1131 static rtx
1132 aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
1134 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
1136 rtx high;
1137 /* Load the full offset into a register. This
1138 might be improvable in the future. */
1139 high = GEN_INT (offset);
1140 offset = 0;
1141 high = aarch64_force_temporary (mode, temp, high);
1142 reg = aarch64_force_temporary (mode, temp,
1143 gen_rtx_PLUS (mode, high, reg));
1145 return plus_constant (mode, reg, offset);
1148 static int
1149 aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
1150 machine_mode mode)
1152 unsigned HOST_WIDE_INT mask;
1153 int i;
1154 bool first;
1155 unsigned HOST_WIDE_INT val;
1156 bool subtargets;
1157 rtx subtarget;
1158 int one_match, zero_match, first_not_ffff_match;
1159 int num_insns = 0;
1161 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1163 if (generate)
1164 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1165 num_insns++;
1166 return num_insns;
1169 if (mode == SImode)
1171 /* We know we can't do this in 1 insn, and we must be able to do it
1172 in two; so don't mess around looking for sequences that don't buy
1173 us anything. */
1174 if (generate)
1176 emit_insn (gen_rtx_SET (VOIDmode, dest,
1177 GEN_INT (INTVAL (imm) & 0xffff)));
1178 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1179 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1181 num_insns += 2;
1182 return num_insns;
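/* For example, the SImode constant 0x12345678 is not a valid MOVZ, MOVN
   or bitmask immediate, so it is built here in exactly two instructions:
   mov dest, #0x5678 followed by the insv_immsi pattern, i.e.
   movk dest, #0x1234, lsl #16.  */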
1185 /* Remaining cases are all for DImode. */
1187 val = INTVAL (imm);
1188 subtargets = optimize && can_create_pseudo_p ();
1190 one_match = 0;
1191 zero_match = 0;
1192 mask = 0xffff;
1193 first_not_ffff_match = -1;
1195 for (i = 0; i < 64; i += 16, mask <<= 16)
1197 if ((val & mask) == mask)
1198 one_match++;
1199 else
1201 if (first_not_ffff_match < 0)
1202 first_not_ffff_match = i;
1203 if ((val & mask) == 0)
1204 zero_match++;
1208 if (one_match == 2)
1210 /* Set one of the quarters and then insert back into result. */
1211 mask = 0xffffll << first_not_ffff_match;
1212 if (generate)
1214 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1215 emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
1216 GEN_INT ((val >> first_not_ffff_match)
1217 & 0xffff)));
1219 num_insns += 2;
1220 return num_insns;
1223 if (zero_match == 2)
1224 goto simple_sequence;
1226 mask = 0x0ffff0000UL;
1227 for (i = 16; i < 64; i += 16, mask <<= 16)
1229 HOST_WIDE_INT comp = mask & ~(mask - 1);
1231 if (aarch64_uimm12_shift (val - (val & mask)))
1233 if (generate)
1235 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1236 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1237 GEN_INT (val & mask)));
1238 emit_insn (gen_adddi3 (dest, subtarget,
1239 GEN_INT (val - (val & mask))));
1241 num_insns += 2;
1242 return num_insns;
1244 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1246 if (generate)
1248 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1249 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1250 GEN_INT ((val + comp) & mask)));
1251 emit_insn (gen_adddi3 (dest, subtarget,
1252 GEN_INT (val - ((val + comp) & mask))));
1254 num_insns += 2;
1255 return num_insns;
1257 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1259 if (generate)
1261 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1262 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1263 GEN_INT ((val - comp) | ~mask)));
1264 emit_insn (gen_adddi3 (dest, subtarget,
1265 GEN_INT (val - ((val - comp) | ~mask))));
1267 num_insns += 2;
1268 return num_insns;
1270 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1272 if (generate)
1274 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1275 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1276 GEN_INT (val | ~mask)));
1277 emit_insn (gen_adddi3 (dest, subtarget,
1278 GEN_INT (val - (val | ~mask))));
1280 num_insns += 2;
1281 return num_insns;
1285 /* See if we can do it by arithmetically combining two
1286 immediates. */
1287 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1289 int j;
1290 mask = 0xffff;
1292 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1293 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1295 if (generate)
1297 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1298 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1299 GEN_INT (aarch64_bitmasks[i])));
1300 emit_insn (gen_adddi3 (dest, subtarget,
1301 GEN_INT (val - aarch64_bitmasks[i])));
1303 num_insns += 2;
1304 return num_insns;
1307 for (j = 0; j < 64; j += 16, mask <<= 16)
1309 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1311 if (generate)
1313 emit_insn (gen_rtx_SET (VOIDmode, dest,
1314 GEN_INT (aarch64_bitmasks[i])));
1315 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1316 GEN_INT ((val >> j) & 0xffff)));
1318 num_insns += 2;
1319 return num_insns;
1324 /* See if we can do it by logically combining two immediates. */
1325 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1327 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1329 int j;
1331 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1332 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1334 if (generate)
1336 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1337 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1338 GEN_INT (aarch64_bitmasks[i])));
1339 emit_insn (gen_iordi3 (dest, subtarget,
1340 GEN_INT (aarch64_bitmasks[j])));
1342 num_insns += 2;
1343 return num_insns;
1346 else if ((val & aarch64_bitmasks[i]) == val)
1348 int j;
1350 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1351 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1353 if (generate)
1355 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1356 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1357 GEN_INT (aarch64_bitmasks[j])));
1358 emit_insn (gen_anddi3 (dest, subtarget,
1359 GEN_INT (aarch64_bitmasks[i])));
1361 num_insns += 2;
1362 return num_insns;
1367 if (one_match > zero_match)
1369 /* Set either first three quarters or all but the third. */
1370 mask = 0xffffll << (16 - first_not_ffff_match);
1371 if (generate)
1372 emit_insn (gen_rtx_SET (VOIDmode, dest,
1373 GEN_INT (val | mask | 0xffffffff00000000ull)));
1374 num_insns ++;
1376 /* Now insert other two quarters. */
1377 for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
1378 i < 64; i += 16, mask <<= 16)
1380 if ((val & mask) != mask)
1382 if (generate)
1383 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1384 GEN_INT ((val >> i) & 0xffff)));
1385 num_insns ++;
1388 return num_insns;
1391 simple_sequence:
1392 first = true;
1393 mask = 0xffff;
1394 for (i = 0; i < 64; i += 16, mask <<= 16)
1396 if ((val & mask) != 0)
1398 if (first)
1400 if (generate)
1401 emit_insn (gen_rtx_SET (VOIDmode, dest,
1402 GEN_INT (val & mask)));
1403 num_insns ++;
1404 first = false;
1406 else
1408 if (generate)
1409 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1410 GEN_INT ((val >> i) & 0xffff)));
1411 num_insns ++;
1416 return num_insns;
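/* For example, the DImode constant 0x0000123400005678 has two zero
   quarters, so zero_match == 2 sends it straight to the sequence above:
   mov dest, #0x5678 followed by movk dest, #0x1234, lsl #32 (the
   insv_immdi pattern), two instructions in total.  */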
1420 void
1421 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1423 machine_mode mode = GET_MODE (dest);
1425 gcc_assert (mode == SImode || mode == DImode);
1427 /* Check on what type of symbol it is. */
1428 if (GET_CODE (imm) == SYMBOL_REF
1429 || GET_CODE (imm) == LABEL_REF
1430 || GET_CODE (imm) == CONST)
1432 rtx mem, base, offset;
1433 enum aarch64_symbol_type sty;
1435 /* If we have (const (plus symbol offset)), separate out the offset
1436 before we start classifying the symbol. */
1437 split_const (imm, &base, &offset);
1439 sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
1440 switch (sty)
1442 case SYMBOL_FORCE_TO_MEM:
1443 if (offset != const0_rtx
1444 && targetm.cannot_force_const_mem (mode, imm))
1446 gcc_assert (can_create_pseudo_p ());
1447 base = aarch64_force_temporary (mode, dest, base);
1448 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1449 aarch64_emit_move (dest, base);
1450 return;
1452 mem = force_const_mem (ptr_mode, imm);
1453 gcc_assert (mem);
1454 if (mode != ptr_mode)
1455 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1456 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1457 return;
1459 case SYMBOL_SMALL_TLSGD:
1460 case SYMBOL_SMALL_TLSDESC:
1461 case SYMBOL_SMALL_GOTTPREL:
1462 case SYMBOL_SMALL_GOT:
1463 case SYMBOL_TINY_GOT:
1464 if (offset != const0_rtx)
1466 gcc_assert(can_create_pseudo_p ());
1467 base = aarch64_force_temporary (mode, dest, base);
1468 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1469 aarch64_emit_move (dest, base);
1470 return;
1472 /* FALLTHRU */
1474 case SYMBOL_SMALL_TPREL:
1475 case SYMBOL_SMALL_ABSOLUTE:
1476 case SYMBOL_TINY_ABSOLUTE:
1477 aarch64_load_symref_appropriately (dest, imm, sty);
1478 return;
1480 default:
1481 gcc_unreachable ();
1485 if (!CONST_INT_P (imm))
1487 if (GET_CODE (imm) == HIGH)
1488 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1489 else
1491 rtx mem = force_const_mem (mode, imm);
1492 gcc_assert (mem);
1493 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1496 return;
1499 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
1502 static bool
1503 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1504 tree exp ATTRIBUTE_UNUSED)
1506 /* Currently, always true. */
1507 return true;
1510 /* Implement TARGET_PASS_BY_REFERENCE. */
1512 static bool
1513 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1514 machine_mode mode,
1515 const_tree type,
1516 bool named ATTRIBUTE_UNUSED)
1518 HOST_WIDE_INT size;
1519 machine_mode dummymode;
1520 int nregs;
1522 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1523 size = (mode == BLKmode && type)
1524 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1526 /* Aggregates are passed by reference based on their size. */
1527 if (type && AGGREGATE_TYPE_P (type))
1529 size = int_size_in_bytes (type);
1532 /* Variable sized arguments are always returned by reference. */
1533 if (size < 0)
1534 return true;
1536 /* Can this be a candidate to be passed in fp/simd register(s)? */
1537 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1538 &dummymode, &nregs,
1539 NULL))
1540 return false;
1542 /* Arguments which are variable sized or larger than 2 registers are
1543 passed by reference unless they are a homogeneous floating point
1544 aggregate. */
1545 return size > 2 * UNITS_PER_WORD;
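/* For example (illustrative types): a homogeneous floating-point
   aggregate such as  struct { double a, b, c; }  is a vfp candidate and
   is passed in SIMD/FP registers, not by reference;
   struct { long a, b; }  is 16 bytes and is passed in two general
   registers; but  struct { long a, b, c; }  is 24 bytes, exceeds two
   registers and is therefore passed by reference.  */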
1548 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1549 static bool
1550 aarch64_return_in_msb (const_tree valtype)
1552 machine_mode dummy_mode;
1553 int dummy_int;
1555 /* Never happens in little-endian mode. */
1556 if (!BYTES_BIG_ENDIAN)
1557 return false;
1559 /* Only composite types smaller than or equal to 16 bytes can
1560 be potentially returned in registers. */
1561 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1562 || int_size_in_bytes (valtype) <= 0
1563 || int_size_in_bytes (valtype) > 16)
1564 return false;
1566 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1567 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1568 is always passed/returned in the least significant bits of fp/simd
1569 register(s). */
1570 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1571 &dummy_mode, &dummy_int, NULL))
1572 return false;
1574 return true;
1577 /* Implement TARGET_FUNCTION_VALUE.
1578 Define how to find the value returned by a function. */
1580 static rtx
1581 aarch64_function_value (const_tree type, const_tree func,
1582 bool outgoing ATTRIBUTE_UNUSED)
1584 machine_mode mode;
1585 int unsignedp;
1586 int count;
1587 machine_mode ag_mode;
1589 mode = TYPE_MODE (type);
1590 if (INTEGRAL_TYPE_P (type))
1591 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1593 if (aarch64_return_in_msb (type))
1595 HOST_WIDE_INT size = int_size_in_bytes (type);
1597 if (size % UNITS_PER_WORD != 0)
1599 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1600 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1604 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1605 &ag_mode, &count, NULL))
1607 if (!aarch64_composite_type_p (type, mode))
1609 gcc_assert (count == 1 && mode == ag_mode);
1610 return gen_rtx_REG (mode, V0_REGNUM);
1612 else
1614 int i;
1615 rtx par;
1617 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1618 for (i = 0; i < count; i++)
1620 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1621 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1622 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1623 XVECEXP (par, 0, i) = tmp;
1625 return par;
1628 else
1629 return gen_rtx_REG (mode, R0_REGNUM);
1632 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1633 Return true if REGNO is the number of a hard register in which the values
1634 of called function may come back. */
1636 static bool
1637 aarch64_function_value_regno_p (const unsigned int regno)
1639 /* Maximum of 16 bytes can be returned in the general registers. Examples
1640 of 16-byte return values are: 128-bit integers and 16-byte small
1641 structures (excluding homogeneous floating-point aggregates). */
1642 if (regno == R0_REGNUM || regno == R1_REGNUM)
1643 return true;
1645 /* Up to four fp/simd registers can return a function value, e.g. a
1646 homogeneous floating-point aggregate having four members. */
1647 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1648 return !TARGET_GENERAL_REGS_ONLY;
1650 return false;
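/* For example, a 128-bit integer or a 16-byte struct of two pointers
   comes back in x0/x1, while a homogeneous aggregate of four doubles
   comes back in v0-v3 (at most HA_MAX_NUM_FLDS members).  */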
1653 /* Implement TARGET_RETURN_IN_MEMORY.
1655 If the type T of the result of a function is such that
1656 void func (T arg)
1657 would require that arg be passed as a value in a register (or set of
1658 registers) according to the parameter passing rules, then the result
1659 is returned in the same registers as would be used for such an
1660 argument. */
1662 static bool
1663 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1665 HOST_WIDE_INT size;
1666 machine_mode ag_mode;
1667 int count;
1669 if (!AGGREGATE_TYPE_P (type)
1670 && TREE_CODE (type) != COMPLEX_TYPE
1671 && TREE_CODE (type) != VECTOR_TYPE)
1672 /* Simple scalar types always returned in registers. */
1673 return false;
1675 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1676 type,
1677 &ag_mode,
1678 &count,
1679 NULL))
1680 return false;
1682 /* Types larger than 2 registers returned in memory. */
1683 size = int_size_in_bytes (type);
1684 return (size < 0 || size > 2 * UNITS_PER_WORD);
1687 static bool
1688 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
1689 const_tree type, int *nregs)
1691 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1692 return aarch64_vfp_is_call_or_return_candidate (mode,
1693 type,
1694 &pcum->aapcs_vfp_rmode,
1695 nregs,
1696 NULL);
1699 /* Given MODE and TYPE of a function argument, return the alignment in
1700 bits. The idea is to suppress any stronger alignment requested by
1701 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1702 This is a helper function for local use only. */
1704 static unsigned int
1705 aarch64_function_arg_alignment (machine_mode mode, const_tree type)
1707 unsigned int alignment;
1709 if (type)
1711 if (!integer_zerop (TYPE_SIZE (type)))
1713 if (TYPE_MODE (type) == mode)
1714 alignment = TYPE_ALIGN (type);
1715 else
1716 alignment = GET_MODE_ALIGNMENT (mode);
1718 else
1719 alignment = 0;
1721 else
1722 alignment = GET_MODE_ALIGNMENT (mode);
1724 return alignment;
1727 /* Layout a function argument according to the AAPCS64 rules. The rule
1728 numbers refer to the rule numbers in the AAPCS64. */
1730 static void
1731 aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
1732 const_tree type,
1733 bool named ATTRIBUTE_UNUSED)
1735 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1736 int ncrn, nvrn, nregs;
1737 bool allocate_ncrn, allocate_nvrn;
1738 HOST_WIDE_INT size;
1740 /* We need to do this once per argument. */
1741 if (pcum->aapcs_arg_processed)
1742 return;
1744 pcum->aapcs_arg_processed = true;
1746 /* Size in bytes, rounded up to the nearest multiple of 8 bytes. */
1747 size
1748 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1749 UNITS_PER_WORD);
1751 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1752 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1753 mode,
1754 type,
1755 &nregs);
1757 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1758 The following code thus handles passing by SIMD/FP registers first. */
1760 nvrn = pcum->aapcs_nvrn;
1762 /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
1763 and homogeneous short-vector aggregates (HVA). */
1764 if (allocate_nvrn)
1766 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1768 pcum->aapcs_nextnvrn = nvrn + nregs;
1769 if (!aarch64_composite_type_p (type, mode))
1771 gcc_assert (nregs == 1);
1772 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1774 else
1776 rtx par;
1777 int i;
1778 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1779 for (i = 0; i < nregs; i++)
1781 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1782 V0_REGNUM + nvrn + i);
1783 tmp = gen_rtx_EXPR_LIST
1784 (VOIDmode, tmp,
1785 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1786 XVECEXP (par, 0, i) = tmp;
1788 pcum->aapcs_reg = par;
1790 return;
1792 else
1794 /* C.3 NSRN is set to 8. */
1795 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1796 goto on_stack;
1800 ncrn = pcum->aapcs_ncrn;
1801 nregs = size / UNITS_PER_WORD;
1803 /* C6 - C9, though the sign and zero extension semantics are
1804 handled elsewhere. This is the case where the argument fits
1805 entirely in general registers. */
1806 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1808 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1810 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1812 /* C.8 if the argument has an alignment of 16 then the NGRN is
1813 rounded up to the next even number. */
1814 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1816 ++ncrn;
1817 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1819 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1820 A reg is still generated for it, but the caller should be smart
1821 enough not to use it. */
1822 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1824 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1826 else
1828 rtx par;
1829 int i;
1831 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1832 for (i = 0; i < nregs; i++)
1834 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1835 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1836 GEN_INT (i * UNITS_PER_WORD));
1837 XVECEXP (par, 0, i) = tmp;
1839 pcum->aapcs_reg = par;
1842 pcum->aapcs_nextncrn = ncrn + nregs;
1843 return;
1846 /* C.11 */
1847 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1849 /* The argument is passed on stack; record the needed number of words for
1850 this argument and align the total size if necessary. */
1851 on_stack:
1852 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1853 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1854 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1855 16 / UNITS_PER_WORD);
1856 return;
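/* As a worked example of rule C.8: for a call f (int, __int128), the int
   occupies w0, leaving NGRN == 1; the __int128 needs two registers and
   has 16-byte alignment, so NGRN is rounded up to an even number and the
   value goes in x2/x3, with x1 left unused.  */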
1859 /* Implement TARGET_FUNCTION_ARG. */
1861 static rtx
1862 aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
1863 const_tree type, bool named)
1865 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1866 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1868 if (mode == VOIDmode)
1869 return NULL_RTX;
1871 aarch64_layout_arg (pcum_v, mode, type, named);
1872 return pcum->aapcs_reg;
1875 void
1876 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1877 const_tree fntype ATTRIBUTE_UNUSED,
1878 rtx libname ATTRIBUTE_UNUSED,
1879 const_tree fndecl ATTRIBUTE_UNUSED,
1880 unsigned n_named ATTRIBUTE_UNUSED)
1882 pcum->aapcs_ncrn = 0;
1883 pcum->aapcs_nvrn = 0;
1884 pcum->aapcs_nextncrn = 0;
1885 pcum->aapcs_nextnvrn = 0;
1886 pcum->pcs_variant = ARM_PCS_AAPCS64;
1887 pcum->aapcs_reg = NULL_RTX;
1888 pcum->aapcs_arg_processed = false;
1889 pcum->aapcs_stack_words = 0;
1890 pcum->aapcs_stack_size = 0;
1892 return;
1895 static void
1896 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1897 machine_mode mode,
1898 const_tree type,
1899 bool named)
1901 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1902 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1904 aarch64_layout_arg (pcum_v, mode, type, named);
1905 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1906 != (pcum->aapcs_stack_words != 0));
1907 pcum->aapcs_arg_processed = false;
1908 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1909 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1910 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1911 pcum->aapcs_stack_words = 0;
1912 pcum->aapcs_reg = NULL_RTX;
1916 bool
1917 aarch64_function_arg_regno_p (unsigned regno)
1919 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1920 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1923 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1924 PARM_BOUNDARY bits of alignment, but will be given anything up
1925 to STACK_BOUNDARY bits if the type requires it. This makes sure
1926 that both before and after the layout of each argument, the Next
1927 Stacked Argument Address (NSAA) will have a minimum alignment of
1928 8 bytes. */
1930 static unsigned int
1931 aarch64_function_arg_boundary (machine_mode mode, const_tree type)
1933 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1935 if (alignment < PARM_BOUNDARY)
1936 alignment = PARM_BOUNDARY;
1937 if (alignment > STACK_BOUNDARY)
1938 alignment = STACK_BOUNDARY;
1939 return alignment;
1942 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1944 Return true if an argument passed on the stack should be padded upwards,
1945 i.e. if the least-significant byte of the stack slot has useful data.
1947 Small aggregate types are placed in the lowest memory address.
1949 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1951 bool
1952 aarch64_pad_arg_upward (machine_mode mode, const_tree type)
1954 /* On little-endian targets, the least significant byte of every stack
1955 argument is passed at the lowest byte address of the stack slot. */
1956 if (!BYTES_BIG_ENDIAN)
1957 return true;
1959 /* Otherwise, integral, floating-point and pointer types are padded downward:
1960 the least significant byte of a stack argument is passed at the highest
1961 byte address of the stack slot. */
1962 if (type
1963 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1964 || POINTER_TYPE_P (type))
1965 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1966 return false;
1968 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1969 return true;
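/* For example, on a big-endian target a  short  passed on the stack is
   padded downward (its value sits in the highest bytes of the slot),
   while a 3-byte structure is padded upward, with its bytes at the
   lowest addresses of the slot.  On little-endian targets everything is
   padded upward.  */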
1972 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1974 It specifies padding for the last (may also be the only)
1975 element of a block move between registers and memory. If
1976 assuming the block is in the memory, padding upward means that
1977 the last element is padded after its highest significant byte,
1978 while in downward padding, the last element is padded at
1979 its least significant byte side.
1981 Small aggregates and small complex types are always padded
1982 upwards.
1984 We don't need to worry about homogeneous floating-point or
1985 short-vector aggregates; their move is not affected by the
1986 padding direction determined here. Regardless of endianness,
1987 each element of such an aggregate is put in the least
1988 significant bits of a fp/simd register.
1990 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1991 register has useful data, and return the opposite if the most
1992 significant byte does. */
1994 bool
1995 aarch64_pad_reg_upward (machine_mode mode, const_tree type,
1996 bool first ATTRIBUTE_UNUSED)
1999 /* Small composite types are always padded upward. */
2000 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2002 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2003 : GET_MODE_SIZE (mode));
2004 if (size < 2 * UNITS_PER_WORD)
2005 return true;
2008 /* Otherwise, use the default padding. */
2009 return !BYTES_BIG_ENDIAN;
2012 static machine_mode
2013 aarch64_libgcc_cmp_return_mode (void)
2015 return SImode;
2018 static bool
2019 aarch64_frame_pointer_required (void)
2021 /* In aarch64_override_options_after_change
2022 flag_omit_leaf_frame_pointer turns off the frame pointer by
2023 default. Turn it back on now if we've not got a leaf
2024 function. */
2025 if (flag_omit_leaf_frame_pointer
2026 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2027 return true;
2029 return false;
2032 /* Mark the registers that need to be saved by the callee and calculate
2033 the size of the callee-saved registers area and frame record (both FP
2034 and LR may be omitted). */
2035 static void
2036 aarch64_layout_frame (void)
2038 HOST_WIDE_INT offset = 0;
2039 int regno;
2041 if (reload_completed && cfun->machine->frame.laid_out)
2042 return;
2044 #define SLOT_NOT_REQUIRED (-2)
2045 #define SLOT_REQUIRED (-1)
2047 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
2048 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
2050 /* First mark all the registers that really need to be saved... */
2051 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2052 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2054 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2055 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2057 /* ... that includes the eh data registers (if needed)... */
2058 if (crtl->calls_eh_return)
2059 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
2060 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2061 = SLOT_REQUIRED;
2063 /* ... and any callee saved register that dataflow says is live. */
2064 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2065 if (df_regs_ever_live_p (regno)
2066 && (regno == R30_REGNUM
2067 || !call_used_regs[regno]))
2068 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2070 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2071 if (df_regs_ever_live_p (regno)
2072 && !call_used_regs[regno])
2073 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2075 if (frame_pointer_needed)
2077 /* FP and LR are placed in the linkage record. */
2078 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
2079 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2080 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
2081 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
2082 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2083 offset += 2 * UNITS_PER_WORD;
2086 /* Now assign stack slots for them. */
2087 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2088 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2090 cfun->machine->frame.reg_offset[regno] = offset;
2091 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2092 cfun->machine->frame.wb_candidate1 = regno;
2093 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
2094 cfun->machine->frame.wb_candidate2 = regno;
2095 offset += UNITS_PER_WORD;
2098 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2099 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2101 cfun->machine->frame.reg_offset[regno] = offset;
2102 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2103 cfun->machine->frame.wb_candidate1 = regno;
2104 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
2105 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2106 cfun->machine->frame.wb_candidate2 = regno;
2107 offset += UNITS_PER_WORD;
2110 cfun->machine->frame.padding0 =
2111 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
2112 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2114 cfun->machine->frame.saved_regs_size = offset;
2116 cfun->machine->frame.hard_fp_offset
2117 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
2118 + get_frame_size ()
2119 + cfun->machine->frame.saved_regs_size,
2120 STACK_BOUNDARY / BITS_PER_UNIT);
2122 cfun->machine->frame.frame_size
2123 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
2124 + crtl->outgoing_args_size,
2125 STACK_BOUNDARY / BITS_PER_UNIT);
2127 cfun->machine->frame.laid_out = true;
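/* Return true if REGNO was allocated a save slot by aarch64_layout_frame
   and therefore must be saved on entry to the current function.  */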
2130 static bool
2131 aarch64_register_saved_on_entry (int regno)
2133 return cfun->machine->frame.reg_offset[regno] >= 0;
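/* Return the first register in the range [REGNO, LIMIT] that needs to be
   saved on entry; the result is greater than LIMIT if there is none.  */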
2136 static unsigned
2137 aarch64_next_callee_save (unsigned regno, unsigned limit)
2139 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2140 regno++;
2141 return regno;
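/* Push register REGNO of mode MODE onto the stack, pre-decrementing the
   stack pointer by ADJUSTMENT bytes, and mark the store as frame-related.  */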
2144 static void
2145 aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
2146 HOST_WIDE_INT adjustment)
2148 rtx base_rtx = stack_pointer_rtx;
2149 rtx insn, reg, mem;
2151 reg = gen_rtx_REG (mode, regno);
2152 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2153 plus_constant (Pmode, base_rtx, -adjustment));
2154 mem = gen_rtx_MEM (mode, mem);
2156 insn = emit_move_insn (mem, reg);
2157 RTX_FRAME_RELATED_P (insn) = 1;
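/* Generate the rtx pattern for a write-back store pair: store REG and REG2
   at BASE while pre-decrementing BASE by ADJUSTMENT bytes.  MODE selects
   between the DImode and DFmode variants.  */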
2160 static rtx
2161 aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
2162 HOST_WIDE_INT adjustment)
2164 switch (mode)
2166 case DImode:
2167 return gen_storewb_pairdi_di (base, base, reg, reg2,
2168 GEN_INT (-adjustment),
2169 GEN_INT (UNITS_PER_WORD - adjustment));
2170 case DFmode:
2171 return gen_storewb_pairdf_di (base, base, reg, reg2,
2172 GEN_INT (-adjustment),
2173 GEN_INT (UNITS_PER_WORD - adjustment));
2174 default:
2175 gcc_unreachable ();
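/* Push the register pair REGNO1/REGNO2 of mode MODE onto the stack with a
   write-back pre-decrement of ADJUSTMENT bytes, marking the stores as
   frame-related.  */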
2179 static void
2180 aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
2181 unsigned regno2, HOST_WIDE_INT adjustment)
2183 rtx_insn *insn;
2184 rtx reg1 = gen_rtx_REG (mode, regno1);
2185 rtx reg2 = gen_rtx_REG (mode, regno2);
2187 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2188 reg2, adjustment));
2189 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2190 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2191 RTX_FRAME_RELATED_P (insn) = 1;
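/* Generate the rtx pattern for a write-back load pair: load REG and REG2
   from BASE and post-increment BASE by ADJUSTMENT bytes.  */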
2194 static rtx
2195 aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
2196 HOST_WIDE_INT adjustment)
2198 switch (mode)
2200 case DImode:
2201 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
2202 GEN_INT (UNITS_PER_WORD));
2203 case DFmode:
2204 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
2205 GEN_INT (UNITS_PER_WORD));
2206 default:
2207 gcc_unreachable ();
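/* Generate a store-pair pattern of mode MODE storing REG1 and REG2 to MEM1
   and MEM2.  */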
2211 static rtx
2212 aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
2213 rtx reg2)
2215 switch (mode)
2217 case DImode:
2218 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2220 case DFmode:
2221 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2223 default:
2224 gcc_unreachable ();
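/* Generate a load-pair pattern of mode MODE loading REG1 and REG2 from MEM1
   and MEM2.  */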
2228 static rtx
2229 aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
2230 rtx mem2)
2232 switch (mode)
2234 case DImode:
2235 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2237 case DFmode:
2238 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2240 default:
2241 gcc_unreachable ();
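/* Emit code to save the callee-saved registers of mode MODE in the range
   [START, LIMIT], at offset START_OFFSET from the stack pointer, using
   store-pair instructions for adjacent slots where possible.  If SKIP_WB,
   omit the registers already handled by the write-back push.  */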
2246 static void
2247 aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
2248 unsigned start, unsigned limit, bool skip_wb)
2250 rtx_insn *insn;
2251 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
2252 ? gen_frame_mem : gen_rtx_MEM);
2253 unsigned regno;
2254 unsigned regno2;
2256 for (regno = aarch64_next_callee_save (start, limit);
2257 regno <= limit;
2258 regno = aarch64_next_callee_save (regno + 1, limit))
2260 rtx reg, mem;
2261 HOST_WIDE_INT offset;
2263 if (skip_wb
2264 && (regno == cfun->machine->frame.wb_candidate1
2265 || regno == cfun->machine->frame.wb_candidate2))
2266 continue;
2268 reg = gen_rtx_REG (mode, regno);
2269 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2270 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2271 offset));
2273 regno2 = aarch64_next_callee_save (regno + 1, limit);
2275 if (regno2 <= limit
2276 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2277 == cfun->machine->frame.reg_offset[regno2]))
2280 rtx reg2 = gen_rtx_REG (mode, regno2);
2281 rtx mem2;
2283 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2284 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2285 offset));
2286 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2287 reg2));
2289 /* The first part of a frame-related parallel insn is
2290 always assumed to be relevant to the frame
2291 calculations; subsequent parts are only
2292 frame-related if explicitly marked. */
2293 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2294 regno = regno2;
2296 else
2297 insn = emit_move_insn (mem, reg);
2299 RTX_FRAME_RELATED_P (insn) = 1;
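/* Emit code to restore the callee-saved registers of mode MODE in the range
   [START, LIMIT] from offset START_OFFSET off the stack pointer, appending
   the required REG_CFA_RESTORE notes to *CFI_OPS.  If SKIP_WB, omit the
   registers restored by the write-back pop.  */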
2303 static void
2304 aarch64_restore_callee_saves (machine_mode mode,
2305 HOST_WIDE_INT start_offset, unsigned start,
2306 unsigned limit, bool skip_wb, rtx *cfi_ops)
2308 rtx base_rtx = stack_pointer_rtx;
2309 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
2310 ? gen_frame_mem : gen_rtx_MEM);
2311 unsigned regno;
2312 unsigned regno2;
2313 HOST_WIDE_INT offset;
2315 for (regno = aarch64_next_callee_save (start, limit);
2316 regno <= limit;
2317 regno = aarch64_next_callee_save (regno + 1, limit))
2319 rtx reg, mem;
2321 if (skip_wb
2322 && (regno == cfun->machine->frame.wb_candidate1
2323 || regno == cfun->machine->frame.wb_candidate2))
2324 continue;
2326 reg = gen_rtx_REG (mode, regno);
2327 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2328 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2330 regno2 = aarch64_next_callee_save (regno + 1, limit);
2332 if (regno2 <= limit
2333 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2334 == cfun->machine->frame.reg_offset[regno2]))
2336 rtx reg2 = gen_rtx_REG (mode, regno2);
2337 rtx mem2;
2339 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2340 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2341 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
2343 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
2344 regno = regno2;
2346 else
2347 emit_move_insn (reg, mem);
2348 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
2352 /* AArch64 stack frames generated by this compiler look like:
2354 +-------------------------------+
2356 | incoming stack arguments |
2358 +-------------------------------+
2359 | | <-- incoming stack pointer (aligned)
2360 | callee-allocated save area |
2361 | for register varargs |
2363 +-------------------------------+
2364 | local variables | <-- frame_pointer_rtx
2366 +-------------------------------+
2367 | padding0 | \
2368 +-------------------------------+ |
2369 | callee-saved registers | | frame.saved_regs_size
2370 +-------------------------------+ |
2371 | LR' | |
2372 +-------------------------------+ |
2373 | FP' | / <- hard_frame_pointer_rtx (aligned)
2374 +-------------------------------+
2375 | dynamic allocation |
2376 +-------------------------------+
2377 | padding |
2378 +-------------------------------+
2379 | outgoing stack arguments | <-- arg_pointer
2381 +-------------------------------+
2382 | | <-- stack_pointer_rtx (aligned)
2384 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2385 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2386 unchanged. */
2388 /* Generate the prologue instructions for entry into a function.
2389 Establish the stack frame by decreasing the stack pointer with a
2390 properly calculated size and, if necessary, create a frame record
2391 filled with the values of LR and previous frame pointer. The
2392 current FP is also set up if it is in use. */
2394 void
2395 aarch64_expand_prologue (void)
2397 /* sub sp, sp, #<frame_size>
2398 stp {fp, lr}, [sp, #<frame_size> - 16]
2399 add fp, sp, #<frame_size> - hardfp_offset
2400 stp {cs_reg}, [fp, #-16] etc.
2402 sub sp, sp, <final_adjustment_if_any>
2403 */
2404 HOST_WIDE_INT frame_size, offset;
2405 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
2406 HOST_WIDE_INT hard_fp_offset;
2407 rtx_insn *insn;
2409 aarch64_layout_frame ();
2411 offset = frame_size = cfun->machine->frame.frame_size;
2412 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2413 fp_offset = frame_size - hard_fp_offset;
2415 if (flag_stack_usage_info)
2416 current_function_static_stack_size = frame_size;
2418 /* Store pairs and load pairs have a range of only -512 to 504. */
2419 if (offset >= 512)
2421 /* When the frame has a large size, an initial decrease is done on
2422 the stack pointer to jump over the callee-allocated save area for
2423 register varargs, the local variable area and/or the callee-saved
2424 register area. This will allow the pre-index write-back
2425 store pair instructions to be used for setting up the stack frame
2426 efficiently. */
2427 offset = hard_fp_offset;
2428 if (offset >= 512)
2429 offset = cfun->machine->frame.saved_regs_size;
2431 frame_size -= (offset + crtl->outgoing_args_size);
2432 fp_offset = 0;
2434 if (frame_size >= 0x1000000)
2436 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2437 emit_move_insn (op0, GEN_INT (-frame_size));
2438 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2440 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2441 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2442 plus_constant (Pmode, stack_pointer_rtx,
2443 -frame_size)));
2444 RTX_FRAME_RELATED_P (insn) = 1;
2446 else if (frame_size > 0)
2448 int hi_ofs = frame_size & 0xfff000;
2449 int lo_ofs = frame_size & 0x000fff;
2451 if (hi_ofs)
2453 insn = emit_insn (gen_add2_insn
2454 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
2455 RTX_FRAME_RELATED_P (insn) = 1;
2457 if (lo_ofs)
2459 insn = emit_insn (gen_add2_insn
2460 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
2461 RTX_FRAME_RELATED_P (insn) = 1;
2465 else
2466 frame_size = -1;
2468 if (offset > 0)
2470 bool skip_wb = false;
2472 if (frame_pointer_needed)
2474 skip_wb = true;
2476 if (fp_offset)
2478 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2479 GEN_INT (-offset)));
2480 RTX_FRAME_RELATED_P (insn) = 1;
2482 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
2483 R30_REGNUM, false);
2485 else
2486 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
2488 /* Set up frame pointer to point to the location of the
2489 previous frame pointer on the stack. */
2490 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2491 stack_pointer_rtx,
2492 GEN_INT (fp_offset)));
2493 RTX_FRAME_RELATED_P (insn) = 1;
2494 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
2496 else
2498 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2499 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2501 if (fp_offset
2502 || reg1 == FIRST_PSEUDO_REGISTER
2503 || (reg2 == FIRST_PSEUDO_REGISTER
2504 && offset >= 256))
2506 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2507 GEN_INT (-offset)));
2508 RTX_FRAME_RELATED_P (insn) = 1;
2510 else
2512 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2514 skip_wb = true;
2516 if (reg2 == FIRST_PSEUDO_REGISTER)
2517 aarch64_pushwb_single_reg (mode1, reg1, offset);
2518 else
2519 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2523 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2524 skip_wb);
2525 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2526 skip_wb);
2529 /* When offset >= 512,
2530 sub sp, sp, #<outgoing_args_size> */
2531 if (frame_size > -1)
2533 if (crtl->outgoing_args_size > 0)
2535 insn = emit_insn (gen_add2_insn
2536 (stack_pointer_rtx,
2537 GEN_INT (- crtl->outgoing_args_size)));
2538 RTX_FRAME_RELATED_P (insn) = 1;
2543 /* Return TRUE if we can use a simple_return insn.
2545 This function checks whether the callee-saved stack is empty, which
2546 means no restore actions are needed. The pro_and_epilogue pass uses
2547 this to check whether the shrink-wrapping optimization is feasible. */
2549 bool
2550 aarch64_use_return_insn_p (void)
2552 if (!reload_completed)
2553 return false;
2555 if (crtl->profile)
2556 return false;
2558 aarch64_layout_frame ();
2560 return cfun->machine->frame.frame_size == 0;
2563 /* Generate the epilogue instructions for returning from a function. */
2564 void
2565 aarch64_expand_epilogue (bool for_sibcall)
2567 HOST_WIDE_INT frame_size, offset;
2568 HOST_WIDE_INT fp_offset;
2569 HOST_WIDE_INT hard_fp_offset;
2570 rtx_insn *insn;
2571 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
2572 bool need_barrier_p = (get_frame_size () != 0
2573 || cfun->machine->frame.saved_varargs_size);
2575 aarch64_layout_frame ();
2577 offset = frame_size = cfun->machine->frame.frame_size;
2578 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2579 fp_offset = frame_size - hard_fp_offset;
2581 /* Store pairs and load pairs have a range of only -512 to 504. */
2582 if (offset >= 512)
2584 offset = hard_fp_offset;
2585 if (offset >= 512)
2586 offset = cfun->machine->frame.saved_regs_size;
2588 frame_size -= (offset + crtl->outgoing_args_size);
2589 fp_offset = 0;
2590 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2592 insn = emit_insn (gen_add2_insn
2593 (stack_pointer_rtx,
2594 GEN_INT (crtl->outgoing_args_size)));
2595 RTX_FRAME_RELATED_P (insn) = 1;
2598 else
2599 frame_size = -1;
2601 /* If there were outgoing arguments or we've done dynamic stack
2602 allocation, then restore the stack pointer from the frame
2603 pointer. This is at most one insn and more efficient than using
2604 GCC's internal mechanism. */
2605 if (frame_pointer_needed
2606 && (crtl->outgoing_args_size || cfun->calls_alloca))
2608 if (cfun->calls_alloca)
2609 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2611 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2612 hard_frame_pointer_rtx,
2613 GEN_INT (0)));
2614 offset = offset - fp_offset;
2617 if (offset > 0)
2619 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2620 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2621 bool skip_wb = true;
2622 rtx cfi_ops = NULL;
2624 if (frame_pointer_needed)
2625 fp_offset = 0;
2626 else if (fp_offset
2627 || reg1 == FIRST_PSEUDO_REGISTER
2628 || (reg2 == FIRST_PSEUDO_REGISTER
2629 && offset >= 256))
2630 skip_wb = false;
2632 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2633 skip_wb, &cfi_ops);
2634 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2635 skip_wb, &cfi_ops);
2637 if (need_barrier_p)
2638 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2640 if (skip_wb)
2642 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2643 rtx rreg1 = gen_rtx_REG (mode1, reg1);
2645 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
2646 if (reg2 == FIRST_PSEUDO_REGISTER)
2648 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2649 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2650 mem = gen_rtx_MEM (mode1, mem);
2651 insn = emit_move_insn (rreg1, mem);
2653 else
2655 rtx rreg2 = gen_rtx_REG (mode1, reg2);
2657 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2658 insn = emit_insn (aarch64_gen_loadwb_pair
2659 (mode1, stack_pointer_rtx, rreg1,
2660 rreg2, offset));
2663 else
2665 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2666 GEN_INT (offset)));
2669 /* Reset the CFA to be SP + FRAME_SIZE. */
2670 rtx new_cfa = stack_pointer_rtx;
2671 if (frame_size > 0)
2672 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2673 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2674 REG_NOTES (insn) = cfi_ops;
2675 RTX_FRAME_RELATED_P (insn) = 1;
2678 if (frame_size > 0)
2680 if (need_barrier_p)
2681 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2683 if (frame_size >= 0x1000000)
2685 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2686 emit_move_insn (op0, GEN_INT (frame_size));
2687 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2689 else
2691 int hi_ofs = frame_size & 0xfff000;
2692 int lo_ofs = frame_size & 0x000fff;
2694 if (hi_ofs && lo_ofs)
2696 insn = emit_insn (gen_add2_insn
2697 (stack_pointer_rtx, GEN_INT (hi_ofs)));
2698 RTX_FRAME_RELATED_P (insn) = 1;
2699 frame_size = lo_ofs;
2701 insn = emit_insn (gen_add2_insn
2702 (stack_pointer_rtx, GEN_INT (frame_size)));
2705 /* Reset the CFA to be SP + 0. */
2706 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2707 RTX_FRAME_RELATED_P (insn) = 1;
2710 /* Stack adjustment for exception handler. */
2711 if (crtl->calls_eh_return)
2713 /* We need to unwind the stack by the offset computed by
2714 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2715 to be SP; letting the CFA move during this adjustment
2716 is just as correct as retaining the CFA from the body
2717 of the function. Therefore, do nothing special. */
2718 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2721 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2722 if (!for_sibcall)
2723 emit_jump_insn (ret_rtx);
2726 /* Return the place to copy the exception unwinding return address to.
2727 This will probably be a stack slot, but could (in theory) be the
2728 return register. */
2729 rtx
2730 aarch64_final_eh_return_addr (void)
2732 HOST_WIDE_INT fp_offset;
2734 aarch64_layout_frame ();
2736 fp_offset = cfun->machine->frame.frame_size
2737 - cfun->machine->frame.hard_fp_offset;
2739 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2740 return gen_rtx_REG (DImode, LR_REGNUM);
2742 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2743 result in a store to save LR introduced by builtin_eh_return () being
2744 incorrectly deleted because the alias is not detected.
2745 So in the calculation of the address to copy the exception unwinding
2746 return address to, we note 2 cases.
2747 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2748 we return a SP-relative location since all the addresses are SP-relative
2749 in this case. This prevents the store from being optimized away.
2750 If the fp_offset is not 0, then the addresses will be FP-relative and
2751 therefore we return a FP-relative location. */
2753 if (frame_pointer_needed)
2755 if (fp_offset)
2756 return gen_frame_mem (DImode,
2757 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2758 else
2759 return gen_frame_mem (DImode,
2760 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2763 /* If FP is not needed, we calculate the location of LR, which would be
2764 at the top of the saved registers block. */
2766 return gen_frame_mem (DImode,
2767 plus_constant (Pmode,
2768 stack_pointer_rtx,
2769 fp_offset
2770 + cfun->machine->frame.saved_regs_size
2771 - 2 * UNITS_PER_WORD));
2774 /* Possibly output code to build up a constant in a register. For
2775 the benefit of the costs infrastructure, returns the number of
2776 instructions which would be emitted. GENERATE inhibits or
2777 enables code generation. */
2779 static int
2780 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2782 int insns = 0;
2784 if (aarch64_bitmask_imm (val, DImode))
2786 if (generate)
2787 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2788 insns = 1;
2790 else
2792 int i;
2793 int ncount = 0;
2794 int zcount = 0;
2795 HOST_WIDE_INT valp = val >> 16;
2796 HOST_WIDE_INT valm;
2797 HOST_WIDE_INT tval;
2799 for (i = 16; i < 64; i += 16)
2801 valm = (valp & 0xffff);
2803 if (valm != 0)
2804 ++ zcount;
2806 if (valm != 0xffff)
2807 ++ ncount;
2809 valp >>= 16;
2812 /* zcount contains the number of additional MOVK instructions
2813 required if the constant is built up with an initial MOVZ instruction,
2814 while ncount is the number of MOVK instructions required if starting
2815 with a MOVN instruction. Choose the sequence that yields the fewest
2816 instructions, preferring the MOVZ-based sequence when the two counts
2817 are the same. */
2818 if (ncount < zcount)
2820 if (generate)
2821 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2822 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2823 tval = 0xffff;
2824 insns++;
2826 else
2828 if (generate)
2829 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2830 GEN_INT (val & 0xffff));
2831 tval = 0;
2832 insns++;
2835 val >>= 16;
2837 for (i = 16; i < 64; i += 16)
2839 if ((val & 0xffff) != tval)
2841 if (generate)
2842 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2843 GEN_INT (i),
2844 GEN_INT (val & 0xffff)));
2845 insns++;
2847 val >>= 16;
2850 return insns;
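/* Emit code to add DELTA to the register numbered REGNUM, using the
   register numbered SCRATCHREG as a temporary when the constant cannot be
   encoded in a single add or subtract.  */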
2853 static void
2854 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2856 HOST_WIDE_INT mdelta = delta;
2857 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2858 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2860 if (mdelta < 0)
2861 mdelta = -mdelta;
2863 if (mdelta >= 4096 * 4096)
2865 (void) aarch64_build_constant (scratchreg, delta, true);
2866 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2868 else if (mdelta > 0)
2870 if (mdelta >= 4096)
2872 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2873 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2874 if (delta < 0)
2875 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2876 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2877 else
2878 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2879 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2881 if (mdelta % 4096 != 0)
2883 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2884 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2885 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2890 /* Output code to add DELTA to the first argument, and then jump
2891 to FUNCTION. Used for C++ multiple inheritance. */
2892 static void
2893 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2894 HOST_WIDE_INT delta,
2895 HOST_WIDE_INT vcall_offset,
2896 tree function)
2898 /* The this pointer is always in x0. Note that this differs from
2899 Arm where the this pointer may be bumped to r1 if r0 is required
2900 to return a pointer to an aggregate. On AArch64 a result value
2901 pointer will be in x8. */
2902 int this_regno = R0_REGNUM;
2903 rtx this_rtx, temp0, temp1, addr, funexp;
2904 rtx_insn *insn;
2906 reload_completed = 1;
2907 emit_note (NOTE_INSN_PROLOGUE_END);
2909 if (vcall_offset == 0)
2910 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2911 else
2913 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2915 this_rtx = gen_rtx_REG (Pmode, this_regno);
2916 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2917 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2919 addr = this_rtx;
2920 if (delta != 0)
2922 if (delta >= -256 && delta < 256)
2923 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2924 plus_constant (Pmode, this_rtx, delta));
2925 else
2926 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2929 if (Pmode == ptr_mode)
2930 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2931 else
2932 aarch64_emit_move (temp0,
2933 gen_rtx_ZERO_EXTEND (Pmode,
2934 gen_rtx_MEM (ptr_mode, addr)));
2936 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2937 addr = plus_constant (Pmode, temp0, vcall_offset);
2938 else
2940 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2941 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2944 if (Pmode == ptr_mode)
2945 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
2946 else
2947 aarch64_emit_move (temp1,
2948 gen_rtx_SIGN_EXTEND (Pmode,
2949 gen_rtx_MEM (ptr_mode, addr)));
2951 emit_insn (gen_add2_insn (this_rtx, temp1));
2954 /* Generate a tail call to the target function. */
2955 if (!TREE_USED (function))
2957 assemble_external (function);
2958 TREE_USED (function) = 1;
2960 funexp = XEXP (DECL_RTL (function), 0);
2961 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2962 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2963 SIBLING_CALL_P (insn) = 1;
2965 insn = get_insns ();
2966 shorten_branches (insn);
2967 final_start_function (insn, file, 1);
2968 final (insn, file, 1);
2969 final_end_function ();
2971 /* Stop pretending to be a post-reload pass. */
2972 reload_completed = 0;
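/* Return true if X contains a reference to a thread-local symbol.  */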
2975 static bool
2976 aarch64_tls_referenced_p (rtx x)
2978 if (!TARGET_HAVE_TLS)
2979 return false;
2980 subrtx_iterator::array_type array;
2981 FOR_EACH_SUBRTX (iter, array, x, ALL)
2983 const_rtx x = *iter;
2984 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
2985 return true;
2986 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2987 TLS offsets, not real symbol references. */
2988 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
2989 iter.skip_subrtxes ();
2991 return false;
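/* Comparison function for qsort and bsearch over the aarch64_bitmasks
   table.  */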
2995 static int
2996 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2998 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2999 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
3001 if (*imm1 < *imm2)
3002 return -1;
3003 if (*imm1 > *imm2)
3004 return +1;
3005 return 0;
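/* Build aarch64_bitmasks, the sorted table of every value that can be
   encoded as a logical (bitmask) immediate, by enumerating all element
   sizes, run lengths and rotations.  */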
3009 static void
3010 aarch64_build_bitmask_table (void)
3012 unsigned HOST_WIDE_INT mask, imm;
3013 unsigned int log_e, e, s, r;
3014 unsigned int nimms = 0;
3016 for (log_e = 1; log_e <= 6; log_e++)
3018 e = 1 << log_e;
3019 if (e == 64)
3020 mask = ~(HOST_WIDE_INT) 0;
3021 else
3022 mask = ((HOST_WIDE_INT) 1 << e) - 1;
3023 for (s = 1; s < e; s++)
3025 for (r = 0; r < e; r++)
3027 /* set s consecutive bits to 1 (s < 64) */
3028 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
3029 /* rotate right by r */
3030 if (r != 0)
3031 imm = ((imm >> r) | (imm << (e - r))) & mask;
3032 /* replicate the constant depending on SIMD size */
3033 switch (log_e) {
3034 case 1: imm |= (imm << 2);
3035 case 2: imm |= (imm << 4);
3036 case 3: imm |= (imm << 8);
3037 case 4: imm |= (imm << 16);
3038 case 5: imm |= (imm << 32);
3039 case 6:
3040 break;
3041 default:
3042 gcc_unreachable ();
3044 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
3045 aarch64_bitmasks[nimms++] = imm;
3050 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
3051 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
3052 aarch64_bitmasks_cmp);
3056 /* Return true if val can be encoded as a 12-bit unsigned immediate with
3057 a left shift of 0 or 12 bits. */
3058 bool
3059 aarch64_uimm12_shift (HOST_WIDE_INT val)
3061 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3062 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3063 );
3067 /* Return true if val is an immediate that can be loaded into a
3068 register by a MOVZ instruction. */
3069 static bool
3070 aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
3072 if (GET_MODE_SIZE (mode) > 4)
3074 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3075 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3076 return 1;
3078 else
3080 /* Ignore sign extension. */
3081 val &= (HOST_WIDE_INT) 0xffffffff;
3083 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3084 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3088 /* Return true if val is a valid bitmask immediate. */
3089 bool
3090 aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
3092 if (GET_MODE_SIZE (mode) < 8)
3094 /* Replicate bit pattern. */
3095 val &= (HOST_WIDE_INT) 0xffffffff;
3096 val |= val << 32;
3098 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
3099 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
3103 /* Return true if val is an immediate that can be loaded into a
3104 register in a single instruction. */
3105 bool
3106 aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
3108 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3109 return 1;
3110 return aarch64_bitmask_imm (val, mode);
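/* Return true if the constant X must not be forced into a constant-pool
   entry: HIGH parts, symbols that can be addressed directly, and anything
   containing a TLS reference.  */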
3113 static bool
3114 aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
3116 rtx base, offset;
3118 if (GET_CODE (x) == HIGH)
3119 return true;
3121 split_const (x, &base, &offset);
3122 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
3124 if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
3125 != SYMBOL_FORCE_TO_MEM)
3126 return true;
3127 else
3128 /* Avoid generating a 64-bit relocation in ILP32; leave it
3129 to aarch64_expand_mov_immediate to handle properly. */
3130 return mode != ptr_mode;
3133 return aarch64_tls_referenced_p (x);
3136 /* Return true if register REGNO is a valid index register.
3137 STRICT_P is true if REG_OK_STRICT is in effect. */
3139 bool
3140 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3142 if (!HARD_REGISTER_NUM_P (regno))
3144 if (!strict_p)
3145 return true;
3147 if (!reg_renumber)
3148 return false;
3150 regno = reg_renumber[regno];
3152 return GP_REGNUM_P (regno);
3155 /* Return true if register REGNO is a valid base register.
3156 STRICT_P is true if REG_OK_STRICT is in effect. */
3158 bool
3159 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3161 if (!HARD_REGISTER_NUM_P (regno))
3163 if (!strict_p)
3164 return true;
3166 if (!reg_renumber)
3167 return false;
3169 regno = reg_renumber[regno];
3172 /* The fake registers will be eliminated to either the stack or
3173 hard frame pointer, both of which are usually valid base registers.
3174 Reload deals with the cases where the eliminated form isn't valid. */
3175 return (GP_REGNUM_P (regno)
3176 || regno == SP_REGNUM
3177 || regno == FRAME_POINTER_REGNUM
3178 || regno == ARG_POINTER_REGNUM);
3181 /* Return true if X is a valid base register.
3182 STRICT_P is true if REG_OK_STRICT is in effect. */
3184 static bool
3185 aarch64_base_register_rtx_p (rtx x, bool strict_p)
3187 if (!strict_p && GET_CODE (x) == SUBREG)
3188 x = SUBREG_REG (x);
3190 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3193 /* Return true if address offset is a valid index. If it is, fill in INFO
3194 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3196 static bool
3197 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
3198 machine_mode mode, bool strict_p)
3200 enum aarch64_address_type type;
3201 rtx index;
3202 int shift;
3204 /* (reg:P) */
3205 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3206 && GET_MODE (x) == Pmode)
3208 type = ADDRESS_REG_REG;
3209 index = x;
3210 shift = 0;
3212 /* (sign_extend:DI (reg:SI)) */
3213 else if ((GET_CODE (x) == SIGN_EXTEND
3214 || GET_CODE (x) == ZERO_EXTEND)
3215 && GET_MODE (x) == DImode
3216 && GET_MODE (XEXP (x, 0)) == SImode)
3218 type = (GET_CODE (x) == SIGN_EXTEND)
3219 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3220 index = XEXP (x, 0);
3221 shift = 0;
3223 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3224 else if (GET_CODE (x) == MULT
3225 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3226 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3227 && GET_MODE (XEXP (x, 0)) == DImode
3228 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3229 && CONST_INT_P (XEXP (x, 1)))
3231 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3232 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3233 index = XEXP (XEXP (x, 0), 0);
3234 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3236 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3237 else if (GET_CODE (x) == ASHIFT
3238 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3239 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3240 && GET_MODE (XEXP (x, 0)) == DImode
3241 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3242 && CONST_INT_P (XEXP (x, 1)))
3244 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3245 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3246 index = XEXP (XEXP (x, 0), 0);
3247 shift = INTVAL (XEXP (x, 1));
3249 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3250 else if ((GET_CODE (x) == SIGN_EXTRACT
3251 || GET_CODE (x) == ZERO_EXTRACT)
3252 && GET_MODE (x) == DImode
3253 && GET_CODE (XEXP (x, 0)) == MULT
3254 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3255 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3257 type = (GET_CODE (x) == SIGN_EXTRACT)
3258 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3259 index = XEXP (XEXP (x, 0), 0);
3260 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3261 if (INTVAL (XEXP (x, 1)) != 32 + shift
3262 || INTVAL (XEXP (x, 2)) != 0)
3263 shift = -1;
3265 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3266 (const_int 0xffffffff<<shift)) */
3267 else if (GET_CODE (x) == AND
3268 && GET_MODE (x) == DImode
3269 && GET_CODE (XEXP (x, 0)) == MULT
3270 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3271 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3272 && CONST_INT_P (XEXP (x, 1)))
3274 type = ADDRESS_REG_UXTW;
3275 index = XEXP (XEXP (x, 0), 0);
3276 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3277 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3278 shift = -1;
3280 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3281 else if ((GET_CODE (x) == SIGN_EXTRACT
3282 || GET_CODE (x) == ZERO_EXTRACT)
3283 && GET_MODE (x) == DImode
3284 && GET_CODE (XEXP (x, 0)) == ASHIFT
3285 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3286 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3288 type = (GET_CODE (x) == SIGN_EXTRACT)
3289 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3290 index = XEXP (XEXP (x, 0), 0);
3291 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3292 if (INTVAL (XEXP (x, 1)) != 32 + shift
3293 || INTVAL (XEXP (x, 2)) != 0)
3294 shift = -1;
3296 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3297 (const_int 0xffffffff<<shift)) */
3298 else if (GET_CODE (x) == AND
3299 && GET_MODE (x) == DImode
3300 && GET_CODE (XEXP (x, 0)) == ASHIFT
3301 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3302 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3303 && CONST_INT_P (XEXP (x, 1)))
3305 type = ADDRESS_REG_UXTW;
3306 index = XEXP (XEXP (x, 0), 0);
3307 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3308 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3309 shift = -1;
3311 /* (mult:P (reg:P) (const_int scale)) */
3312 else if (GET_CODE (x) == MULT
3313 && GET_MODE (x) == Pmode
3314 && GET_MODE (XEXP (x, 0)) == Pmode
3315 && CONST_INT_P (XEXP (x, 1)))
3317 type = ADDRESS_REG_REG;
3318 index = XEXP (x, 0);
3319 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3321 /* (ashift:P (reg:P) (const_int shift)) */
3322 else if (GET_CODE (x) == ASHIFT
3323 && GET_MODE (x) == Pmode
3324 && GET_MODE (XEXP (x, 0)) == Pmode
3325 && CONST_INT_P (XEXP (x, 1)))
3327 type = ADDRESS_REG_REG;
3328 index = XEXP (x, 0);
3329 shift = INTVAL (XEXP (x, 1));
3331 else
3332 return false;
3334 if (GET_CODE (index) == SUBREG)
3335 index = SUBREG_REG (index);
3337 if ((shift == 0 ||
3338 (shift > 0 && shift <= 3
3339 && (1 << shift) == GET_MODE_SIZE (mode)))
3340 && REG_P (index)
3341 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3343 info->type = type;
3344 info->offset = index;
3345 info->shift = shift;
3346 return true;
3349 return false;
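/* Return true if OFFSET is a valid 7-bit signed, scaled immediate offset
   for mode MODE, as used by load/store pair instructions.  */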
3352 bool
3353 aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3355 return (offset >= -64 * GET_MODE_SIZE (mode)
3356 && offset < 64 * GET_MODE_SIZE (mode)
3357 && offset % GET_MODE_SIZE (mode) == 0);
3360 static inline bool
3361 offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
3362 HOST_WIDE_INT offset)
3364 return offset >= -256 && offset < 256;
3367 static inline bool
3368 offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3370 return (offset >= 0
3371 && offset < 4096 * GET_MODE_SIZE (mode)
3372 && offset % GET_MODE_SIZE (mode) == 0);
3375 /* Return true if X is a valid address for machine mode MODE. If it is,
3376 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3377 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3379 static bool
3380 aarch64_classify_address (struct aarch64_address_info *info,
3381 rtx x, machine_mode mode,
3382 RTX_CODE outer_code, bool strict_p)
3384 enum rtx_code code = GET_CODE (x);
3385 rtx op0, op1;
3387 /* On BE, we use load/store pair for all large int mode load/stores. */
3388 bool load_store_pair_p = (outer_code == PARALLEL
3389 || (BYTES_BIG_ENDIAN
3390 && aarch64_vect_struct_mode_p (mode)));
3392 bool allow_reg_index_p =
3393 !load_store_pair_p
3394 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
3395 && !aarch64_vect_struct_mode_p (mode);
3397 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
3398 REG addressing. */
3399 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
3400 && (code != POST_INC && code != REG))
3401 return false;
3403 switch (code)
3405 case REG:
3406 case SUBREG:
3407 info->type = ADDRESS_REG_IMM;
3408 info->base = x;
3409 info->offset = const0_rtx;
3410 return aarch64_base_register_rtx_p (x, strict_p);
3412 case PLUS:
3413 op0 = XEXP (x, 0);
3414 op1 = XEXP (x, 1);
3416 if (! strict_p
3417 && REG_P (op0)
3418 && (op0 == virtual_stack_vars_rtx
3419 || op0 == frame_pointer_rtx
3420 || op0 == arg_pointer_rtx)
3421 && CONST_INT_P (op1))
3423 info->type = ADDRESS_REG_IMM;
3424 info->base = op0;
3425 info->offset = op1;
3427 return true;
3430 if (GET_MODE_SIZE (mode) != 0
3431 && CONST_INT_P (op1)
3432 && aarch64_base_register_rtx_p (op0, strict_p))
3434 HOST_WIDE_INT offset = INTVAL (op1);
3436 info->type = ADDRESS_REG_IMM;
3437 info->base = op0;
3438 info->offset = op1;
3440 /* TImode and TFmode values are allowed in both pairs of X
3441 registers and individual Q registers. The available
3442 address modes are:
3443 X,X: 7-bit signed scaled offset
3444 Q: 9-bit signed offset
3445 We conservatively require an offset representable in either mode.
3446 */
3447 if (mode == TImode || mode == TFmode)
3448 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3449 && offset_9bit_signed_unscaled_p (mode, offset));
3451 /* A 7-bit offset check because OImode will emit an ldp/stp
3452 instruction (only big endian will get here).
3453 For ldp/stp instructions, the offset is scaled for the size of a
3454 single element of the pair. */
3455 if (mode == OImode)
3456 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
3458 /* Three 9/12-bit offset checks because CImode will emit three
3459 ldr/str instructions (only big endian will get here). */
3460 if (mode == CImode)
3461 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3462 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
3463 || offset_12bit_unsigned_scaled_p (V16QImode,
3464 offset + 32)));
3466 /* Two 7-bit offset checks because XImode will emit two ldp/stp
3467 instructions (only big endian will get here). */
3468 if (mode == XImode)
3469 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3470 && aarch64_offset_7bit_signed_scaled_p (TImode,
3471 offset + 32));
3473 if (load_store_pair_p)
3474 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3475 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3476 else
3477 return (offset_9bit_signed_unscaled_p (mode, offset)
3478 || offset_12bit_unsigned_scaled_p (mode, offset));
3481 if (allow_reg_index_p)
3483 /* Look for base + (scaled/extended) index register. */
3484 if (aarch64_base_register_rtx_p (op0, strict_p)
3485 && aarch64_classify_index (info, op1, mode, strict_p))
3487 info->base = op0;
3488 return true;
3490 if (aarch64_base_register_rtx_p (op1, strict_p)
3491 && aarch64_classify_index (info, op0, mode, strict_p))
3493 info->base = op1;
3494 return true;
3498 return false;
3500 case POST_INC:
3501 case POST_DEC:
3502 case PRE_INC:
3503 case PRE_DEC:
3504 info->type = ADDRESS_REG_WB;
3505 info->base = XEXP (x, 0);
3506 info->offset = NULL_RTX;
3507 return aarch64_base_register_rtx_p (info->base, strict_p);
3509 case POST_MODIFY:
3510 case PRE_MODIFY:
3511 info->type = ADDRESS_REG_WB;
3512 info->base = XEXP (x, 0);
3513 if (GET_CODE (XEXP (x, 1)) == PLUS
3514 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3515 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3516 && aarch64_base_register_rtx_p (info->base, strict_p))
3518 HOST_WIDE_INT offset;
3519 info->offset = XEXP (XEXP (x, 1), 1);
3520 offset = INTVAL (info->offset);
3522 /* TImode and TFmode values are allowed in both pairs of X
3523 registers and individual Q registers. The available
3524 address modes are:
3525 X,X: 7-bit signed scaled offset
3526 Q: 9-bit signed offset
3527 We conservatively require an offset representable in either mode.
3528 */
3529 if (mode == TImode || mode == TFmode)
3530 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3531 && offset_9bit_signed_unscaled_p (mode, offset));
3533 if (load_store_pair_p)
3534 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3535 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3536 else
3537 return offset_9bit_signed_unscaled_p (mode, offset);
3539 return false;
3541 case CONST:
3542 case SYMBOL_REF:
3543 case LABEL_REF:
3544 /* load literal: pc-relative constant pool entry. Only supported
3545 for SI mode or larger. */
3546 info->type = ADDRESS_SYMBOLIC;
3548 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
3550 rtx sym, addend;
3552 split_const (x, &sym, &addend);
3553 return (GET_CODE (sym) == LABEL_REF
3554 || (GET_CODE (sym) == SYMBOL_REF
3555 && CONSTANT_POOL_ADDRESS_P (sym)));
3557 return false;
3559 case LO_SUM:
3560 info->type = ADDRESS_LO_SUM;
3561 info->base = XEXP (x, 0);
3562 info->offset = XEXP (x, 1);
3563 if (allow_reg_index_p
3564 && aarch64_base_register_rtx_p (info->base, strict_p))
3566 rtx sym, offs;
3567 split_const (info->offset, &sym, &offs);
3568 if (GET_CODE (sym) == SYMBOL_REF
3569 && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
3570 == SYMBOL_SMALL_ABSOLUTE))
3572 /* The symbol and offset must be aligned to the access size. */
3573 unsigned int align;
3574 unsigned int ref_size;
3576 if (CONSTANT_POOL_ADDRESS_P (sym))
3577 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3578 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3580 tree exp = SYMBOL_REF_DECL (sym);
3581 align = TYPE_ALIGN (TREE_TYPE (exp));
3582 align = CONSTANT_ALIGNMENT (exp, align);
3584 else if (SYMBOL_REF_DECL (sym))
3585 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3586 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3587 && SYMBOL_REF_BLOCK (sym) != NULL)
3588 align = SYMBOL_REF_BLOCK (sym)->alignment;
3589 else
3590 align = BITS_PER_UNIT;
3592 ref_size = GET_MODE_SIZE (mode);
3593 if (ref_size == 0)
3594 ref_size = GET_MODE_SIZE (DImode);
3596 return ((INTVAL (offs) & (ref_size - 1)) == 0
3597 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3600 return false;
3602 default:
3603 return false;
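/* Return true if X is a symbolic reference, optionally with a constant
   offset added.  */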
3607 bool
3608 aarch64_symbolic_address_p (rtx x)
3610 rtx offset;
3612 split_const (x, &x, &offset);
3613 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3616 /* Classify the base of symbolic expression X, given that X appears in
3617 context CONTEXT. */
3619 enum aarch64_symbol_type
3620 aarch64_classify_symbolic_expression (rtx x,
3621 enum aarch64_symbol_context context)
3623 rtx offset;
3625 split_const (x, &x, &offset);
3626 return aarch64_classify_symbol (x, offset, context);
3630 /* Return TRUE if X is a legitimate address for accessing memory in
3631 mode MODE. */
3632 static bool
3633 aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
3635 struct aarch64_address_info addr;
3637 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3640 /* Return TRUE if X is a legitimate address for accessing memory in
3641 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3642 pair operation. */
3643 bool
3644 aarch64_legitimate_address_p (machine_mode mode, rtx x,
3645 RTX_CODE outer_code, bool strict_p)
3647 struct aarch64_address_info addr;
3649 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3652 /* Return TRUE if rtx X is immediate constant 0.0 */
3653 bool
3654 aarch64_float_const_zero_rtx_p (rtx x)
3656 REAL_VALUE_TYPE r;
3658 if (GET_MODE (x) == VOIDmode)
3659 return false;
3661 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3662 if (REAL_VALUE_MINUS_ZERO (r))
3663 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3664 return REAL_VALUES_EQUAL (r, dconst0);
3667 /* Return the fixed registers used for condition codes. */
3669 static bool
3670 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3672 *p1 = CC_REGNUM;
3673 *p2 = INVALID_REGNUM;
3674 return true;
3677 /* Emit call insn with PAT and do aarch64-specific handling. */
3679 void
3680 aarch64_emit_call_insn (rtx pat)
3682 rtx insn = emit_call_insn (pat);
3684 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3685 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3686 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
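/* Return the CC mode that should be used when comparing X against Y with
   rtx code CODE.  */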
3689 machine_mode
3690 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3692 /* All floating point compares return CCFP if it is an equality
3693 comparison, and CCFPE otherwise. */
3694 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3696 switch (code)
3698 case EQ:
3699 case NE:
3700 case UNORDERED:
3701 case ORDERED:
3702 case UNLT:
3703 case UNLE:
3704 case UNGT:
3705 case UNGE:
3706 case UNEQ:
3707 case LTGT:
3708 return CCFPmode;
3710 case LT:
3711 case LE:
3712 case GT:
3713 case GE:
3714 return CCFPEmode;
3716 default:
3717 gcc_unreachable ();
3721 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3722 && y == const0_rtx
3723 && (code == EQ || code == NE || code == LT || code == GE)
3724 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3725 || GET_CODE (x) == NEG))
3726 return CC_NZmode;
3728 /* A compare with a shifted operand. Because of canonicalization,
3729 the comparison will have to be swapped when we emit the assembly
3730 code. */
3731 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3732 && (REG_P (y) || GET_CODE (y) == SUBREG)
3733 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3734 || GET_CODE (x) == LSHIFTRT
3735 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3736 return CC_SWPmode;
3738 /* Similarly for a negated operand, but we can only do this for
3739 equalities. */
3740 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3741 && (REG_P (y) || GET_CODE (y) == SUBREG)
3742 && (code == EQ || code == NE)
3743 && GET_CODE (x) == NEG)
3744 return CC_Zmode;
3746 /* A compare of a mode narrower than SI mode against zero can be done
3747 by extending the value in the comparison. */
3748 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3749 && y == const0_rtx)
3750 /* Only use sign-extension if we really need it. */
3751 return ((code == GT || code == GE || code == LE || code == LT)
3752 ? CC_SESWPmode : CC_ZESWPmode);
3754 /* For everything else, return CCmode. */
3755 return CCmode;
3758 static int
3759 aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
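/* Return the AARCH64_* condition code for comparison rtx X, or -1 if it
   cannot be represented.  */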
3761 int
3762 aarch64_get_condition_code (rtx x)
3764 machine_mode mode = GET_MODE (XEXP (x, 0));
3765 enum rtx_code comp_code = GET_CODE (x);
3767 if (GET_MODE_CLASS (mode) != MODE_CC)
3768 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3769 return aarch64_get_condition_code_1 (mode, comp_code);
3772 static int
3773 aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
3775 int ne = -1, eq = -1;
3776 switch (mode)
3778 case CCFPmode:
3779 case CCFPEmode:
3780 switch (comp_code)
3782 case GE: return AARCH64_GE;
3783 case GT: return AARCH64_GT;
3784 case LE: return AARCH64_LS;
3785 case LT: return AARCH64_MI;
3786 case NE: return AARCH64_NE;
3787 case EQ: return AARCH64_EQ;
3788 case ORDERED: return AARCH64_VC;
3789 case UNORDERED: return AARCH64_VS;
3790 case UNLT: return AARCH64_LT;
3791 case UNLE: return AARCH64_LE;
3792 case UNGT: return AARCH64_HI;
3793 case UNGE: return AARCH64_PL;
3794 default: return -1;
3796 break;
3798 case CC_DNEmode:
3799 ne = AARCH64_NE;
3800 eq = AARCH64_EQ;
3801 break;
3803 case CC_DEQmode:
3804 ne = AARCH64_EQ;
3805 eq = AARCH64_NE;
3806 break;
3808 case CC_DGEmode:
3809 ne = AARCH64_GE;
3810 eq = AARCH64_LT;
3811 break;
3813 case CC_DLTmode:
3814 ne = AARCH64_LT;
3815 eq = AARCH64_GE;
3816 break;
3818 case CC_DGTmode:
3819 ne = AARCH64_GT;
3820 eq = AARCH64_LE;
3821 break;
3823 case CC_DLEmode:
3824 ne = AARCH64_LE;
3825 eq = AARCH64_GT;
3826 break;
3828 case CC_DGEUmode:
3829 ne = AARCH64_CS;
3830 eq = AARCH64_CC;
3831 break;
3833 case CC_DLTUmode:
3834 ne = AARCH64_CC;
3835 eq = AARCH64_CS;
3836 break;
3838 case CC_DGTUmode:
3839 ne = AARCH64_HI;
3840 eq = AARCH64_LS;
3841 break;
3843 case CC_DLEUmode:
3844 ne = AARCH64_LS;
3845 eq = AARCH64_HI;
3846 break;
3848 case CCmode:
3849 switch (comp_code)
3851 case NE: return AARCH64_NE;
3852 case EQ: return AARCH64_EQ;
3853 case GE: return AARCH64_GE;
3854 case GT: return AARCH64_GT;
3855 case LE: return AARCH64_LE;
3856 case LT: return AARCH64_LT;
3857 case GEU: return AARCH64_CS;
3858 case GTU: return AARCH64_HI;
3859 case LEU: return AARCH64_LS;
3860 case LTU: return AARCH64_CC;
3861 default: return -1;
3863 break;
3865 case CC_SWPmode:
3866 case CC_ZESWPmode:
3867 case CC_SESWPmode:
3868 switch (comp_code)
3870 case NE: return AARCH64_NE;
3871 case EQ: return AARCH64_EQ;
3872 case GE: return AARCH64_LE;
3873 case GT: return AARCH64_LT;
3874 case LE: return AARCH64_GE;
3875 case LT: return AARCH64_GT;
3876 case GEU: return AARCH64_LS;
3877 case GTU: return AARCH64_CC;
3878 case LEU: return AARCH64_CS;
3879 case LTU: return AARCH64_HI;
3880 default: return -1;
3882 break;
3884 case CC_NZmode:
3885 switch (comp_code)
3887 case NE: return AARCH64_NE;
3888 case EQ: return AARCH64_EQ;
3889 case GE: return AARCH64_PL;
3890 case LT: return AARCH64_MI;
3891 default: return -1;
3893 break;
3895 case CC_Zmode:
3896 switch (comp_code)
3898 case NE: return AARCH64_NE;
3899 case EQ: return AARCH64_EQ;
3900 default: return -1;
3902 break;
3904 default:
3905 return -1;
3906 break;
3909 if (comp_code == NE)
3910 return ne;
3912 if (comp_code == EQ)
3913 return eq;
3915 return -1;
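/* Return true if X is a CONST_VECTOR of integers whose elements are all
   equal and lie within [MINVAL, MAXVAL].  */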
3918 bool
3919 aarch64_const_vec_all_same_in_range_p (rtx x,
3920 HOST_WIDE_INT minval,
3921 HOST_WIDE_INT maxval)
3923 HOST_WIDE_INT firstval;
3924 int count, i;
3926 if (GET_CODE (x) != CONST_VECTOR
3927 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
3928 return false;
3930 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
3931 if (firstval < minval || firstval > maxval)
3932 return false;
3934 count = CONST_VECTOR_NUNITS (x);
3935 for (i = 1; i < count; i++)
3936 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
3937 return false;
3939 return true;
3942 bool
3943 aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
3945 return aarch64_const_vec_all_same_in_range_p (x, val, val);
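/* Return the number of bits set in VALUE.  */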
3948 static unsigned
3949 bit_count (unsigned HOST_WIDE_INT value)
3951 unsigned count = 0;
3953 while (value)
3955 count++;
3956 value &= value - 1;
3959 return count;
3962 /* N Z C V. */
3963 #define AARCH64_CC_V 1
3964 #define AARCH64_CC_C (1 << 1)
3965 #define AARCH64_CC_Z (1 << 2)
3966 #define AARCH64_CC_N (1 << 3)
3968 /* N Z C V flags for ccmp. The first value is used when the combining
3969 operation is AND, the second when it is IOR. Indexed by AARCH64_COND_CODE. */
3970 static const int aarch64_nzcv_codes[][2] =
3972 {AARCH64_CC_Z, 0}, /* EQ, Z == 1. */
3973 {0, AARCH64_CC_Z}, /* NE, Z == 0. */
3974 {AARCH64_CC_C, 0}, /* CS, C == 1. */
3975 {0, AARCH64_CC_C}, /* CC, C == 0. */
3976 {AARCH64_CC_N, 0}, /* MI, N == 1. */
3977 {0, AARCH64_CC_N}, /* PL, N == 0. */
3978 {AARCH64_CC_V, 0}, /* VS, V == 1. */
3979 {0, AARCH64_CC_V}, /* VC, V == 0. */
3980 {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0. */
3981 {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0). */
3982 {0, AARCH64_CC_V}, /* GE, N == V. */
3983 {AARCH64_CC_V, 0}, /* LT, N != V. */
3984 {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V. */
3985 {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V). */
3986 {0, 0}, /* AL, Any. */
3987 {0, 0}, /* NV, Any. */
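/* Map a CC_D* comparison mode onto the rtx comparison code it encodes.  */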
3990 int
3991 aarch64_ccmp_mode_to_code (enum machine_mode mode)
3993 switch (mode)
3995 case CC_DNEmode:
3996 return NE;
3998 case CC_DEQmode:
3999 return EQ;
4001 case CC_DLEmode:
4002 return LE;
4004 case CC_DGTmode:
4005 return GT;
4007 case CC_DLTmode:
4008 return LT;
4010 case CC_DGEmode:
4011 return GE;
4013 case CC_DLEUmode:
4014 return LEU;
4016 case CC_DGTUmode:
4017 return GTU;
4019 case CC_DLTUmode:
4020 return LTU;
4022 case CC_DGEUmode:
4023 return GEU;
4025 default:
4026 gcc_unreachable ();
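/* Print operand X to file F, applying the output modifier CODE used in the
   assembler output templates.  */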
4031 void
4032 aarch64_print_operand (FILE *f, rtx x, char code)
4034 switch (code)
4036 /* An integer or symbol address without a preceding # sign. */
4037 case 'c':
4038 switch (GET_CODE (x))
4040 case CONST_INT:
4041 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4042 break;
4044 case SYMBOL_REF:
4045 output_addr_const (f, x);
4046 break;
4048 case CONST:
4049 if (GET_CODE (XEXP (x, 0)) == PLUS
4050 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4052 output_addr_const (f, x);
4053 break;
4055 /* Fall through. */
4057 default:
4058 output_operand_lossage ("Unsupported operand for code '%c'", code);
4060 break;
4062 case 'e':
4063 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4065 int n;
4067 if (!CONST_INT_P (x)
4068 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4070 output_operand_lossage ("invalid operand for '%%%c'", code);
4071 return;
4074 switch (n)
4076 case 3:
4077 fputc ('b', f);
4078 break;
4079 case 4:
4080 fputc ('h', f);
4081 break;
4082 case 5:
4083 fputc ('w', f);
4084 break;
4085 default:
4086 output_operand_lossage ("invalid operand for '%%%c'", code);
4087 return;
4090 break;
4092 case 'p':
4094 int n;
4096 /* Print N such that 2^N == X. */
4097 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
4099 output_operand_lossage ("invalid operand for '%%%c'", code);
4100 return;
4103 asm_fprintf (f, "%d", n);
4105 break;
4107 case 'P':
4108 /* Print the number of non-zero bits in X (a const_int). */
4109 if (!CONST_INT_P (x))
4111 output_operand_lossage ("invalid operand for '%%%c'", code);
4112 return;
4115 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
4116 break;
4118 case 'H':
4119 /* Print the higher numbered register of a pair (TImode) of regs. */
4120 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
4122 output_operand_lossage ("invalid operand for '%%%c'", code);
4123 return;
4126 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
4127 break;
4129 case 'm':
4131 int cond_code;
4132 /* Print a condition (eq, ne, etc). */
4134 /* CONST_TRUE_RTX means always -- that's the default. */
4135 if (x == const_true_rtx)
4136 return;
4138 if (!COMPARISON_P (x))
4140 output_operand_lossage ("invalid operand for '%%%c'", code);
4141 return;
4144 cond_code = aarch64_get_condition_code (x);
4145 gcc_assert (cond_code >= 0);
4146 fputs (aarch64_condition_codes[cond_code], f);
4148 break;
4150 case 'M':
4152 int cond_code;
4153 /* Print the inverse of a condition (eq <-> ne, etc). */
4155 /* CONST_TRUE_RTX means never -- that's the default. */
4156 if (x == const_true_rtx)
4158 fputs ("nv", f);
4159 return;
4162 if (!COMPARISON_P (x))
4164 output_operand_lossage ("invalid operand for '%%%c'", code);
4165 return;
4167 cond_code = aarch64_get_condition_code (x);
4168 gcc_assert (cond_code >= 0);
4169 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
4170 (cond_code)], f);
4172 break;
4174 case 'b':
4175 case 'h':
4176 case 's':
4177 case 'd':
4178 case 'q':
4179 /* Print a scalar FP/SIMD register name. */
4180 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4182 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4183 return;
4185 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
4186 break;
4188 case 'S':
4189 case 'T':
4190 case 'U':
4191 case 'V':
4192 /* Print the first FP/SIMD register name in a list. */
4193 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4195 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4196 return;
4198 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
4199 break;
4201 case 'R':
4202 /* Print a scalar FP/SIMD register name + 1. */
4203 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4205 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4206 return;
4208 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
4209 break;
4211 case 'X':
4212 /* Print bottom 16 bits of integer constant in hex. */
4213 if (!CONST_INT_P (x))
4215 output_operand_lossage ("invalid operand for '%%%c'", code);
4216 return;
4218 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
4219 break;
4221 case 'w':
4222 case 'x':
4223 /* Print a general register name or the zero register (32-bit or
4224 64-bit). */
4225 if (x == const0_rtx
4226 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
4228 asm_fprintf (f, "%czr", code);
4229 break;
4232 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
4234 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
4235 break;
4238 if (REG_P (x) && REGNO (x) == SP_REGNUM)
4240 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
4241 break;
4244 /* Fall through */
4246 case 0:
4247 /* Print a normal operand, if it's a general register, then we
4248 assume DImode. */
4249 if (x == NULL)
4251 output_operand_lossage ("missing operand");
4252 return;
4255 switch (GET_CODE (x))
4257 case REG:
4258 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
4259 break;
4261 case MEM:
4262 aarch64_memory_reference_mode = GET_MODE (x);
4263 output_address (XEXP (x, 0));
4264 break;
4266 case LABEL_REF:
4267 case SYMBOL_REF:
4268 output_addr_const (asm_out_file, x);
4269 break;
4271 case CONST_INT:
4272 asm_fprintf (f, "%wd", INTVAL (x));
4273 break;
4275 case CONST_VECTOR:
4276 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
4278 gcc_assert (
4279 aarch64_const_vec_all_same_in_range_p (x,
4280 HOST_WIDE_INT_MIN,
4281 HOST_WIDE_INT_MAX));
4282 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
4284 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
4286 fputc ('0', f);
4288 else
4289 gcc_unreachable ();
4290 break;
4292 case CONST_DOUBLE:
4293 /* CONST_DOUBLE can represent a double-width integer.
4294 In this case, the mode of x is VOIDmode. */
4295 if (GET_MODE (x) == VOIDmode)
4296 ; /* Do Nothing. */
4297 else if (aarch64_float_const_zero_rtx_p (x))
4299 fputc ('0', f);
4300 break;
4302 else if (aarch64_float_const_representable_p (x))
4304 #define buf_size 20
4305 char float_buf[buf_size] = {'\0'};
4306 REAL_VALUE_TYPE r;
4307 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4308 real_to_decimal_for_mode (float_buf, &r,
4309 buf_size, buf_size,
4310 1, GET_MODE (x));
4311 asm_fprintf (asm_out_file, "%s", float_buf);
4312 break;
4313 #undef buf_size
4315 output_operand_lossage ("invalid constant");
4316 return;
4317 default:
4318 output_operand_lossage ("invalid operand");
4319 return;
4321 break;
4323 case 'A':
4324 if (GET_CODE (x) == HIGH)
4325 x = XEXP (x, 0);
4327 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4329 case SYMBOL_SMALL_GOT:
4330 asm_fprintf (asm_out_file, ":got:");
4331 break;
4333 case SYMBOL_SMALL_TLSGD:
4334 asm_fprintf (asm_out_file, ":tlsgd:");
4335 break;
4337 case SYMBOL_SMALL_TLSDESC:
4338 asm_fprintf (asm_out_file, ":tlsdesc:");
4339 break;
4341 case SYMBOL_SMALL_GOTTPREL:
4342 asm_fprintf (asm_out_file, ":gottprel:");
4343 break;
4345 case SYMBOL_SMALL_TPREL:
4346 asm_fprintf (asm_out_file, ":tprel:");
4347 break;
4349 case SYMBOL_TINY_GOT:
4350 gcc_unreachable ();
4351 break;
4353 default:
4354 break;
4356 output_addr_const (asm_out_file, x);
4357 break;
4359 case 'L':
4360 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4362 case SYMBOL_SMALL_GOT:
4363 asm_fprintf (asm_out_file, ":lo12:");
4364 break;
4366 case SYMBOL_SMALL_TLSGD:
4367 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4368 break;
4370 case SYMBOL_SMALL_TLSDESC:
4371 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4372 break;
4374 case SYMBOL_SMALL_GOTTPREL:
4375 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4376 break;
4378 case SYMBOL_SMALL_TPREL:
4379 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4380 break;
4382 case SYMBOL_TINY_GOT:
4383 asm_fprintf (asm_out_file, ":got:");
4384 break;
4386 default:
4387 break;
4389 output_addr_const (asm_out_file, x);
4390 break;
4392 case 'G':
4394 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4396 case SYMBOL_SMALL_TPREL:
4397 asm_fprintf (asm_out_file, ":tprel_hi12:");
4398 break;
4399 default:
4400 break;
4402 output_addr_const (asm_out_file, x);
4403 break;
4405 case 'K':
4407 int cond_code;
4408 /* Print nzcv. */
4410 if (!COMPARISON_P (x))
4412 output_operand_lossage ("invalid operand for '%%%c'", code);
4413 return;
4416 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4417 gcc_assert (cond_code >= 0);
4418 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
4420 break;
4422 case 'k':
4424 int cond_code;
4425 /* Print nzcv. */
4427 if (!COMPARISON_P (x))
4429 output_operand_lossage ("invalid operand for '%%%c'", code);
4430 return;
4433 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4434 gcc_assert (cond_code >= 0);
4435 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
4437 break;
4439 default:
4440 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4441 return;
4445 void
4446 aarch64_print_operand_address (FILE *f, rtx x)
4448 struct aarch64_address_info addr;
4450 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4451 MEM, true))
4452 switch (addr.type)
4454 case ADDRESS_REG_IMM:
4455 if (addr.offset == const0_rtx)
4456 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
4457 else
4458 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
4459 INTVAL (addr.offset));
4460 return;
4462 case ADDRESS_REG_REG:
4463 if (addr.shift == 0)
4464 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
4465 reg_names [REGNO (addr.offset)]);
4466 else
4467 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
4468 reg_names [REGNO (addr.offset)], addr.shift);
4469 return;
4471 case ADDRESS_REG_UXTW:
4472 if (addr.shift == 0)
4473 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
4474 REGNO (addr.offset) - R0_REGNUM);
4475 else
4476 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
4477 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4478 return;
4480 case ADDRESS_REG_SXTW:
4481 if (addr.shift == 0)
4482 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
4483 REGNO (addr.offset) - R0_REGNUM);
4484 else
4485 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
4486 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4487 return;
4489 case ADDRESS_REG_WB:
4490 switch (GET_CODE (x))
4492 case PRE_INC:
4493 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
4494 GET_MODE_SIZE (aarch64_memory_reference_mode));
4495 return;
4496 case POST_INC:
4497 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4498 GET_MODE_SIZE (aarch64_memory_reference_mode));
4499 return;
4500 case PRE_DEC:
4501 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4502 GET_MODE_SIZE (aarch64_memory_reference_mode));
4503 return;
4504 case POST_DEC:
4505 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4506 GET_MODE_SIZE (aarch64_memory_reference_mode));
4507 return;
4508 case PRE_MODIFY:
4509 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4510 INTVAL (addr.offset));
4511 return;
4512 case POST_MODIFY:
4513 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4514 INTVAL (addr.offset));
4515 return;
4516 default:
4517 break;
4519 break;
4521 case ADDRESS_LO_SUM:
4522 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4523 output_addr_const (f, addr.offset);
4524 asm_fprintf (f, "]");
4525 return;
4527 case ADDRESS_SYMBOLIC:
4528 break;
4531 output_addr_const (f, x);
4534 bool
4535 aarch64_label_mentioned_p (rtx x)
4537 const char *fmt;
4538 int i;
4540 if (GET_CODE (x) == LABEL_REF)
4541 return true;
4543 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4544 referencing instruction, but they are constant offsets, not
4545 symbols. */
4546 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4547 return false;
4549 fmt = GET_RTX_FORMAT (GET_CODE (x));
4550 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4552 if (fmt[i] == 'E')
4554 int j;
4556 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4557 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4558 return 1;
4560 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4561 return 1;
4564 return 0;
4567 /* Implement REGNO_REG_CLASS. */
4569 enum reg_class
4570 aarch64_regno_regclass (unsigned regno)
4572 if (GP_REGNUM_P (regno))
4573 return GENERAL_REGS;
4575 if (regno == SP_REGNUM)
4576 return STACK_REG;
4578 if (regno == FRAME_POINTER_REGNUM
4579 || regno == ARG_POINTER_REGNUM)
4580 return POINTER_REGS;
4582 if (FP_REGNUM_P (regno))
4583 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4585 return NO_REGS;
4588 static rtx
4589 aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
4591 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4592 where mask is selected by alignment and size of the offset.
4593 We try to pick as large a range for the offset as possible to
4594 maximize the chance of a CSE. However, for aligned addresses
4595 we limit the range to 4k so that structures with different sized
4596 elements are likely to use the same base. */
4598 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4600 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4601 HOST_WIDE_INT base_offset;
4603 /* Does it look like we'll need a load/store-pair operation? */
4604 if (GET_MODE_SIZE (mode) > 16
4605 || mode == TImode)
4606 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4607 & ~((128 * GET_MODE_SIZE (mode)) - 1));
4608 /* For offsets that aren't a multiple of the access size, the limit is
4609 -256...255. */
4610 else if (offset & (GET_MODE_SIZE (mode) - 1))
4611 base_offset = (offset + 0x100) & ~0x1ff;
4612 else
4613 base_offset = offset & ~0xfff;
4615 if (base_offset == 0)
4616 return x;
4618 offset -= base_offset;
4619 rtx base_reg = gen_reg_rtx (Pmode);
4620 rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4621 NULL_RTX);
4622 emit_move_insn (base_reg, val);
4623 x = plus_constant (Pmode, base_reg, offset);
4626 return x;
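/* Worked example for the splitting above (illustrative, not from the
   original source): for an SImode access to X + 0x13458, the offset is
   4-byte aligned, so base_offset = 0x13458 & ~0xfff = 0x13000 and the
   residual offset is 0x458.  The 0x13000 part is materialized once into a
   new base register and can be CSEd across neighbouring accesses, while
   0x458 fits in the immediate offset field of the load/store.  */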
4629 /* Try a machine-dependent way of reloading an illegitimate address
4630 operand. If we find one, push the reload and return the new rtx. */
4633 aarch64_legitimize_reload_address (rtx *x_p,
4634 machine_mode mode,
4635 int opnum, int type,
4636 int ind_levels ATTRIBUTE_UNUSED)
4638 rtx x = *x_p;
4640 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4641 if (aarch64_vect_struct_mode_p (mode)
4642 && GET_CODE (x) == PLUS
4643 && REG_P (XEXP (x, 0))
4644 && CONST_INT_P (XEXP (x, 1)))
4646 rtx orig_rtx = x;
4647 x = copy_rtx (x);
4648 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4649 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4650 opnum, (enum reload_type) type);
4651 return x;
4654 /* We must recognize output that we have already generated ourselves. */
4655 if (GET_CODE (x) == PLUS
4656 && GET_CODE (XEXP (x, 0)) == PLUS
4657 && REG_P (XEXP (XEXP (x, 0), 0))
4658 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4659 && CONST_INT_P (XEXP (x, 1)))
4661 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4662 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4663 opnum, (enum reload_type) type);
4664 return x;
4667 /* We wish to handle large displacements off a base register by splitting
4668 the addend across an add and the mem insn. This can cut the number of
4669 extra insns needed from 3 to 1. It is only useful for load/store of a
4670 single register with a 12-bit offset field. */
4671 if (GET_CODE (x) == PLUS
4672 && REG_P (XEXP (x, 0))
4673 && CONST_INT_P (XEXP (x, 1))
4674 && HARD_REGISTER_P (XEXP (x, 0))
4675 && mode != TImode
4676 && mode != TFmode
4677 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4679 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4680 HOST_WIDE_INT low = val & 0xfff;
4681 HOST_WIDE_INT high = val - low;
4682 HOST_WIDE_INT offs;
4683 rtx cst;
4684 machine_mode xmode = GET_MODE (x);
4686 /* In ILP32, xmode can be either DImode or SImode. */
4687 gcc_assert (xmode == DImode || xmode == SImode);
4689 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4690 BLKmode alignment. */
4691 if (GET_MODE_SIZE (mode) == 0)
4692 return NULL_RTX;
4694 offs = low % GET_MODE_SIZE (mode);
4696 /* Align misaligned offset by adjusting high part to compensate. */
4697 if (offs != 0)
4699 if (aarch64_uimm12_shift (high + offs))
4701 /* Align down. */
4702 low = low - offs;
4703 high = high + offs;
4705 else
4707 /* Align up. */
4708 offs = GET_MODE_SIZE (mode) - offs;
4709 low = low + offs;
4710 high = high + (low & 0x1000) - offs;
4711 low &= 0xfff;
4715 /* Check for overflow. */
4716 if (high + low != val)
4717 return NULL_RTX;
4719 cst = GEN_INT (high);
4720 if (!aarch64_uimm12_shift (high))
4721 cst = force_const_mem (xmode, cst);
4723 /* Reload high part into base reg, leaving the low part
4724 in the mem instruction.
4725 Note that replacing this gen_rtx_PLUS with plus_constant is
4726 wrong in this case because we rely on the
4727 (plus (plus reg c1) c2) structure being preserved so that
4728 XEXP (*p, 0) in push_reload below uses the correct term. */
4729 x = gen_rtx_PLUS (xmode,
4730 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4731 GEN_INT (low));
4733 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4734 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4735 opnum, (enum reload_type) type);
4736 return x;
4739 return NULL_RTX;
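/* Worked example for the high/low split above (illustrative, not from the
   original source): for a DImode access at reg + 0x13008, low = 0x008 and
   high = 0x13000.  The offset is already 8-byte aligned, and 0x13000 is a
   12-bit value shifted left by 12, so the (reg + 0x13000) part is reloaded
   into a scratch base register with a single ADD immediate while the memory
   access keeps the small offset 0x8 in its 12-bit field.  */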
4743 static reg_class_t
4744 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4745 reg_class_t rclass,
4746 machine_mode mode,
4747 secondary_reload_info *sri)
4749 /* Without the TARGET_SIMD instructions we cannot move a Q register
4750 to a Q register directly. We need a scratch. */
4751 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4752 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4753 && reg_class_subset_p (rclass, FP_REGS))
4755 if (mode == TFmode)
4756 sri->icode = CODE_FOR_aarch64_reload_movtf;
4757 else if (mode == TImode)
4758 sri->icode = CODE_FOR_aarch64_reload_movti;
4759 return NO_REGS;
4762 /* A TFmode or TImode memory access should be handled via an FP_REGS
4763 because AArch64 has richer addressing modes for LDR/STR instructions
4764 than LDP/STP instructions. */
4765 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
4766 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4767 return FP_REGS;
4769 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4770 return GENERAL_REGS;
4772 return NO_REGS;
4775 static bool
4776 aarch64_can_eliminate (const int from, const int to)
4778 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4779 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4781 if (frame_pointer_needed)
4783 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4784 return true;
4785 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4786 return false;
4787 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4788 && !cfun->calls_alloca)
4789 return true;
4790 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4791 return true;
4793 return false;
4795 else
4797 /* If we decided that we didn't need a leaf frame pointer but then used
4798 LR in the function, then we'll want a frame pointer after all, so
4799 prevent this elimination to ensure a frame pointer is used. */
4800 if (to == STACK_POINTER_REGNUM
4801 && flag_omit_leaf_frame_pointer
4802 && df_regs_ever_live_p (LR_REGNUM))
4803 return false;
4806 return true;
4809 HOST_WIDE_INT
4810 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4812 aarch64_layout_frame ();
4814 if (to == HARD_FRAME_POINTER_REGNUM)
4816 if (from == ARG_POINTER_REGNUM)
4817 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
4819 if (from == FRAME_POINTER_REGNUM)
4820 return (cfun->machine->frame.hard_fp_offset
4821 - cfun->machine->frame.saved_varargs_size);
4824 if (to == STACK_POINTER_REGNUM)
4826 if (from == FRAME_POINTER_REGNUM)
4827 return (cfun->machine->frame.frame_size
4828 - cfun->machine->frame.saved_varargs_size);
4831 return cfun->machine->frame.frame_size;
4834 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4835 previous frame. */
4838 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4840 if (count != 0)
4841 return const0_rtx;
4842 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4846 static void
4847 aarch64_asm_trampoline_template (FILE *f)
4849 if (TARGET_ILP32)
4851 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4852 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4854 else
4856 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4857 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4859 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4860 assemble_aligned_integer (4, const0_rtx);
4861 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4862 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
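/* Resulting trampoline layout (illustrative summary, not in the original
   source), for the LP64 case: bytes 0-11 hold the three instructions above,
   bytes 12-15 are the zero padding word, bytes 16-23 receive the target
   function address and bytes 24-31 the static chain value (both filled in
   by aarch64_trampoline_init below).  The first LDR at offset 0 therefore
   reads .+16, the function address, and the second LDR at offset 4 reads
   .+20, i.e. offset 24, the static chain.  */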
4865 static void
4866 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4868 rtx fnaddr, mem, a_tramp;
4869 const int tramp_code_sz = 16;
4871 /* We don't need to copy the trailing D-words; we fill those in below. */
4872 emit_block_move (m_tramp, assemble_trampoline_template (),
4873 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4874 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4875 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4876 if (GET_MODE (fnaddr) != ptr_mode)
4877 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4878 emit_move_insn (mem, fnaddr);
4880 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4881 emit_move_insn (mem, chain_value);
4883 /* XXX We should really define a "clear_cache" pattern and use
4884 gen_clear_cache(). */
4885 a_tramp = XEXP (m_tramp, 0);
4886 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4887 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4888 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4889 ptr_mode);
4892 static unsigned char
4893 aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
4895 switch (regclass)
4897 case CALLER_SAVE_REGS:
4898 case POINTER_REGS:
4899 case GENERAL_REGS:
4900 case ALL_REGS:
4901 case FP_REGS:
4902 case FP_LO_REGS:
4903 return
4904 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4905 (GET_MODE_SIZE (mode) + 7) / 8;
4906 case STACK_REG:
4907 return 1;
4909 case NO_REGS:
4910 return 0;
4912 default:
4913 break;
4915 gcc_unreachable ();
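/* Illustrative examples (not in the original source): a 16-byte vector mode
   such as V4SImode counts as (16 + 15) / 16 = 1 register, while a 16-byte
   non-vector mode such as TImode counts as (16 + 7) / 8 = 2 registers; the
   formula depends on the mode, not on which of the classes above is used.  */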
4918 static reg_class_t
4919 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4921 if (regclass == POINTER_REGS)
4922 return GENERAL_REGS;
4924 if (regclass == STACK_REG)
4926 if (REG_P(x)
4927 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4928 return regclass;
4930 return NO_REGS;
4933 /* If it's an integer immediate that MOVI can't handle, then
4934 FP_REGS is not an option, so we return NO_REGS instead. */
4935 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4936 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4937 return NO_REGS;
4939 /* Register elimination can result in a request for
4940 SP+constant->FP_REGS. We cannot support such operations, which
4941 use SP as the source and an FP_REG as the destination, so reject
4942 them outright. */
4943 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4945 rtx lhs = XEXP (x, 0);
4947 /* Look through a possible SUBREG introduced by ILP32. */
4948 if (GET_CODE (lhs) == SUBREG)
4949 lhs = SUBREG_REG (lhs);
4951 gcc_assert (REG_P (lhs));
4952 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4953 POINTER_REGS));
4954 return NO_REGS;
4957 return regclass;
4960 void
4961 aarch64_asm_output_labelref (FILE* f, const char *name)
4963 asm_fprintf (f, "%U%s", name);
4966 static void
4967 aarch64_elf_asm_constructor (rtx symbol, int priority)
4969 if (priority == DEFAULT_INIT_PRIORITY)
4970 default_ctor_section_asm_out_constructor (symbol, priority);
4971 else
4973 section *s;
4974 char buf[18];
4975 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4976 s = get_section (buf, SECTION_WRITE, NULL);
4977 switch_to_section (s);
4978 assemble_align (POINTER_SIZE);
4979 assemble_aligned_integer (POINTER_BYTES, symbol);
4983 static void
4984 aarch64_elf_asm_destructor (rtx symbol, int priority)
4986 if (priority == DEFAULT_INIT_PRIORITY)
4987 default_dtor_section_asm_out_destructor (symbol, priority);
4988 else
4990 section *s;
4991 char buf[18];
4992 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4993 s = get_section (buf, SECTION_WRITE, NULL);
4994 switch_to_section (s);
4995 assemble_align (POINTER_SIZE);
4996 assemble_aligned_integer (POINTER_BYTES, symbol);
5000 const char*
5001 aarch64_output_casesi (rtx *operands)
5003 char buf[100];
5004 char label[100];
5005 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
5006 int index;
5007 static const char *const patterns[4][2] =
5010 "ldrb\t%w3, [%0,%w1,uxtw]",
5011 "add\t%3, %4, %w3, sxtb #2"
5014 "ldrh\t%w3, [%0,%w1,uxtw #1]",
5015 "add\t%3, %4, %w3, sxth #2"
5018 "ldr\t%w3, [%0,%w1,uxtw #2]",
5019 "add\t%3, %4, %w3, sxtw #2"
5021 /* We assume that DImode is only generated when not optimizing and
5022 that we don't really need 64-bit address offsets. That would
5023 imply an object file with 8GB of code in a single function! */
5025 "ldr\t%w3, [%0,%w1,uxtw #2]",
5026 "add\t%3, %4, %w3, sxtw #2"
5030 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
5032 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
5034 gcc_assert (index >= 0 && index <= 3);
5036 /* Need to implement table size reduction, by changing the code below. */
5037 output_asm_insn (patterns[index][0], operands);
5038 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
5039 snprintf (buf, sizeof (buf),
5040 "adr\t%%4, %s", targetm.strip_name_encoding (label));
5041 output_asm_insn (buf, operands);
5042 output_asm_insn (patterns[index][1], operands);
5043 output_asm_insn ("br\t%3", operands);
5044 assemble_label (asm_out_file, label);
5045 return "";
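/* Illustrative output (not in the original source): for a HImode dispatch
   table (index == 1) the sequence emitted above is roughly

     ldrh  w3, [x0, w1, uxtw #1]
     adr   x4, .Lrtx<N>
     add   x3, x4, w3, sxth #2
     br    x3

   where .Lrtx<N> stands for the internal label generated for operands[2]
   and the register numbers are only examples.  */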
5049 /* Return size in bits of an arithmetic operand which is shifted/scaled and
5050 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5051 operator. */
5054 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
5056 if (shift >= 0 && shift <= 3)
5058 int size;
5059 for (size = 8; size <= 32; size *= 2)
5061 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
5062 if (mask == bits << shift)
5063 return size;
5066 return 0;
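/* Illustrative examples (not in the original source): aarch64_uxt_size
   (1, 0x1fe) returns 8, since 0x1fe == 0xff << 1 (a UXTB operand shifted by
   one), and aarch64_uxt_size (2, 0x3fffc) returns 16, since
   0x3fffc == 0xffff << 2; a mask that is not an 8/16/32-bit field shifted
   by 0..3 yields 0.  */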
5069 static bool
5070 aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
5071 const_rtx x ATTRIBUTE_UNUSED)
5073 /* We can't use blocks for constants when we're using a per-function
5074 constant pool. */
5075 return false;
5078 static section *
5079 aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
5080 rtx x ATTRIBUTE_UNUSED,
5081 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
5083 /* Force all constant pool entries into the current function section. */
5084 return function_section (current_function_decl);
5088 /* Costs. */
5090 /* Helper function for rtx cost calculation. Strip a shift expression
5091 from X. Returns the inner operand if successful, or the original
5092 expression on failure. */
5093 static rtx
5094 aarch64_strip_shift (rtx x)
5096 rtx op = x;
5098 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5099 we can convert both to ROR during final output. */
5100 if ((GET_CODE (op) == ASHIFT
5101 || GET_CODE (op) == ASHIFTRT
5102 || GET_CODE (op) == LSHIFTRT
5103 || GET_CODE (op) == ROTATERT
5104 || GET_CODE (op) == ROTATE)
5105 && CONST_INT_P (XEXP (op, 1)))
5106 return XEXP (op, 0);
5108 if (GET_CODE (op) == MULT
5109 && CONST_INT_P (XEXP (op, 1))
5110 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5111 return XEXP (op, 0);
5113 return x;
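/* Illustrative examples (not in the original source): both
   (ashift (reg) (const_int 3)) and the canonicalized multiply form
   (mult (reg) (const_int 8)) strip down to (reg), since exact_log2 (8)
   is 3; a multiply by a non-power-of-two constant is returned unchanged.  */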
5116 /* Helper function for rtx cost calculation. Strip an extend
5117 expression from X. Returns the inner operand if successful, or the
5118 original expression on failure. We deal with a number of possible
5119 canonicalization variations here. */
5120 static rtx
5121 aarch64_strip_extend (rtx x)
5123 rtx op = x;
5125 /* Zero and sign extraction of a widened value. */
5126 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5127 && XEXP (op, 2) == const0_rtx
5128 && GET_CODE (XEXP (op, 0)) == MULT
5129 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5130 XEXP (op, 1)))
5131 return XEXP (XEXP (op, 0), 0);
5133 /* It can also be represented (for zero-extend) as an AND with an
5134 immediate. */
5135 if (GET_CODE (op) == AND
5136 && GET_CODE (XEXP (op, 0)) == MULT
5137 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5138 && CONST_INT_P (XEXP (op, 1))
5139 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5140 INTVAL (XEXP (op, 1))) != 0)
5141 return XEXP (XEXP (op, 0), 0);
5143 /* Now handle extended register, as this may also have an optional
5144 left shift by 1..4. */
5145 if (GET_CODE (op) == ASHIFT
5146 && CONST_INT_P (XEXP (op, 1))
5147 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5148 op = XEXP (op, 0);
5150 if (GET_CODE (op) == ZERO_EXTEND
5151 || GET_CODE (op) == SIGN_EXTEND)
5152 op = XEXP (op, 0);
5154 if (op != x)
5155 return op;
5157 return x;
5160 /* Helper function for rtx cost calculation. Calculate the cost of
5161 a MULT, which may be part of a multiply-accumulate rtx. Return
5162 the calculated cost of the expression, recursing manually into
5163 operands where needed. */
5165 static int
5166 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
5168 rtx op0, op1;
5169 const struct cpu_cost_table *extra_cost
5170 = aarch64_tune_params->insn_extra_cost;
5171 int cost = 0;
5172 bool maybe_fma = (outer == PLUS || outer == MINUS);
5173 machine_mode mode = GET_MODE (x);
5175 gcc_checking_assert (code == MULT);
5177 op0 = XEXP (x, 0);
5178 op1 = XEXP (x, 1);
5180 if (VECTOR_MODE_P (mode))
5181 mode = GET_MODE_INNER (mode);
5183 /* Integer multiply/fma. */
5184 if (GET_MODE_CLASS (mode) == MODE_INT)
5186 /* The multiply will be canonicalized as a shift, so cost it as such. */
5187 if (CONST_INT_P (op1)
5188 && exact_log2 (INTVAL (op1)) > 0)
5190 if (speed)
5192 if (maybe_fma)
5193 /* ADD (shifted register). */
5194 cost += extra_cost->alu.arith_shift;
5195 else
5196 /* LSL (immediate). */
5197 cost += extra_cost->alu.shift;
5200 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
5202 return cost;
5205 /* Integer multiplies or FMAs have zero/sign extending variants. */
5206 if ((GET_CODE (op0) == ZERO_EXTEND
5207 && GET_CODE (op1) == ZERO_EXTEND)
5208 || (GET_CODE (op0) == SIGN_EXTEND
5209 && GET_CODE (op1) == SIGN_EXTEND))
5211 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
5212 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
5214 if (speed)
5216 if (maybe_fma)
5217 /* MADD/SMADDL/UMADDL. */
5218 cost += extra_cost->mult[0].extend_add;
5219 else
5220 /* MUL/SMULL/UMULL. */
5221 cost += extra_cost->mult[0].extend;
5224 return cost;
5227 /* This is either an integer multiply or an FMA. In both cases
5228 we want to recurse and cost the operands. */
5229 cost += rtx_cost (op0, MULT, 0, speed)
5230 + rtx_cost (op1, MULT, 1, speed);
5232 if (speed)
5234 if (maybe_fma)
5235 /* MADD. */
5236 cost += extra_cost->mult[mode == DImode].add;
5237 else
5238 /* MUL. */
5239 cost += extra_cost->mult[mode == DImode].simple;
5242 return cost;
5244 else
5246 if (speed)
5248 /* Floating-point FMA/FMUL can also support negations of the
5249 operands. */
5250 if (GET_CODE (op0) == NEG)
5251 op0 = XEXP (op0, 0);
5252 if (GET_CODE (op1) == NEG)
5253 op1 = XEXP (op1, 0);
5255 if (maybe_fma)
5256 /* FMADD/FNMADD/FNMSUB/FMSUB. */
5257 cost += extra_cost->fp[mode == DFmode].fma;
5258 else
5259 /* FMUL/FNMUL. */
5260 cost += extra_cost->fp[mode == DFmode].mult;
5263 cost += rtx_cost (op0, MULT, 0, speed)
5264 + rtx_cost (op1, MULT, 1, speed);
5265 return cost;
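/* Illustrative note for the integer path above (not in the original
   source): costing (plus (mult (reg) (const_int 4)) (reg)) for speed treats
   the multiply as a shift folded into the addition, so it adds
   extra_cost->alu.arith_shift (ADD with a shifted register) plus the cost
   of the shifted operand, rather than a full MADD cost.  */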
5269 static int
5270 aarch64_address_cost (rtx x,
5271 machine_mode mode,
5272 addr_space_t as ATTRIBUTE_UNUSED,
5273 bool speed)
5275 enum rtx_code c = GET_CODE (x);
5276 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
5277 struct aarch64_address_info info;
5278 int cost = 0;
5279 info.shift = 0;
5281 if (!aarch64_classify_address (&info, x, mode, c, false))
5283 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5285 /* This is a CONST or SYMBOL ref which will be split
5286 in a different way depending on the code model in use.
5287 Cost it through the generic infrastructure. */
5288 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
5289 /* Divide through by the cost of one instruction to
5290 bring it to the same units as the address costs. */
5291 cost_symbol_ref /= COSTS_N_INSNS (1);
5292 /* The cost is then the cost of preparing the address,
5293 followed by an immediate (possibly 0) offset. */
5294 return cost_symbol_ref + addr_cost->imm_offset;
5296 else
5298 /* This is most likely a jump table from a case
5299 statement. */
5300 return addr_cost->register_offset;
5304 switch (info.type)
5306 case ADDRESS_LO_SUM:
5307 case ADDRESS_SYMBOLIC:
5308 case ADDRESS_REG_IMM:
5309 cost += addr_cost->imm_offset;
5310 break;
5312 case ADDRESS_REG_WB:
5313 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5314 cost += addr_cost->pre_modify;
5315 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5316 cost += addr_cost->post_modify;
5317 else
5318 gcc_unreachable ();
5320 break;
5322 case ADDRESS_REG_REG:
5323 cost += addr_cost->register_offset;
5324 break;
5326 case ADDRESS_REG_UXTW:
5327 case ADDRESS_REG_SXTW:
5328 cost += addr_cost->register_extend;
5329 break;
5331 default:
5332 gcc_unreachable ();
5336 if (info.shift > 0)
5338 /* For the sake of calculating the cost of the shifted register
5339 component, we can treat same sized modes in the same way. */
5340 switch (GET_MODE_BITSIZE (mode))
5342 case 16:
5343 cost += addr_cost->addr_scale_costs.hi;
5344 break;
5346 case 32:
5347 cost += addr_cost->addr_scale_costs.si;
5348 break;
5350 case 64:
5351 cost += addr_cost->addr_scale_costs.di;
5352 break;
5354 /* We can't tell, or this is a 128-bit vector. */
5355 default:
5356 cost += addr_cost->addr_scale_costs.ti;
5357 break;
5361 return cost;
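/* Illustrative example (not in the original source): a DImode access
   through (plus (reg) (mult (reg) (const_int 8))) is roughly classified as
   ADDRESS_REG_REG with a shift of 3, so the returned cost is
   addr_cost->register_offset plus addr_cost->addr_scale_costs.di.  */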
5364 /* Return true if the RTX X in mode MODE is a zero or sign extract
5365 usable in an ADD or SUB (extended register) instruction. */
5366 static bool
5367 aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
5369 /* Catch add with a sign extract.
5370 This is add_<optab><mode>_multp2. */
5371 if (GET_CODE (x) == SIGN_EXTRACT
5372 || GET_CODE (x) == ZERO_EXTRACT)
5374 rtx op0 = XEXP (x, 0);
5375 rtx op1 = XEXP (x, 1);
5376 rtx op2 = XEXP (x, 2);
5378 if (GET_CODE (op0) == MULT
5379 && CONST_INT_P (op1)
5380 && op2 == const0_rtx
5381 && CONST_INT_P (XEXP (op0, 1))
5382 && aarch64_is_extend_from_extract (mode,
5383 XEXP (op0, 1),
5384 op1))
5386 return true;
5390 return false;
5393 static bool
5394 aarch64_frint_unspec_p (unsigned int u)
5396 switch (u)
5398 case UNSPEC_FRINTZ:
5399 case UNSPEC_FRINTP:
5400 case UNSPEC_FRINTM:
5401 case UNSPEC_FRINTA:
5402 case UNSPEC_FRINTN:
5403 case UNSPEC_FRINTX:
5404 case UNSPEC_FRINTI:
5405 return true;
5407 default:
5408 return false;
5412 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5413 storing it in *COST. Result is true if the total cost of the operation
5414 has now been calculated. */
5415 static bool
5416 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5418 rtx inner;
5419 rtx comparator;
5420 enum rtx_code cmpcode;
5422 if (COMPARISON_P (op0))
5424 inner = XEXP (op0, 0);
5425 comparator = XEXP (op0, 1);
5426 cmpcode = GET_CODE (op0);
5428 else
5430 inner = op0;
5431 comparator = const0_rtx;
5432 cmpcode = NE;
5435 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5437 /* Conditional branch. */
5438 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5439 return true;
5440 else
5442 if (cmpcode == NE || cmpcode == EQ)
5444 if (comparator == const0_rtx)
5446 /* TBZ/TBNZ/CBZ/CBNZ. */
5447 if (GET_CODE (inner) == ZERO_EXTRACT)
5448 /* TBZ/TBNZ. */
5449 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5450 0, speed);
5451 else
5452 /* CBZ/CBNZ. */
5453 *cost += rtx_cost (inner, cmpcode, 0, speed);
5455 return true;
5458 else if (cmpcode == LT || cmpcode == GE)
5460 /* TBZ/TBNZ. */
5461 if (comparator == const0_rtx)
5462 return true;
5466 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5468 /* It's a conditional operation based on the status flags,
5469 so it must be some flavor of CSEL. */
5471 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5472 if (GET_CODE (op1) == NEG
5473 || GET_CODE (op1) == NOT
5474 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5475 op1 = XEXP (op1, 0);
5477 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5478 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5479 return true;
5482 /* We don't know what this is; cost all operands. */
5483 return false;
5486 /* Calculate the cost of calculating X, storing it in *COST. Result
5487 is true if the total cost of the operation has now been calculated. */
5488 static bool
5489 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
5490 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5492 rtx op0, op1, op2;
5493 const struct cpu_cost_table *extra_cost
5494 = aarch64_tune_params->insn_extra_cost;
5495 machine_mode mode = GET_MODE (x);
5497 /* By default, assume that everything has equivalent cost to the
5498 cheapest instruction. Any additional costs are applied as a delta
5499 above this default. */
5500 *cost = COSTS_N_INSNS (1);
5502 /* TODO: The cost infrastructure currently does not handle
5503 vector operations. Assume that all vector operations
5504 are equally expensive. */
5505 if (VECTOR_MODE_P (mode))
5507 if (speed)
5508 *cost += extra_cost->vect.alu;
5509 return true;
5512 switch (code)
5514 case SET:
5515 /* The cost depends entirely on the operands to SET. */
5516 *cost = 0;
5517 op0 = SET_DEST (x);
5518 op1 = SET_SRC (x);
5520 switch (GET_CODE (op0))
5522 case MEM:
5523 if (speed)
5525 rtx address = XEXP (op0, 0);
5526 if (GET_MODE_CLASS (mode) == MODE_INT)
5527 *cost += extra_cost->ldst.store;
5528 else if (mode == SFmode)
5529 *cost += extra_cost->ldst.storef;
5530 else if (mode == DFmode)
5531 *cost += extra_cost->ldst.stored;
5533 *cost +=
5534 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5535 0, speed));
5538 *cost += rtx_cost (op1, SET, 1, speed);
5539 return true;
5541 case SUBREG:
5542 if (! REG_P (SUBREG_REG (op0)))
5543 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
5545 /* Fall through. */
5546 case REG:
5547 /* const0_rtx is in general free, but we will use an
5548 instruction to set a register to 0. */
5549 if (REG_P (op1) || op1 == const0_rtx)
5551 /* The cost is 1 per register copied. */
5552 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5553 / UNITS_PER_WORD;
5554 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5556 else
5557 /* Cost is just the cost of the RHS of the set. */
5558 *cost += rtx_cost (op1, SET, 1, speed);
5559 return true;
5561 case ZERO_EXTRACT:
5562 case SIGN_EXTRACT:
5563 /* Bit-field insertion. Strip any redundant widening of
5564 the RHS to meet the width of the target. */
5565 if (GET_CODE (op1) == SUBREG)
5566 op1 = SUBREG_REG (op1);
5567 if ((GET_CODE (op1) == ZERO_EXTEND
5568 || GET_CODE (op1) == SIGN_EXTEND)
5569 && CONST_INT_P (XEXP (op0, 1))
5570 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5571 >= INTVAL (XEXP (op0, 1))))
5572 op1 = XEXP (op1, 0);
5574 if (CONST_INT_P (op1))
5576 /* MOV immediate is assumed to always be cheap. */
5577 *cost = COSTS_N_INSNS (1);
5579 else
5581 /* BFM. */
5582 if (speed)
5583 *cost += extra_cost->alu.bfi;
5584 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5587 return true;
5589 default:
5590 /* We can't make sense of this; assume the default cost. */
5591 *cost = COSTS_N_INSNS (1);
5592 return false;
5594 return false;
5596 case CONST_INT:
5597 /* If an instruction can incorporate a constant within the
5598 instruction, the instruction's expression avoids calling
5599 rtx_cost() on the constant. If rtx_cost() is called on a
5600 constant, then it is usually because the constant must be
5601 moved into a register by one or more instructions.
5603 The exception is constant 0, which can be expressed
5604 as XZR/WZR and is therefore free. The exception to this is
5605 if we have (set (reg) (const0_rtx)) in which case we must cost
5606 the move. However, we can catch that when we cost the SET, so
5607 we don't need to consider that here. */
5608 if (x == const0_rtx)
5609 *cost = 0;
5610 else
5612 /* To a first approximation, the cost of building any other
5613 constant is proportional to the number of instructions
5614 required to build it. This is true whether we
5615 are compiling for SPEED or otherwise. */
5616 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
5617 (NULL_RTX, x, false, mode));
5619 return true;
5621 case CONST_DOUBLE:
5622 if (speed)
5624 /* mov[df,sf]_aarch64. */
5625 if (aarch64_float_const_representable_p (x))
5626 /* FMOV (scalar immediate). */
5627 *cost += extra_cost->fp[mode == DFmode].fpconst;
5628 else if (!aarch64_float_const_zero_rtx_p (x))
5630 /* This will be a load from memory. */
5631 if (mode == DFmode)
5632 *cost += extra_cost->ldst.loadd;
5633 else
5634 *cost += extra_cost->ldst.loadf;
5636 else
5637 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5638 or MOV v0.s[0], wzr - neither of which is modeled by the
5639 cost tables. Just use the default cost. */
5644 return true;
5646 case MEM:
5647 if (speed)
5649 /* For loads we want the base cost of a load, plus an
5650 approximation for the additional cost of the addressing
5651 mode. */
5652 rtx address = XEXP (x, 0);
5653 if (GET_MODE_CLASS (mode) == MODE_INT)
5654 *cost += extra_cost->ldst.load;
5655 else if (mode == SFmode)
5656 *cost += extra_cost->ldst.loadf;
5657 else if (mode == DFmode)
5658 *cost += extra_cost->ldst.loadd;
5660 *cost +=
5661 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5662 0, speed));
5665 return true;
5667 case NEG:
5668 op0 = XEXP (x, 0);
5670 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5672 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5673 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5675 /* CSETM. */
5676 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5677 return true;
5680 /* Cost this as SUB wzr, X. */
5681 op0 = CONST0_RTX (GET_MODE (x));
5682 op1 = XEXP (x, 0);
5683 goto cost_minus;
5686 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5688 /* Support (neg(fma...)) as a single instruction only if
5689 sign of zeros is unimportant. This matches the decision
5690 making in aarch64.md. */
5691 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5693 /* FNMADD. */
5694 *cost = rtx_cost (op0, NEG, 0, speed);
5695 return true;
5697 if (speed)
5698 /* FNEG. */
5699 *cost += extra_cost->fp[mode == DFmode].neg;
5700 return false;
5703 return false;
5705 case CLRSB:
5706 case CLZ:
5707 if (speed)
5708 *cost += extra_cost->alu.clz;
5710 return false;
5712 case COMPARE:
5713 op0 = XEXP (x, 0);
5714 op1 = XEXP (x, 1);
5716 if (op1 == const0_rtx
5717 && GET_CODE (op0) == AND)
5719 x = op0;
5720 goto cost_logic;
5723 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5725 /* TODO: A write to the CC flags possibly costs extra; this
5726 needs encoding in the cost tables. */
5728 /* CC_ZESWPmode supports zero extend for free. */
5729 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5730 op0 = XEXP (op0, 0);
5732 /* ANDS. */
5733 if (GET_CODE (op0) == AND)
5735 x = op0;
5736 goto cost_logic;
5739 if (GET_CODE (op0) == PLUS)
5741 /* ADDS (and CMN alias). */
5742 x = op0;
5743 goto cost_plus;
5746 if (GET_CODE (op0) == MINUS)
5748 /* SUBS. */
5749 x = op0;
5750 goto cost_minus;
5753 if (GET_CODE (op1) == NEG)
5755 /* CMN. */
5756 if (speed)
5757 *cost += extra_cost->alu.arith;
5759 *cost += rtx_cost (op0, COMPARE, 0, speed);
5760 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5761 return true;
5764 /* CMP.
5766 Compare can freely swap the order of operands, and
5767 canonicalization puts the more complex operation first.
5768 But the integer MINUS logic expects the shift/extend
5769 operation in op1. */
5770 if (! (REG_P (op0)
5771 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5773 op0 = XEXP (x, 1);
5774 op1 = XEXP (x, 0);
5776 goto cost_minus;
5779 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5781 /* FCMP. */
5782 if (speed)
5783 *cost += extra_cost->fp[mode == DFmode].compare;
5785 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5787 /* FCMP supports constant 0.0 for no extra cost. */
5788 return true;
5790 return false;
5793 return false;
5795 case MINUS:
5797 op0 = XEXP (x, 0);
5798 op1 = XEXP (x, 1);
5800 cost_minus:
5801 /* Detect valid immediates. */
5802 if ((GET_MODE_CLASS (mode) == MODE_INT
5803 || (GET_MODE_CLASS (mode) == MODE_CC
5804 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5805 && CONST_INT_P (op1)
5806 && aarch64_uimm12_shift (INTVAL (op1)))
5808 *cost += rtx_cost (op0, MINUS, 0, speed);
5810 if (speed)
5811 /* SUB(S) (immediate). */
5812 *cost += extra_cost->alu.arith;
5813 return true;
5817 /* Look for SUB (extended register). */
5818 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5820 if (speed)
5821 *cost += extra_cost->alu.arith_shift;
5823 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5824 (enum rtx_code) GET_CODE (op1),
5825 0, speed);
5826 return true;
5829 rtx new_op1 = aarch64_strip_extend (op1);
5831 /* Cost this as an FMA-alike operation. */
5832 if ((GET_CODE (new_op1) == MULT
5833 || GET_CODE (new_op1) == ASHIFT)
5834 && code != COMPARE)
5836 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5837 (enum rtx_code) code,
5838 speed);
5839 *cost += rtx_cost (op0, MINUS, 0, speed);
5840 return true;
5843 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5845 if (speed)
5847 if (GET_MODE_CLASS (mode) == MODE_INT)
5848 /* SUB(S). */
5849 *cost += extra_cost->alu.arith;
5850 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5851 /* FSUB. */
5852 *cost += extra_cost->fp[mode == DFmode].addsub;
5854 return true;
5857 case PLUS:
5859 rtx new_op0;
5861 op0 = XEXP (x, 0);
5862 op1 = XEXP (x, 1);
5864 cost_plus:
5865 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5866 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5868 /* CSINC. */
5869 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5870 *cost += rtx_cost (op1, PLUS, 1, speed);
5871 return true;
5874 if (GET_MODE_CLASS (mode) == MODE_INT
5875 && CONST_INT_P (op1)
5876 && aarch64_uimm12_shift (INTVAL (op1)))
5878 *cost += rtx_cost (op0, PLUS, 0, speed);
5880 if (speed)
5881 /* ADD (immediate). */
5882 *cost += extra_cost->alu.arith;
5883 return true;
5886 /* Look for ADD (extended register). */
5887 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5889 if (speed)
5890 *cost += extra_cost->alu.arith_shift;
5892 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5893 (enum rtx_code) GET_CODE (op0),
5894 0, speed);
5895 return true;
5898 /* Strip any extend; leave shifts behind, as we will
5899 cost them through mult_cost. */
5900 new_op0 = aarch64_strip_extend (op0);
5902 if (GET_CODE (new_op0) == MULT
5903 || GET_CODE (new_op0) == ASHIFT)
5905 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5906 speed);
5907 *cost += rtx_cost (op1, PLUS, 1, speed);
5908 return true;
5911 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5912 + rtx_cost (op1, PLUS, 1, speed));
5914 if (speed)
5916 if (GET_MODE_CLASS (mode) == MODE_INT)
5917 /* ADD. */
5918 *cost += extra_cost->alu.arith;
5919 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5920 /* FADD. */
5921 *cost += extra_cost->fp[mode == DFmode].addsub;
5923 return true;
5926 case BSWAP:
5927 *cost = COSTS_N_INSNS (1);
5929 if (speed)
5930 *cost += extra_cost->alu.rev;
5932 return false;
5934 case IOR:
5935 if (aarch_rev16_p (x))
5937 *cost = COSTS_N_INSNS (1);
5939 if (speed)
5940 *cost += extra_cost->alu.rev;
5942 return true;
5944 /* Fall through. */
5945 case XOR:
5946 case AND:
5947 cost_logic:
5948 op0 = XEXP (x, 0);
5949 op1 = XEXP (x, 1);
5951 if (code == AND
5952 && GET_CODE (op0) == MULT
5953 && CONST_INT_P (XEXP (op0, 1))
5954 && CONST_INT_P (op1)
5955 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5956 INTVAL (op1)) != 0)
5958 /* This is a UBFM/SBFM. */
5959 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5960 if (speed)
5961 *cost += extra_cost->alu.bfx;
5962 return true;
5965 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5967 /* We possibly get the immediate for free; this is not
5968 modelled. */
5969 if (CONST_INT_P (op1)
5970 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5972 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5974 if (speed)
5975 *cost += extra_cost->alu.logical;
5977 return true;
5979 else
5981 rtx new_op0 = op0;
5983 /* Handle ORN, EON, or BIC. */
5984 if (GET_CODE (op0) == NOT)
5985 op0 = XEXP (op0, 0);
5987 new_op0 = aarch64_strip_shift (op0);
5989 /* If we had a shift on op0 then this is a logical-shift-
5990 by-register/immediate operation. Otherwise, this is just
5991 a logical operation. */
5992 if (speed)
5994 if (new_op0 != op0)
5996 /* Shift by immediate. */
5997 if (CONST_INT_P (XEXP (op0, 1)))
5998 *cost += extra_cost->alu.log_shift;
5999 else
6000 *cost += extra_cost->alu.log_shift_reg;
6002 else
6003 *cost += extra_cost->alu.logical;
6006 /* In both cases we want to cost both operands. */
6007 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
6008 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
6010 return true;
6013 return false;
6015 case NOT:
6016 /* MVN. */
6017 if (speed)
6018 *cost += extra_cost->alu.logical;
6020 /* The logical instruction could have the shifted register form,
6021 but the cost is the same if the shift is processed as a separate
6022 instruction, so we don't bother with it here. */
6023 return false;
6025 case ZERO_EXTEND:
6027 op0 = XEXP (x, 0);
6028 /* If a value is written in SI mode, then zero extended to DI
6029 mode, the operation will in general be free as a write to
6030 a 'w' register implicitly zeroes the upper bits of an 'x'
6031 register. However, if this is
6033 (set (reg) (zero_extend (reg)))
6035 we must cost the explicit register move. */
6036 if (mode == DImode
6037 && GET_MODE (op0) == SImode
6038 && outer == SET)
6040 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
6042 if (!op_cost && speed)
6043 /* MOV. */
6044 *cost += extra_cost->alu.extend;
6045 else
6046 /* Free, the cost is that of the SI mode operation. */
6047 *cost = op_cost;
6049 return true;
6051 else if (MEM_P (XEXP (x, 0)))
6053 /* All loads can zero extend to any size for free. */
6054 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
6055 return true;
6058 /* UXTB/UXTH. */
6059 if (speed)
6060 *cost += extra_cost->alu.extend;
6062 return false;
6064 case SIGN_EXTEND:
6065 if (MEM_P (XEXP (x, 0)))
6067 /* LDRSH. */
6068 if (speed)
6070 rtx address = XEXP (XEXP (x, 0), 0);
6071 *cost += extra_cost->ldst.load_sign_extend;
6073 *cost +=
6074 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6075 0, speed));
6077 return true;
6080 if (speed)
6081 *cost += extra_cost->alu.extend;
6082 return false;
6084 case ASHIFT:
6085 op0 = XEXP (x, 0);
6086 op1 = XEXP (x, 1);
6088 if (CONST_INT_P (op1))
6090 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
6091 aliases. */
6092 if (speed)
6093 *cost += extra_cost->alu.shift;
6095 /* We can incorporate zero/sign extend for free. */
6096 if (GET_CODE (op0) == ZERO_EXTEND
6097 || GET_CODE (op0) == SIGN_EXTEND)
6098 op0 = XEXP (op0, 0);
6100 *cost += rtx_cost (op0, ASHIFT, 0, speed);
6101 return true;
6103 else
6105 /* LSLV. */
6106 if (speed)
6107 *cost += extra_cost->alu.shift_reg;
6109 return false; /* All arguments need to be in registers. */
6112 case ROTATE:
6113 case ROTATERT:
6114 case LSHIFTRT:
6115 case ASHIFTRT:
6116 op0 = XEXP (x, 0);
6117 op1 = XEXP (x, 1);
6119 if (CONST_INT_P (op1))
6121 /* ASR (immediate) and friends. */
6122 if (speed)
6123 *cost += extra_cost->alu.shift;
6125 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
6126 return true;
6128 else
6131 /* ASR (register) and friends. */
6132 if (speed)
6133 *cost += extra_cost->alu.shift_reg;
6135 return false; /* All arguments need to be in registers. */
6138 case SYMBOL_REF:
6140 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6142 /* LDR. */
6143 if (speed)
6144 *cost += extra_cost->ldst.load;
6146 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
6147 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
6149 /* ADRP, followed by ADD. */
6150 *cost += COSTS_N_INSNS (1);
6151 if (speed)
6152 *cost += 2 * extra_cost->alu.arith;
6154 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
6155 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
6157 /* ADR. */
6158 if (speed)
6159 *cost += extra_cost->alu.arith;
6162 if (flag_pic)
6164 /* One extra load instruction, after accessing the GOT. */
6165 *cost += COSTS_N_INSNS (1);
6166 if (speed)
6167 *cost += extra_cost->ldst.load;
6169 return true;
6171 case HIGH:
6172 case LO_SUM:
6173 /* ADRP/ADD (immediate). */
6174 if (speed)
6175 *cost += extra_cost->alu.arith;
6176 return true;
6178 case ZERO_EXTRACT:
6179 case SIGN_EXTRACT:
6180 /* UBFX/SBFX. */
6181 if (speed)
6182 *cost += extra_cost->alu.bfx;
6184 /* We can trust that the immediates used will be correct (there
6185 are no by-register forms), so we need only cost op0. */
6186 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
6187 return true;
6189 case MULT:
6190 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
6191 /* aarch64_rtx_mult_cost always handles recursion to its
6192 operands. */
6193 return true;
6195 case MOD:
6196 case UMOD:
6197 if (speed)
6199 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6200 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
6201 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
6202 else if (GET_MODE (x) == DFmode)
6203 *cost += (extra_cost->fp[1].mult
6204 + extra_cost->fp[1].div);
6205 else if (GET_MODE (x) == SFmode)
6206 *cost += (extra_cost->fp[0].mult
6207 + extra_cost->fp[0].div);
6209 return false; /* All arguments need to be in registers. */
6211 case DIV:
6212 case UDIV:
6213 case SQRT:
6214 if (speed)
6216 if (GET_MODE_CLASS (mode) == MODE_INT)
6217 /* There is no integer SQRT, so only DIV and UDIV can get
6218 here. */
6219 *cost += extra_cost->mult[mode == DImode].idiv;
6220 else
6221 *cost += extra_cost->fp[mode == DFmode].div;
6223 return false; /* All arguments need to be in registers. */
6225 case IF_THEN_ELSE:
6226 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
6227 XEXP (x, 2), cost, speed);
6229 case EQ:
6230 case NE:
6231 case GT:
6232 case GTU:
6233 case LT:
6234 case LTU:
6235 case GE:
6236 case GEU:
6237 case LE:
6238 case LEU:
6240 return false; /* All arguments must be in registers. */
6242 case FMA:
6243 op0 = XEXP (x, 0);
6244 op1 = XEXP (x, 1);
6245 op2 = XEXP (x, 2);
6247 if (speed)
6248 *cost += extra_cost->fp[mode == DFmode].fma;
6250 /* FMSUB, FNMADD, and FNMSUB are free. */
6251 if (GET_CODE (op0) == NEG)
6252 op0 = XEXP (op0, 0);
6254 if (GET_CODE (op2) == NEG)
6255 op2 = XEXP (op2, 0);
6257 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
6258 and the by-element operand as operand 0. */
6259 if (GET_CODE (op1) == NEG)
6260 op1 = XEXP (op1, 0);
6262 /* Catch vector-by-element operations. The by-element operand can
6263 either be (vec_duplicate (vec_select (x))) or just
6264 (vec_select (x)), depending on whether we are multiplying by
6265 a vector or a scalar.
6267 Canonicalization is not very good in these cases: FMA4 will put the
6268 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
6269 if (GET_CODE (op0) == VEC_DUPLICATE)
6270 op0 = XEXP (op0, 0);
6271 else if (GET_CODE (op1) == VEC_DUPLICATE)
6272 op1 = XEXP (op1, 0);
6274 if (GET_CODE (op0) == VEC_SELECT)
6275 op0 = XEXP (op0, 0);
6276 else if (GET_CODE (op1) == VEC_SELECT)
6277 op1 = XEXP (op1, 0);
6279 /* If the remaining parameters are not registers,
6280 get the cost to put them into registers. */
6281 *cost += rtx_cost (op0, FMA, 0, speed);
6282 *cost += rtx_cost (op1, FMA, 1, speed);
6283 *cost += rtx_cost (op2, FMA, 2, speed);
6284 return true;
6286 case FLOAT_EXTEND:
6287 if (speed)
6288 *cost += extra_cost->fp[mode == DFmode].widen;
6289 return false;
6291 case FLOAT_TRUNCATE:
6292 if (speed)
6293 *cost += extra_cost->fp[mode == DFmode].narrow;
6294 return false;
6296 case FIX:
6297 case UNSIGNED_FIX:
6298 x = XEXP (x, 0);
6299 /* Strip the rounding part. They will all be implemented
6300 by the fcvt* family of instructions anyway. */
6301 if (GET_CODE (x) == UNSPEC)
6303 unsigned int uns_code = XINT (x, 1);
6305 if (uns_code == UNSPEC_FRINTA
6306 || uns_code == UNSPEC_FRINTM
6307 || uns_code == UNSPEC_FRINTN
6308 || uns_code == UNSPEC_FRINTP
6309 || uns_code == UNSPEC_FRINTZ)
6310 x = XVECEXP (x, 0, 0);
6313 if (speed)
6314 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
6316 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
6317 return true;
6319 case ABS:
6320 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6322 /* FABS and FNEG are analogous. */
6323 if (speed)
6324 *cost += extra_cost->fp[mode == DFmode].neg;
6326 else
6328 /* Integer ABS will either be split into
6329 two arithmetic instructions, or will be an ABS
6330 (scalar), which we don't model. */
6331 *cost = COSTS_N_INSNS (2);
6332 if (speed)
6333 *cost += 2 * extra_cost->alu.arith;
6335 return false;
6337 case SMAX:
6338 case SMIN:
6339 if (speed)
6341 /* FMAXNM/FMINNM/FMAX/FMIN.
6342 TODO: This may not be accurate for all implementations, but
6343 we do not model this in the cost tables. */
6344 *cost += extra_cost->fp[mode == DFmode].addsub;
6346 return false;
6348 case UNSPEC:
6349 /* The floating point round to integer frint* instructions. */
6350 if (aarch64_frint_unspec_p (XINT (x, 1)))
6352 if (speed)
6353 *cost += extra_cost->fp[mode == DFmode].roundint;
6355 return false;
6358 if (XINT (x, 1) == UNSPEC_RBIT)
6360 if (speed)
6361 *cost += extra_cost->alu.rev;
6363 return false;
6365 break;
6367 case TRUNCATE:
6369 /* Decompose <su>muldi3_highpart. */
6370 if (/* (truncate:DI */
6371 mode == DImode
6372 /* (lshiftrt:TI */
6373 && GET_MODE (XEXP (x, 0)) == TImode
6374 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6375 /* (mult:TI */
6376 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6377 /* (ANY_EXTEND:TI (reg:DI))
6378 (ANY_EXTEND:TI (reg:DI))) */
6379 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6380 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
6381 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
6382 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
6383 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
6384 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
6385 /* (const_int 64) */
6386 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6387 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
6389 /* UMULH/SMULH. */
6390 if (speed)
6391 *cost += extra_cost->mult[mode == DImode].extend;
6392 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
6393 MULT, 0, speed);
6394 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
6395 MULT, 1, speed);
6396 return true;
6399 /* Fall through. */
6400 default:
6401 break;
6404 if (dump_file && (dump_flags & TDF_DETAILS))
6405 fprintf (dump_file,
6406 "\nFailed to cost RTX. Assuming default cost.\n");
6408 return true;
6411 /* Wrapper around aarch64_rtx_costs; dumps the partial or total cost
6412 calculated for X. This cost is stored in *COST. Returns true
6413 if the total cost of X was calculated. */
6414 static bool
6415 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
6416 int param, int *cost, bool speed)
6418 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
6420 if (dump_file && (dump_flags & TDF_DETAILS))
6422 print_rtl_single (dump_file, x);
6423 fprintf (dump_file, "\n%s cost: %d (%s)\n",
6424 speed ? "Hot" : "Cold",
6425 *cost, result ? "final" : "partial");
6428 return result;
6431 static int
6432 aarch64_register_move_cost (machine_mode mode,
6433 reg_class_t from_i, reg_class_t to_i)
6435 enum reg_class from = (enum reg_class) from_i;
6436 enum reg_class to = (enum reg_class) to_i;
6437 const struct cpu_regmove_cost *regmove_cost
6438 = aarch64_tune_params->regmove_cost;
6440 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
6441 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6442 to = GENERAL_REGS;
6444 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6445 from = GENERAL_REGS;
6447 /* The cost of moving between a GPR and the stack is the same as GP2GP. */
6448 if ((from == GENERAL_REGS && to == STACK_REG)
6449 || (to == GENERAL_REGS && from == STACK_REG))
6450 return regmove_cost->GP2GP;
6452 /* To/From the stack register, we move via the gprs. */
6453 if (to == STACK_REG || from == STACK_REG)
6454 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6455 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
6457 if (GET_MODE_SIZE (mode) == 16)
6459 /* 128-bit operations on general registers require 2 instructions. */
6460 if (from == GENERAL_REGS && to == GENERAL_REGS)
6461 return regmove_cost->GP2GP * 2;
6462 else if (from == GENERAL_REGS)
6463 return regmove_cost->GP2FP * 2;
6464 else if (to == GENERAL_REGS)
6465 return regmove_cost->FP2GP * 2;
6467 /* When AdvSIMD instructions are disabled it is not possible to move
6468 a 128-bit value directly between Q registers. This is handled in
6469 secondary reload. A general register is used as a scratch to move
6470 the upper DI value and the lower DI value is moved directly,
6471 hence the cost is the sum of three moves. */
6472 if (! TARGET_SIMD)
6473 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6475 return regmove_cost->FP2FP;
6478 if (from == GENERAL_REGS && to == GENERAL_REGS)
6479 return regmove_cost->GP2GP;
6480 else if (from == GENERAL_REGS)
6481 return regmove_cost->GP2FP;
6482 else if (to == GENERAL_REGS)
6483 return regmove_cost->FP2GP;
6485 return regmove_cost->FP2FP;
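/* Implement TARGET_MEMORY_MOVE_COST. */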
6488 static int
6489 aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
6490 reg_class_t rclass ATTRIBUTE_UNUSED,
6491 bool in ATTRIBUTE_UNUSED)
6493 return aarch64_tune_params->memmov_cost;
6496 /* Return the number of instructions that can be issued per cycle. */
6497 static int
6498 aarch64_sched_issue_rate (void)
6500 return aarch64_tune_params->issue_rate;
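/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD. */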
6503 static int
6504 aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
6506 int issue_rate = aarch64_sched_issue_rate ();
6508 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
6511 /* Vectorizer cost model target hooks. */
6513 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6514 static int
6515 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6516 tree vectype,
6517 int misalign ATTRIBUTE_UNUSED)
6519 unsigned elements;
6521 switch (type_of_cost)
6523 case scalar_stmt:
6524 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6526 case scalar_load:
6527 return aarch64_tune_params->vec_costs->scalar_load_cost;
6529 case scalar_store:
6530 return aarch64_tune_params->vec_costs->scalar_store_cost;
6532 case vector_stmt:
6533 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6535 case vector_load:
6536 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6538 case vector_store:
6539 return aarch64_tune_params->vec_costs->vec_store_cost;
6541 case vec_to_scalar:
6542 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6544 case scalar_to_vec:
6545 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6547 case unaligned_load:
6548 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6550 case unaligned_store:
6551 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6553 case cond_branch_taken:
6554 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6556 case cond_branch_not_taken:
6557 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6559 case vec_perm:
6560 case vec_promote_demote:
6561 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6563 case vec_construct:
6564 elements = TYPE_VECTOR_SUBPARTS (vectype);
6565 return elements / 2 + 1;
6567 default:
6568 gcc_unreachable ();
6572 /* Implement targetm.vectorize.add_stmt_cost. */
6573 static unsigned
6574 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6575 struct _stmt_vec_info *stmt_info, int misalign,
6576 enum vect_cost_model_location where)
6578 unsigned *cost = (unsigned *) data;
6579 unsigned retval = 0;
6581 if (flag_vect_cost_model)
6583 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6584 int stmt_cost =
6585 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6587 /* Statements in an inner loop relative to the loop being
6588 vectorized are weighted more heavily. The value here is
6589 a function (linear for now) of the loop nest level. */
6590 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6592 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6593 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6594 unsigned nest_level = loop_depth (loop);
6596 count *= nest_level;
6599 retval = (unsigned) (count * stmt_cost);
6600 cost[where] += retval;
6603 return retval;
6606 static void initialize_aarch64_code_model (void);
6608 /* Parse the architecture extension string. */
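/* STR is expected to point at the leading '+' of a list such as
   "+crypto+nocrc" (an illustrative value; the valid names come from the
   all_extensions table): each element either enables an extension or,
   when prefixed with "no", disables it.  */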
6610 static void
6611 aarch64_parse_extension (char *str)
6613 /* The extension string is parsed left to right. */
6614 const struct aarch64_option_extension *opt = NULL;
6616 /* Flag to say whether we are adding or removing an extension. */
6617 int adding_ext = -1;
6619 while (str != NULL && *str != 0)
6621 char *ext;
6622 size_t len;
6624 str++;
6625 ext = strchr (str, '+');
6627 if (ext != NULL)
6628 len = ext - str;
6629 else
6630 len = strlen (str);
6632 if (len >= 2 && strncmp (str, "no", 2) == 0)
6634 adding_ext = 0;
6635 len -= 2;
6636 str += 2;
6638 else if (len > 0)
6639 adding_ext = 1;
6641 if (len == 0)
6643 error ("missing feature modifier after %qs", adding_ext ? "+"
6644 : "+no");
6645 return;
6648 /* Scan over the extensions table trying to find an exact match. */
6649 for (opt = all_extensions; opt->name != NULL; opt++)
6651 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6653 /* Add or remove the extension. */
6654 if (adding_ext)
6655 aarch64_isa_flags |= opt->flags_on;
6656 else
6657 aarch64_isa_flags &= ~(opt->flags_off);
6658 break;
6662 if (opt->name == NULL)
6664 /* Extension not found in list. */
6665 error ("unknown feature modifier %qs", str);
6666 return;
6669 str = ext;
6672 return;
6675 /* Parse the ARCH string. */
6677 static void
6678 aarch64_parse_arch (void)
6680 char *ext;
6681 const struct processor *arch;
6682 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6683 size_t len;
6685 strcpy (str, aarch64_arch_string);
6687 ext = strchr (str, '+');
6689 if (ext != NULL)
6690 len = ext - str;
6691 else
6692 len = strlen (str);
6694 if (len == 0)
6696 error ("missing arch name in -march=%qs", str);
6697 return;
6700 /* Loop through the list of supported ARCHs to find a match. */
6701 for (arch = all_architectures; arch->name != NULL; arch++)
6703 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6705 selected_arch = arch;
6706 aarch64_isa_flags = selected_arch->flags;
6708 if (!selected_cpu)
6709 selected_cpu = &all_cores[selected_arch->core];
6711 if (ext != NULL)
6713 /* ARCH string contains at least one extension. */
6714 aarch64_parse_extension (ext);
6717 if (strcmp (selected_arch->arch, selected_cpu->arch))
6719 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6720 selected_cpu->name, selected_arch->name);
6723 return;
6727 /* ARCH name not found in list. */
6728 error ("unknown value %qs for -march", str);
6729 return;
6732 /* Parse the CPU string. */
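/* The expected form is a core name optionally followed by extension
   modifiers, e.g. "cortex-a57+crypto" (illustrative; the accepted names
   come from the all_cores and all_extensions tables).  */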
6734 static void
6735 aarch64_parse_cpu (void)
6737 char *ext;
6738 const struct processor *cpu;
6739 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6740 size_t len;
6742 strcpy (str, aarch64_cpu_string);
6744 ext = strchr (str, '+');
6746 if (ext != NULL)
6747 len = ext - str;
6748 else
6749 len = strlen (str);
6751 if (len == 0)
6753 error ("missing cpu name in -mcpu=%qs", str);
6754 return;
6757 /* Loop through the list of supported CPUs to find a match. */
6758 for (cpu = all_cores; cpu->name != NULL; cpu++)
6760 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6762 selected_cpu = cpu;
6763 aarch64_isa_flags = selected_cpu->flags;
6765 if (ext != NULL)
6767 /* CPU string contains at least one extension. */
6768 aarch64_parse_extension (ext);
6771 return;
6775 /* CPU name not found in list. */
6776 error ("unknown value %qs for -mcpu", str);
6777 return;
6780 /* Parse the TUNE string. */
6782 static void
6783 aarch64_parse_tune (void)
6785 const struct processor *cpu;
6786 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6787 strcpy (str, aarch64_tune_string);
6789 /* Loop through the list of supported CPUs to find a match. */
6790 for (cpu = all_cores; cpu->name != NULL; cpu++)
6792 if (strcmp (cpu->name, str) == 0)
6794 selected_tune = cpu;
6795 return;
6799 /* CPU name not found in list. */
6800 error ("unknown value %qs for -mtune", str);
6801 return;
6805 /* Implement TARGET_OPTION_OVERRIDE. */
6807 static void
6808 aarch64_override_options (void)
6810 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6811 If either of -march or -mtune is given, they override their
6812 respective component of -mcpu.
6814 So, first parse AARCH64_CPU_STRING, then the others; be careful
6815 with -march because, if -mcpu is not present on the command line,
6816 -march must set a sensible default CPU. */
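/* For example (illustrative only): "-mcpu=cortex-a53 -mtune=cortex-a57"
   keeps the ISA flags implied by cortex-a53 but tunes for cortex-a57. */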
6817 if (aarch64_cpu_string)
6819 aarch64_parse_cpu ();
6822 if (aarch64_arch_string)
6824 aarch64_parse_arch ();
6827 if (aarch64_tune_string)
6829 aarch64_parse_tune ();
6832 #ifndef HAVE_AS_MABI_OPTION
6833 /* The compiler may have been configured with 2.23.* binutils, which does
6834 not have support for ILP32. */
6835 if (TARGET_ILP32)
6836 error ("Assembler does not support -mabi=ilp32");
6837 #endif
6839 initialize_aarch64_code_model ();
6841 aarch64_build_bitmask_table ();
6843 /* This target defaults to strict volatile bitfields. */
6844 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6845 flag_strict_volatile_bitfields = 1;
6847 /* If the user did not specify a processor, choose the default
6848 one for them. This will be the CPU set during configuration using
6849 --with-cpu, otherwise it is "generic". */
6850 if (!selected_cpu)
6852 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6853 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6856 gcc_assert (selected_cpu);
6858 if (!selected_tune)
6859 selected_tune = selected_cpu;
6861 aarch64_tune_flags = selected_tune->flags;
6862 aarch64_tune = selected_tune->core;
6863 aarch64_tune_params = selected_tune->tune;
6864 aarch64_architecture_version = selected_cpu->architecture_version;
6866 if (aarch64_fix_a53_err835769 == 2)
6868 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
6869 aarch64_fix_a53_err835769 = 1;
6870 #else
6871 aarch64_fix_a53_err835769 = 0;
6872 #endif
6875 /* If not optimizing for size, set the default
6876 alignment to what the target wants. */
6877 if (!optimize_size)
6879 if (align_loops <= 0)
6880 align_loops = aarch64_tune_params->loop_align;
6881 if (align_jumps <= 0)
6882 align_jumps = aarch64_tune_params->jump_align;
6883 if (align_functions <= 0)
6884 align_functions = aarch64_tune_params->function_align;
6887 aarch64_override_options_after_change ();
6890 /* Implement targetm.override_options_after_change. */
6892 static void
6893 aarch64_override_options_after_change (void)
6895 if (flag_omit_frame_pointer)
6896 flag_omit_leaf_frame_pointer = false;
6897 else if (flag_omit_leaf_frame_pointer)
6898 flag_omit_frame_pointer = true;
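/* Return a freshly allocated, zero-initialized machine_function struct;
   installed as init_machine_status by aarch64_init_expanders below.  */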
6901 static struct machine_function *
6902 aarch64_init_machine_status (void)
6904 struct machine_function *machine;
6905 machine = ggc_cleared_alloc<machine_function> ();
6906 return machine;
6909 void
6910 aarch64_init_expanders (void)
6912 init_machine_status = aarch64_init_machine_status;
6915 /* Validate the combination of code model and PIC options and set aarch64_cmodel. */
6916 static void
6917 initialize_aarch64_code_model (void)
6919 if (flag_pic)
6921 switch (aarch64_cmodel_var)
6923 case AARCH64_CMODEL_TINY:
6924 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6925 break;
6926 case AARCH64_CMODEL_SMALL:
6927 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6928 break;
6929 case AARCH64_CMODEL_LARGE:
6930 sorry ("code model %qs with -f%s", "large",
6931 flag_pic > 1 ? "PIC" : "pic");
6932 default:
6933 gcc_unreachable ();
6936 else
6937 aarch64_cmodel = aarch64_cmodel_var;
6940 /* Return true if SYMBOL_REF X binds locally. */
6942 static bool
6943 aarch64_symbol_binds_local_p (const_rtx x)
6945 return (SYMBOL_REF_DECL (x)
6946 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6947 : SYMBOL_REF_LOCAL_P (x));
6950 /* Return true if SYMBOL_REF X is thread-local. */
6951 static bool
6952 aarch64_tls_symbol_p (rtx x)
6954 if (! TARGET_HAVE_TLS)
6955 return false;
6957 if (GET_CODE (x) != SYMBOL_REF)
6958 return false;
6960 return SYMBOL_REF_TLS_MODEL (x) != 0;
6963 /* Classify a TLS symbol into one of the TLS kinds. */
6964 enum aarch64_symbol_type
6965 aarch64_classify_tls_symbol (rtx x)
6967 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6969 switch (tls_kind)
6971 case TLS_MODEL_GLOBAL_DYNAMIC:
6972 case TLS_MODEL_LOCAL_DYNAMIC:
6973 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6975 case TLS_MODEL_INITIAL_EXEC:
6976 return SYMBOL_SMALL_GOTTPREL;
6978 case TLS_MODEL_LOCAL_EXEC:
6979 return SYMBOL_SMALL_TPREL;
6981 case TLS_MODEL_EMULATED:
6982 case TLS_MODEL_NONE:
6983 return SYMBOL_FORCE_TO_MEM;
6985 default:
6986 gcc_unreachable ();
6990 /* Return the method that should be used to access SYMBOL_REF or
6991 LABEL_REF X in context CONTEXT. */
6993 enum aarch64_symbol_type
6994 aarch64_classify_symbol (rtx x, rtx offset,
6995 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6997 if (GET_CODE (x) == LABEL_REF)
6999 switch (aarch64_cmodel)
7001 case AARCH64_CMODEL_LARGE:
7002 return SYMBOL_FORCE_TO_MEM;
7004 case AARCH64_CMODEL_TINY_PIC:
7005 case AARCH64_CMODEL_TINY:
7006 return SYMBOL_TINY_ABSOLUTE;
7008 case AARCH64_CMODEL_SMALL_PIC:
7009 case AARCH64_CMODEL_SMALL:
7010 return SYMBOL_SMALL_ABSOLUTE;
7012 default:
7013 gcc_unreachable ();
7017 if (GET_CODE (x) == SYMBOL_REF)
7019 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
7020 return SYMBOL_FORCE_TO_MEM;
7022 if (aarch64_tls_symbol_p (x))
7023 return aarch64_classify_tls_symbol (x);
7025 switch (aarch64_cmodel)
7027 case AARCH64_CMODEL_TINY:
7028 /* When we retrieve a symbol + offset address, we have to make sure
7029 the offset does not cause overflow of the final address. But
7030 we have no way of knowing the address of the symbol at compile time,
7031 so we can't accurately say if the distance between the PC and
7032 symbol + offset is outside the addressable range of +/-1M in the
7033 TINY code model. So we rely on images not being greater than
7034 1M, cap the offset at 1M, and require anything beyond 1M to
7035 be loaded using an alternative mechanism. */
7036 if (SYMBOL_REF_WEAK (x)
7037 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
7038 return SYMBOL_FORCE_TO_MEM;
7039 return SYMBOL_TINY_ABSOLUTE;
7041 case AARCH64_CMODEL_SMALL:
7042 /* Same reasoning as the tiny code model, but the offset cap here is
7043 4G. */
7044 if (SYMBOL_REF_WEAK (x)
7045 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
7046 HOST_WIDE_INT_C (4294967264)))
7047 return SYMBOL_FORCE_TO_MEM;
7048 return SYMBOL_SMALL_ABSOLUTE;
7050 case AARCH64_CMODEL_TINY_PIC:
7051 if (!aarch64_symbol_binds_local_p (x))
7052 return SYMBOL_TINY_GOT;
7053 return SYMBOL_TINY_ABSOLUTE;
7055 case AARCH64_CMODEL_SMALL_PIC:
7056 if (!aarch64_symbol_binds_local_p (x))
7057 return SYMBOL_SMALL_GOT;
7058 return SYMBOL_SMALL_ABSOLUTE;
7060 default:
7061 gcc_unreachable ();
7065 /* By default push everything into the constant pool. */
7066 return SYMBOL_FORCE_TO_MEM;
7069 bool
7070 aarch64_constant_address_p (rtx x)
7072 return (CONSTANT_P (x) && memory_address_p (DImode, x));
7075 bool
7076 aarch64_legitimate_pic_operand_p (rtx x)
7078 if (GET_CODE (x) == SYMBOL_REF
7079 || (GET_CODE (x) == CONST
7080 && GET_CODE (XEXP (x, 0)) == PLUS
7081 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7082 return false;
7084 return true;
7087 /* Return true if X holds either a quarter-precision floating-point
7088 constant or the floating-point constant +0.0. */
7089 static bool
7090 aarch64_valid_floating_const (machine_mode mode, rtx x)
7092 if (!CONST_DOUBLE_P (x))
7093 return false;
7095 /* TODO: We could handle moving 0.0 to a TFmode register,
7096 but first we would like to refactor the movtf_aarch64
7097 pattern to be more amenable to splitting moves properly and
7098 to gating correctly on TARGET_SIMD. For now, reject all
7099 constants that are not destined for SFmode or DFmode registers. */
7100 if (!(mode == SFmode || mode == DFmode))
7101 return false;
7103 if (aarch64_float_const_zero_rtx_p (x))
7104 return true;
7105 return aarch64_float_const_representable_p (x);
7108 static bool
7109 aarch64_legitimate_constant_p (machine_mode mode, rtx x)
7111 /* Do not allow vector struct mode constants. We could support
7112 0 and -1 easily, but they need support in aarch64-simd.md. */
7113 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
7114 return false;
7116 /* This could probably go away because
7117 we now decompose CONST_INTs according to expand_mov_immediate. */
7118 if ((GET_CODE (x) == CONST_VECTOR
7119 && aarch64_simd_valid_immediate (x, mode, false, NULL))
7120 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
7121 return !targetm.cannot_force_const_mem (mode, x);
7123 if (GET_CODE (x) == HIGH
7124 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7125 return true;
7127 return aarch64_constant_address_p (x);
7131 aarch64_load_tp (rtx target)
7133 if (!target
7134 || GET_MODE (target) != Pmode
7135 || !register_operand (target, Pmode))
7136 target = gen_reg_rtx (Pmode);
7138 /* Can return in any reg. */
7139 emit_insn (gen_aarch64_load_tp_hard (target));
7140 return target;
7143 /* On AAPCS systems, this is the "struct __va_list". */
7144 static GTY(()) tree va_list_type;
7146 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
7147 Return the type to use as __builtin_va_list.
7149 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
7151 struct __va_list
7153 void *__stack;
7154 void *__gr_top;
7155 void *__vr_top;
7156 int __gr_offs;
7157 int __vr_offs;
7158 }; */
7160 static tree
7161 aarch64_build_builtin_va_list (void)
7163 tree va_list_name;
7164 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7166 /* Create the type. */
7167 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
7168 /* Give it the required name. */
7169 va_list_name = build_decl (BUILTINS_LOCATION,
7170 TYPE_DECL,
7171 get_identifier ("__va_list"),
7172 va_list_type);
7173 DECL_ARTIFICIAL (va_list_name) = 1;
7174 TYPE_NAME (va_list_type) = va_list_name;
7175 TYPE_STUB_DECL (va_list_type) = va_list_name;
7177 /* Create the fields. */
7178 f_stack = build_decl (BUILTINS_LOCATION,
7179 FIELD_DECL, get_identifier ("__stack"),
7180 ptr_type_node);
7181 f_grtop = build_decl (BUILTINS_LOCATION,
7182 FIELD_DECL, get_identifier ("__gr_top"),
7183 ptr_type_node);
7184 f_vrtop = build_decl (BUILTINS_LOCATION,
7185 FIELD_DECL, get_identifier ("__vr_top"),
7186 ptr_type_node);
7187 f_groff = build_decl (BUILTINS_LOCATION,
7188 FIELD_DECL, get_identifier ("__gr_offs"),
7189 integer_type_node);
7190 f_vroff = build_decl (BUILTINS_LOCATION,
7191 FIELD_DECL, get_identifier ("__vr_offs"),
7192 integer_type_node);
7194 DECL_ARTIFICIAL (f_stack) = 1;
7195 DECL_ARTIFICIAL (f_grtop) = 1;
7196 DECL_ARTIFICIAL (f_vrtop) = 1;
7197 DECL_ARTIFICIAL (f_groff) = 1;
7198 DECL_ARTIFICIAL (f_vroff) = 1;
7200 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
7201 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
7202 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
7203 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
7204 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
7206 TYPE_FIELDS (va_list_type) = f_stack;
7207 DECL_CHAIN (f_stack) = f_grtop;
7208 DECL_CHAIN (f_grtop) = f_vrtop;
7209 DECL_CHAIN (f_vrtop) = f_groff;
7210 DECL_CHAIN (f_groff) = f_vroff;
7212 /* Compute its layout. */
7213 layout_type (va_list_type);
7215 return va_list_type;
7218 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
7219 static void
7220 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
7222 const CUMULATIVE_ARGS *cum;
7223 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7224 tree stack, grtop, vrtop, groff, vroff;
7225 tree t;
7226 int gr_save_area_size;
7227 int vr_save_area_size;
7228 int vr_offset;
7230 cum = &crtl->args.info;
7231 gr_save_area_size
7232 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
7233 vr_save_area_size
7234 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
7236 if (TARGET_GENERAL_REGS_ONLY)
7238 if (cum->aapcs_nvrn > 0)
7239 sorry ("%qs and floating point or vector arguments",
7240 "-mgeneral-regs-only");
7241 vr_save_area_size = 0;
7244 f_stack = TYPE_FIELDS (va_list_type_node);
7245 f_grtop = DECL_CHAIN (f_stack);
7246 f_vrtop = DECL_CHAIN (f_grtop);
7247 f_groff = DECL_CHAIN (f_vrtop);
7248 f_vroff = DECL_CHAIN (f_groff);
7250 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
7251 NULL_TREE);
7252 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
7253 NULL_TREE);
7254 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
7255 NULL_TREE);
7256 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
7257 NULL_TREE);
7258 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
7259 NULL_TREE);
7261 /* Emit code to initialize STACK, which points to the next varargs stack
7262 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
7263 by named arguments. STACK is 8-byte aligned. */
7264 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
7265 if (cum->aapcs_stack_size > 0)
7266 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
7267 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
7268 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7270 /* Emit code to initialize GRTOP, the top of the GR save area.
7271 virtual_incoming_args_rtx should have been 16 byte aligned. */
7272 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
7273 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
7274 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7276 /* Emit code to initialize VRTOP, the top of the VR save area.
7277 This address is gr_save_area_bytes below GRTOP, rounded
7278 down to the next 16-byte boundary. */
7279 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
7280 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
7281 STACK_BOUNDARY / BITS_PER_UNIT);
7283 if (vr_offset)
7284 t = fold_build_pointer_plus_hwi (t, -vr_offset);
7285 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
7286 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7288 /* Emit code to initialize GROFF, the offset from GRTOP of the
7289 next GPR argument. */
7290 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
7291 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
7292 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7294 /* Likewise emit code to initialize VROFF, the offset from FTOP
7295 of the next VR argument. */
7296 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
7297 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
7298 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7301 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
7303 static tree
7304 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7305 gimple_seq *post_p ATTRIBUTE_UNUSED)
7307 tree addr;
7308 bool indirect_p;
7309 bool is_ha; /* is HFA or HVA. */
7310 bool dw_align; /* double-word align. */
7311 machine_mode ag_mode = VOIDmode;
7312 int nregs;
7313 machine_mode mode;
7315 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7316 tree stack, f_top, f_off, off, arg, roundup, on_stack;
7317 HOST_WIDE_INT size, rsize, adjust, align;
7318 tree t, u, cond1, cond2;
7320 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7321 if (indirect_p)
7322 type = build_pointer_type (type);
7324 mode = TYPE_MODE (type);
7326 f_stack = TYPE_FIELDS (va_list_type_node);
7327 f_grtop = DECL_CHAIN (f_stack);
7328 f_vrtop = DECL_CHAIN (f_grtop);
7329 f_groff = DECL_CHAIN (f_vrtop);
7330 f_vroff = DECL_CHAIN (f_groff);
7332 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
7333 f_stack, NULL_TREE);
7334 size = int_size_in_bytes (type);
7335 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
7337 dw_align = false;
7338 adjust = 0;
7339 if (aarch64_vfp_is_call_or_return_candidate (mode,
7340 type,
7341 &ag_mode,
7342 &nregs,
7343 &is_ha))
7345 /* TYPE passed in fp/simd registers. */
7346 if (TARGET_GENERAL_REGS_ONLY)
7347 sorry ("%qs and floating point or vector arguments",
7348 "-mgeneral-regs-only");
7350 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
7351 unshare_expr (valist), f_vrtop, NULL_TREE);
7352 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
7353 unshare_expr (valist), f_vroff, NULL_TREE);
7355 rsize = nregs * UNITS_PER_VREG;
7357 if (is_ha)
7359 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
7360 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
7362 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
7363 && size < UNITS_PER_VREG)
7365 adjust = UNITS_PER_VREG - size;
7368 else
7370 /* TYPE passed in general registers. */
7371 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
7372 unshare_expr (valist), f_grtop, NULL_TREE);
7373 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
7374 unshare_expr (valist), f_groff, NULL_TREE);
7375 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7376 nregs = rsize / UNITS_PER_WORD;
7378 if (align > 8)
7379 dw_align = true;
7381 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7382 && size < UNITS_PER_WORD)
7384 adjust = UNITS_PER_WORD - size;
7388 /* Get a local temporary for the field value. */
7389 off = get_initialized_tmp_var (f_off, pre_p, NULL);
7391 /* Emit code to branch if off >= 0. */
7392 t = build2 (GE_EXPR, boolean_type_node, off,
7393 build_int_cst (TREE_TYPE (off), 0));
7394 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
7396 if (dw_align)
7398 /* Emit: offs = (offs + 15) & -16. */
7399 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7400 build_int_cst (TREE_TYPE (off), 15));
7401 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
7402 build_int_cst (TREE_TYPE (off), -16));
7403 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
7405 else
7406 roundup = NULL;
7408 /* Update ap.__[g|v]r_offs */
7409 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7410 build_int_cst (TREE_TYPE (off), rsize));
7411 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
7413 /* String up. */
7414 if (roundup)
7415 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7417 /* [cond2] if (ap.__[g|v]r_offs > 0) */
7418 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
7419 build_int_cst (TREE_TYPE (f_off), 0));
7420 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
7422 /* String up: make sure the assignment happens before the use. */
7423 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
7424 COND_EXPR_ELSE (cond1) = t;
7426 /* Prepare the trees handling the argument that is passed on the stack;
7427 the top level node will store in ON_STACK. */
7428 arg = get_initialized_tmp_var (stack, pre_p, NULL);
7429 if (align > 8)
7431 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
7432 t = fold_convert (intDI_type_node, arg);
7433 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7434 build_int_cst (TREE_TYPE (t), 15));
7435 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7436 build_int_cst (TREE_TYPE (t), -16));
7437 t = fold_convert (TREE_TYPE (arg), t);
7438 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
7440 else
7441 roundup = NULL;
7442 /* Advance ap.__stack */
7443 t = fold_convert (intDI_type_node, arg);
7444 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7445 build_int_cst (TREE_TYPE (t), size + 7));
7446 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7447 build_int_cst (TREE_TYPE (t), -8));
7448 t = fold_convert (TREE_TYPE (arg), t);
7449 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
7450 /* String up roundup and advance. */
7451 if (roundup)
7452 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7453 /* String up with arg */
7454 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
7455 /* Big-endianness related address adjustment. */
7456 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7457 && size < UNITS_PER_WORD)
7459 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
7460 size_int (UNITS_PER_WORD - size));
7461 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
7464 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
7465 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
7467 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
7468 t = off;
7469 if (adjust)
7470 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
7471 build_int_cst (TREE_TYPE (off), adjust));
7473 t = fold_convert (sizetype, t);
7474 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
7476 if (is_ha)
7478 /* type ha; // treat as "struct {ftype field[n];}"
7479 ... [computing offs]
7480 for (i = 0; i < nregs; ++i, offs += 16)
7481 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
7482 return ha; */
7483 int i;
7484 tree tmp_ha, field_t, field_ptr_t;
7486 /* Declare a local variable. */
7487 tmp_ha = create_tmp_var_raw (type, "ha");
7488 gimple_add_tmp_var (tmp_ha);
7490 /* Establish the base type. */
7491 switch (ag_mode)
7493 case SFmode:
7494 field_t = float_type_node;
7495 field_ptr_t = float_ptr_type_node;
7496 break;
7497 case DFmode:
7498 field_t = double_type_node;
7499 field_ptr_t = double_ptr_type_node;
7500 break;
7501 case TFmode:
7502 field_t = long_double_type_node;
7503 field_ptr_t = long_double_ptr_type_node;
7504 break;
7505 /* The half precision and quad precision are not fully supported yet. Enable
7506 the following code after the support is complete. Need to find the correct
7507 type node for __fp16 *. */
7508 #if 0
7509 case HFmode:
7510 field_t = float_type_node;
7511 field_ptr_t = float_ptr_type_node;
7512 break;
7513 #endif
7514 case V2SImode:
7515 case V4SImode:
7517 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
7518 field_t = build_vector_type_for_mode (innertype, ag_mode);
7519 field_ptr_t = build_pointer_type (field_t);
7521 break;
7522 default:
7523 gcc_assert (0);
7526 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
7527 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
7528 addr = t;
7529 t = fold_convert (field_ptr_t, addr);
7530 t = build2 (MODIFY_EXPR, field_t,
7531 build1 (INDIRECT_REF, field_t, tmp_ha),
7532 build1 (INDIRECT_REF, field_t, t));
7534 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
7535 for (i = 1; i < nregs; ++i)
7537 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
7538 u = fold_convert (field_ptr_t, addr);
7539 u = build2 (MODIFY_EXPR, field_t,
7540 build2 (MEM_REF, field_t, tmp_ha,
7541 build_int_cst (field_ptr_t,
7542 (i *
7543 int_size_in_bytes (field_t)))),
7544 build1 (INDIRECT_REF, field_t, u));
7545 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7548 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7549 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7552 COND_EXPR_ELSE (cond2) = t;
7553 addr = fold_convert (build_pointer_type (type), cond1);
7554 addr = build_va_arg_indirect_ref (addr);
7556 if (indirect_p)
7557 addr = build_va_arg_indirect_ref (addr);
7559 return addr;
7562 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
7564 static void
7565 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
7566 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7567 int no_rtl)
7569 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7570 CUMULATIVE_ARGS local_cum;
7571 int gr_saved, vr_saved;
7573 /* The caller has advanced CUM up to, but not beyond, the last named
7574 argument. Advance a local copy of CUM past the last "real" named
7575 argument, to find out how many registers are left over. */
7576 local_cum = *cum;
7577 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
7579 /* Find out how many registers we need to save. */
7580 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7581 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7583 if (TARGET_GENERAL_REGS_ONLY)
7585 if (local_cum.aapcs_nvrn > 0)
7586 sorry ("%qs and floating point or vector arguments",
7587 "-mgeneral-regs-only");
7588 vr_saved = 0;
7591 if (!no_rtl)
7593 if (gr_saved > 0)
7595 rtx ptr, mem;
7597 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7598 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7599 - gr_saved * UNITS_PER_WORD);
7600 mem = gen_frame_mem (BLKmode, ptr);
7601 set_mem_alias_set (mem, get_varargs_alias_set ());
7603 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7604 mem, gr_saved);
7606 if (vr_saved > 0)
7608 /* We can't use move_block_from_reg, because it will use
7609 the wrong mode, storing D regs only. */
7610 machine_mode mode = TImode;
7611 int off, i;
7613 /* Set OFF to the offset from virtual_incoming_args_rtx of
7614 the first vector register. The VR save area lies below
7615 the GR one, and is aligned to 16 bytes. */
7616 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7617 STACK_BOUNDARY / BITS_PER_UNIT);
7618 off -= vr_saved * UNITS_PER_VREG;
7620 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7622 rtx ptr, mem;
7624 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7625 mem = gen_frame_mem (mode, ptr);
7626 set_mem_alias_set (mem, get_varargs_alias_set ());
7627 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7628 off += UNITS_PER_VREG;
7633 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7634 any complication of having crtl->args.pretend_args_size changed. */
7635 cfun->machine->frame.saved_varargs_size
7636 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7637 STACK_BOUNDARY / BITS_PER_UNIT)
7638 + vr_saved * UNITS_PER_VREG);
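/* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */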
7641 static void
7642 aarch64_conditional_register_usage (void)
7644 int i;
7645 if (!TARGET_FLOAT)
7647 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7649 fixed_regs[i] = 1;
7650 call_used_regs[i] = 1;
7655 /* Walk down the type tree of TYPE counting consecutive base elements.
7656 If *MODEP is VOIDmode, then set it to the first valid floating point
7657 type. If a non-floating point type is found, or if a floating point
7658 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7659 otherwise return the count in the sub-tree. */
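/* For example (illustrative), for "struct { float x; float y[2]; }" this
   returns 3 with *MODEP set to SFmode.  */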
7660 static int
7661 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
7663 machine_mode mode;
7664 HOST_WIDE_INT size;
7666 switch (TREE_CODE (type))
7668 case REAL_TYPE:
7669 mode = TYPE_MODE (type);
7670 if (mode != DFmode && mode != SFmode && mode != TFmode)
7671 return -1;
7673 if (*modep == VOIDmode)
7674 *modep = mode;
7676 if (*modep == mode)
7677 return 1;
7679 break;
7681 case COMPLEX_TYPE:
7682 mode = TYPE_MODE (TREE_TYPE (type));
7683 if (mode != DFmode && mode != SFmode && mode != TFmode)
7684 return -1;
7686 if (*modep == VOIDmode)
7687 *modep = mode;
7689 if (*modep == mode)
7690 return 2;
7692 break;
7694 case VECTOR_TYPE:
7695 /* Use V2SImode and V4SImode as representatives of all 64-bit
7696 and 128-bit vector types. */
7697 size = int_size_in_bytes (type);
7698 switch (size)
7700 case 8:
7701 mode = V2SImode;
7702 break;
7703 case 16:
7704 mode = V4SImode;
7705 break;
7706 default:
7707 return -1;
7710 if (*modep == VOIDmode)
7711 *modep = mode;
7713 /* Vector modes are considered to be opaque: two vectors are
7714 equivalent for the purposes of being homogeneous aggregates
7715 if they are the same size. */
7716 if (*modep == mode)
7717 return 1;
7719 break;
7721 case ARRAY_TYPE:
7723 int count;
7724 tree index = TYPE_DOMAIN (type);
7726 /* Can't handle incomplete types nor sizes that are not
7727 fixed. */
7728 if (!COMPLETE_TYPE_P (type)
7729 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7730 return -1;
7732 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7733 if (count == -1
7734 || !index
7735 || !TYPE_MAX_VALUE (index)
7736 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
7737 || !TYPE_MIN_VALUE (index)
7738 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7739 || count < 0)
7740 return -1;
7742 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7743 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7745 /* There must be no padding. */
7746 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7747 return -1;
7749 return count;
7752 case RECORD_TYPE:
7754 int count = 0;
7755 int sub_count;
7756 tree field;
7758 /* Can't handle incomplete types nor sizes that are not
7759 fixed. */
7760 if (!COMPLETE_TYPE_P (type)
7761 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7762 return -1;
7764 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7766 if (TREE_CODE (field) != FIELD_DECL)
7767 continue;
7769 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7770 if (sub_count < 0)
7771 return -1;
7772 count += sub_count;
7775 /* There must be no padding. */
7776 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7777 return -1;
7779 return count;
7782 case UNION_TYPE:
7783 case QUAL_UNION_TYPE:
7785 /* These aren't very interesting except in a degenerate case. */
7786 int count = 0;
7787 int sub_count;
7788 tree field;
7790 /* Can't handle incomplete types nor sizes that are not
7791 fixed. */
7792 if (!COMPLETE_TYPE_P (type)
7793 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7794 return -1;
7796 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7798 if (TREE_CODE (field) != FIELD_DECL)
7799 continue;
7801 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7802 if (sub_count < 0)
7803 return -1;
7804 count = count > sub_count ? count : sub_count;
7807 /* There must be no padding. */
7808 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7809 return -1;
7811 return count;
7814 default:
7815 break;
7818 return -1;
7821 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7822 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7823 array types. The C99 floating-point complex types are also considered
7824 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7825 types, which are GCC extensions and out of the scope of AAPCS64, are
7826 treated as composite types here as well.
7828 Note that MODE itself is not sufficient in determining whether a type
7829 is such a composite type or not. This is because
7830 stor-layout.c:compute_record_mode may have already changed the MODE
7831 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7832 structure with only one field may have its MODE set to the mode of the
7833 field. Also an integer mode whose size matches the size of the
7834 RECORD_TYPE type may be used to substitute the original mode
7835 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7836 solely relied on. */
7838 static bool
7839 aarch64_composite_type_p (const_tree type,
7840 machine_mode mode)
7842 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7843 return true;
7845 if (mode == BLKmode
7846 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7847 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7848 return true;
7850 return false;
7853 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7854 type as described in AAPCS64 \S 4.1.2.
7856 See the comment above aarch64_composite_type_p for the notes on MODE. */
7858 static bool
7859 aarch64_short_vector_p (const_tree type,
7860 machine_mode mode)
7862 HOST_WIDE_INT size = -1;
7864 if (type && TREE_CODE (type) == VECTOR_TYPE)
7865 size = int_size_in_bytes (type);
7866 else if (!aarch64_composite_type_p (type, mode)
7867 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7868 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7869 size = GET_MODE_SIZE (mode);
7871 return (size == 8 || size == 16) ? true : false;
7874 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7875 shall be passed or returned in simd/fp register(s) (providing these
7876 parameter passing registers are available).
7878 Upon successful return, *COUNT returns the number of needed registers,
7879 *BASE_MODE returns the mode of the individual register and when IS_HA
7880 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7881 floating-point aggregate or a homogeneous short-vector aggregate. */
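/* For example (illustrative), "struct { double x, y; }" is a homogeneous
   floating-point aggregate: *COUNT is set to 2, *BASE_MODE to DFmode and,
   if requested, *IS_HA to true.  */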
7883 static bool
7884 aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
7885 const_tree type,
7886 machine_mode *base_mode,
7887 int *count,
7888 bool *is_ha)
7890 machine_mode new_mode = VOIDmode;
7891 bool composite_p = aarch64_composite_type_p (type, mode);
7893 if (is_ha != NULL) *is_ha = false;
7895 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7896 || aarch64_short_vector_p (type, mode))
7898 *count = 1;
7899 new_mode = mode;
7901 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7903 if (is_ha != NULL) *is_ha = true;
7904 *count = 2;
7905 new_mode = GET_MODE_INNER (mode);
7907 else if (type && composite_p)
7909 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7911 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7913 if (is_ha != NULL) *is_ha = true;
7914 *count = ag_count;
7916 else
7917 return false;
7919 else
7920 return false;
7922 *base_mode = new_mode;
7923 return true;
7926 /* Implement TARGET_STRUCT_VALUE_RTX. */
7928 static rtx
7929 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7930 int incoming ATTRIBUTE_UNUSED)
7932 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7935 /* Implements target hook vector_mode_supported_p. */
7936 static bool
7937 aarch64_vector_mode_supported_p (machine_mode mode)
7939 if (TARGET_SIMD
7940 && (mode == V4SImode || mode == V8HImode
7941 || mode == V16QImode || mode == V2DImode
7942 || mode == V2SImode || mode == V4HImode
7943 || mode == V8QImode || mode == V2SFmode
7944 || mode == V4SFmode || mode == V2DFmode
7945 || mode == V1DFmode))
7946 return true;
7948 return false;
7951 /* Return appropriate SIMD container
7952 for MODE within a vector of WIDTH bits. */
7953 static machine_mode
7954 aarch64_simd_container_mode (machine_mode mode, unsigned width)
7956 gcc_assert (width == 64 || width == 128);
7957 if (TARGET_SIMD)
7959 if (width == 128)
7960 switch (mode)
7962 case DFmode:
7963 return V2DFmode;
7964 case SFmode:
7965 return V4SFmode;
7966 case SImode:
7967 return V4SImode;
7968 case HImode:
7969 return V8HImode;
7970 case QImode:
7971 return V16QImode;
7972 case DImode:
7973 return V2DImode;
7974 default:
7975 break;
7977 else
7978 switch (mode)
7980 case SFmode:
7981 return V2SFmode;
7982 case SImode:
7983 return V2SImode;
7984 case HImode:
7985 return V4HImode;
7986 case QImode:
7987 return V8QImode;
7988 default:
7989 break;
7992 return word_mode;
7995 /* Return 128-bit container as the preferred SIMD mode for MODE. */
7996 static machine_mode
7997 aarch64_preferred_simd_mode (machine_mode mode)
7999 return aarch64_simd_container_mode (mode, 128);
8002 /* Return the bitmask of possible vector sizes for the vectorizer
8003 to iterate over. */
8004 static unsigned int
8005 aarch64_autovectorize_vector_sizes (void)
8007 return (16 | 8);
8010 /* Implement TARGET_MANGLE_TYPE. */
8012 static const char *
8013 aarch64_mangle_type (const_tree type)
8015 /* The AArch64 ABI documents say that "__va_list" has to be
8016 mangled as if it is in the "std" namespace. */
8017 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
8018 return "St9__va_list";
8020 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
8021 builtin types. */
8022 if (TYPE_NAME (type) != NULL)
8023 return aarch64_mangle_builtin_type (type);
8025 /* Use the default mangling. */
8026 return NULL;
8030 /* Return true if the rtx_insn contains a MEM RTX somewhere
8031 in it. */
8033 static bool
8034 has_memory_op (rtx_insn *mem_insn)
8036 subrtx_iterator::array_type array;
8037 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
8038 if (MEM_P (*iter))
8039 return true;
8041 return false;
8044 /* Find the first rtx_insn before insn that will generate an assembly
8045 instruction. */
8047 static rtx_insn *
8048 aarch64_prev_real_insn (rtx_insn *insn)
8050 if (!insn)
8051 return NULL;
8055 insn = prev_real_insn (insn);
8057 while (insn && recog_memoized (insn) < 0);
8059 return insn;
8062 static bool
8063 is_madd_op (enum attr_type t1)
8065 unsigned int i;
8066 /* A number of these may be AArch32 only. */
8067 enum attr_type mlatypes[] = {
8068 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
8069 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
8070 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
8073 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
8075 if (t1 == mlatypes[i])
8076 return true;
8079 return false;
8082 /* Check if there is a register dependency between a load and the insn
8083 for which we hold recog_data. */
8085 static bool
8086 dep_between_memop_and_curr (rtx memop)
8088 rtx load_reg;
8089 int opno;
8091 gcc_assert (GET_CODE (memop) == SET);
8093 if (!REG_P (SET_DEST (memop)))
8094 return false;
8096 load_reg = SET_DEST (memop);
8097 for (opno = 1; opno < recog_data.n_operands; opno++)
8099 rtx operand = recog_data.operand[opno];
8100 if (REG_P (operand)
8101 && reg_overlap_mentioned_p (load_reg, operand))
8102 return true;
8105 return false;
8109 /* When working around the Cortex-A53 erratum 835769,
8110 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
8111 instruction and has a preceding memory instruction such that a NOP
8112 should be inserted between them. */
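/* A minimal illustration: in a sequence such as
       ldr  x1, [x2]
       madd x3, x4, x5, x6
   the madd has no register dependency on the preceding load, so a nop is
   emitted between the two instructions.  */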
8114 bool
8115 aarch64_madd_needs_nop (rtx_insn* insn)
8117 enum attr_type attr_type;
8118 rtx_insn *prev;
8119 rtx body;
8121 if (!aarch64_fix_a53_err835769)
8122 return false;
8124 if (recog_memoized (insn) < 0)
8125 return false;
8127 attr_type = get_attr_type (insn);
8128 if (!is_madd_op (attr_type))
8129 return false;
8131 prev = aarch64_prev_real_insn (insn);
8132 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
8133 Restore recog state to INSN to avoid state corruption. */
8134 extract_constrain_insn_cached (insn);
8136 if (!prev || !has_memory_op (prev))
8137 return false;
8139 body = single_set (prev);
8141 /* If the previous insn is a memory op and there is no dependency between
8142 it and the DImode madd, emit a NOP between them. If body is NULL then we
8143 have a complex memory operation, probably a load/store pair.
8144 Be conservative for now and emit a NOP. */
8145 if (GET_MODE (recog_data.operand[0]) == DImode
8146 && (!body || !dep_between_memop_and_curr (body)))
8147 return true;
8149 return false;
8154 /* Implement FINAL_PRESCAN_INSN. */
8156 void
8157 aarch64_final_prescan_insn (rtx_insn *insn)
8159 if (aarch64_madd_needs_nop (insn))
8160 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
8164 /* Return the equivalent letter for size. */
8165 static char
8166 sizetochar (int size)
8168 switch (size)
8170 case 64: return 'd';
8171 case 32: return 's';
8172 case 16: return 'h';
8173 case 8 : return 'b';
8174 default: gcc_unreachable ();
8178 /* Return true iff X is a uniform vector of floating-point
8179 constants, and the constant can be represented in
8180 quarter-precision form. Note that, as aarch64_float_const_representable_p
8181 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
8182 static bool
8183 aarch64_vect_float_const_representable_p (rtx x)
8185 int i = 0;
8186 REAL_VALUE_TYPE r0, ri;
8187 rtx x0, xi;
8189 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
8190 return false;
8192 x0 = CONST_VECTOR_ELT (x, 0);
8193 if (!CONST_DOUBLE_P (x0))
8194 return false;
8196 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
8198 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
8200 xi = CONST_VECTOR_ELT (x, i);
8201 if (!CONST_DOUBLE_P (xi))
8202 return false;
8204 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
8205 if (!REAL_VALUES_EQUAL (r0, ri))
8206 return false;
8209 return aarch64_float_const_representable_p (x0);
8212 /* Return true if OP is a valid AdvSIMD immediate, filling in *INFO if nonnull. */
8213 bool
8214 aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
8215 struct simd_immediate_info *info)
8217 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
8218 matches = 1; \
8219 for (i = 0; i < idx; i += (STRIDE)) \
8220 if (!(TEST)) \
8221 matches = 0; \
8222 if (matches) \
8224 immtype = (CLASS); \
8225 elsize = (ELSIZE); \
8226 eshift = (SHIFT); \
8227 emvn = (NEG); \
8228 break; \
8231 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8232 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8233 unsigned char bytes[16];
8234 int immtype = -1, matches;
8235 unsigned int invmask = inverse ? 0xff : 0;
8236 int eshift, emvn;
8238 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8240 if (! (aarch64_simd_imm_zero_p (op, mode)
8241 || aarch64_vect_float_const_representable_p (op)))
8242 return false;
8244 if (info)
8246 info->value = CONST_VECTOR_ELT (op, 0);
8247 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
8248 info->mvn = false;
8249 info->shift = 0;
8252 return true;
8255 /* Splat vector constant out into a byte vector. */
8256 for (i = 0; i < n_elts; i++)
8258 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
8259 it must be laid out in the vector register in reverse order. */
8260 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
8261 unsigned HOST_WIDE_INT elpart;
8262 unsigned int part, parts;
8264 if (CONST_INT_P (el))
8266 elpart = INTVAL (el);
8267 parts = 1;
8269 else if (GET_CODE (el) == CONST_DOUBLE)
8271 elpart = CONST_DOUBLE_LOW (el);
8272 parts = 2;
8274 else
8275 gcc_unreachable ();
8277 for (part = 0; part < parts; part++)
8279 unsigned int byte;
8280 for (byte = 0; byte < innersize; byte++)
8282 bytes[idx++] = (elpart & 0xff) ^ invmask;
8283 elpart >>= BITS_PER_UNIT;
8285 if (GET_CODE (el) == CONST_DOUBLE)
8286 elpart = CONST_DOUBLE_HIGH (el);
8290 /* Sanity check. */
8291 gcc_assert (idx == GET_MODE_SIZE (mode));
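/* Try each recognizable immediate form in turn: 32-bit and 16-bit
   elements with a single significant byte and the remaining bytes all
   0x00 or all 0xff (the latter flagged through the NEG argument), the
   "shifting ones" forms (immtypes 12-15, reported via info->msl),
   per-byte replication (immtype 16), and the 64-bit mask whose bytes
   are each 0x00 or 0xff (immtype 17).  */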
8295 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8296 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
8298 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8299 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8301 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8302 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8304 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8305 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
8307 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
8309 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
8311 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8312 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
8314 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8315 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8317 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8318 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8320 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8321 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
8323 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
8325 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
8327 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8328 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8330 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8331 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8333 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8334 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8336 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8337 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8339 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
8341 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8342 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
8344 while (0);
8346 if (immtype == -1)
8347 return false;
8349 if (info)
8351 info->element_width = elsize;
8352 info->mvn = emvn != 0;
8353 info->shift = eshift;
8355 unsigned HOST_WIDE_INT imm = 0;
8357 if (immtype >= 12 && immtype <= 15)
8358 info->msl = true;
8360 /* Un-invert bytes of recognized vector, if necessary. */
8361 if (invmask != 0)
8362 for (i = 0; i < idx; i++)
8363 bytes[i] ^= invmask;
8365 if (immtype == 17)
8367 /* FIXME: Broken on 32-bit H_W_I hosts. */
8368 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8370 for (i = 0; i < 8; i++)
8371 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8372 << (i * BITS_PER_UNIT);
8375 info->value = GEN_INT (imm);
8377 else
8379 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8380 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8382 /* Construct 'abcdefgh' because the assembler cannot handle
8383 generic constants. */
8384 if (info->mvn)
8385 imm = ~imm;
8386 imm = (imm >> info->shift) & 0xff;
8387 info->value = GEN_INT (imm);
8391 return true;
8392 #undef CHECK
8395 /* Check that immediate shift constants are within range. */
8396 bool
8397 aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
8399 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
8400 if (left)
8401 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
8402 else
8403 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
8406 /* Return true if X is a uniform vector where all elements
8407 are either the floating-point constant 0.0 or the
8408 integer constant 0. */
8409 bool
8410 aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
8412 return x == CONST0_RTX (mode);
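/* Return true if X is a 64-bit immediate in which every byte is either
   0x00 or 0xff, e.g. 0xff0000ffff00ff00 (illustrative).  */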
8415 bool
8416 aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
8418 HOST_WIDE_INT imm = INTVAL (x);
8419 int i;
8421 for (i = 0; i < 8; i++)
8423 unsigned int byte = imm & 0xff;
8424 if (byte != 0xff && byte != 0)
8425 return false;
8426 imm >>= 8;
8429 return true;
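/* Return true if X is a suitable source for a move in mode MODE given
   CONTEXT: a HIGH of a valid symbol reference, any CONST_INT, a constant
   DImode symbol address, or an expression that classifies as
   SYMBOL_TINY_ABSOLUTE.  */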
8432 bool
8433 aarch64_mov_operand_p (rtx x,
8434 enum aarch64_symbol_context context,
8435 machine_mode mode)
8437 if (GET_CODE (x) == HIGH
8438 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
8439 return true;
8441 if (CONST_INT_P (x))
8442 return true;
8444 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
8445 return true;
8447 return aarch64_classify_symbolic_expression (x, context)
8448 == SYMBOL_TINY_ABSOLUTE;
8451 /* Return a const_int vector in which every element is VAL. */
8453 aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
8455 int nunits = GET_MODE_NUNITS (mode);
8456 rtvec v = rtvec_alloc (nunits);
8457 int i;
8459 for (i=0; i < nunits; i++)
8460 RTVEC_ELT (v, i) = GEN_INT (val);
8462 return gen_rtx_CONST_VECTOR (mode, v);
8465 /* Check OP is a legal scalar immediate for the MOVI instruction. */
8467 bool
8468 aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
8470 machine_mode vmode;
8472 gcc_assert (!VECTOR_MODE_P (mode));
8473 vmode = aarch64_preferred_simd_mode (mode);
8474 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
8475 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
8478 /* Construct and return a PARALLEL RTX vector with elements numbering the
8479 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8480 the vector - from the perspective of the architecture. This does not
8481 line up with GCC's perspective on lane numbers, so we end up with
8482 different masks depending on our target endian-ness. The diagram
8483 below may help. We must draw the distinction when building masks
8484 which select one half of the vector. An instruction selecting
8485 architectural low-lanes for a big-endian target, must be described using
8486 a mask selecting GCC high-lanes.
8488 Big-Endian Little-Endian
8490 GCC 0 1 2 3 3 2 1 0
8491 | x | x | x | x | | x | x | x | x |
8492 Architecture 3 2 1 0 3 2 1 0
8494 Low Mask: { 2, 3 } { 0, 1 }
8495 High Mask: { 0, 1 } { 2, 3 }
8499 aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
8501 int nunits = GET_MODE_NUNITS (mode);
8502 rtvec v = rtvec_alloc (nunits / 2);
8503 int high_base = nunits / 2;
8504 int low_base = 0;
8505 int base;
8506 rtx t1;
8507 int i;
8509 if (BYTES_BIG_ENDIAN)
8510 base = high ? low_base : high_base;
8511 else
8512 base = high ? high_base : low_base;
8514 for (i = 0; i < nunits / 2; i++)
8515 RTVEC_ELT (v, i) = GEN_INT (base + i);
8517 t1 = gen_rtx_PARALLEL (mode, v);
8518 return t1;
8521 /* Check OP for validity as a PARALLEL RTX vector with elements
8522 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8523 from the perspective of the architecture. See the diagram above
8524 aarch64_simd_vect_par_cnst_half for more details. */
8526 bool
8527 aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
8528 bool high)
8530 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
8531 HOST_WIDE_INT count_op = XVECLEN (op, 0);
8532 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
8533 int i = 0;
8535 if (!VECTOR_MODE_P (mode))
8536 return false;
8538 if (count_op != count_ideal)
8539 return false;
8541 for (i = 0; i < count_ideal; i++)
8543 rtx elt_op = XVECEXP (op, 0, i);
8544 rtx elt_ideal = XVECEXP (ideal, 0, i);
8546 if (!CONST_INT_P (elt_op)
8547 || INTVAL (elt_ideal) != INTVAL (elt_op))
8548 return false;
8550 return true;
8553 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
8554 HIGH (exclusive). */
8555 void
8556 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8557 const_tree exp)
8559 HOST_WIDE_INT lane;
8560 gcc_assert (CONST_INT_P (operand));
8561 lane = INTVAL (operand);
8563 if (lane < low || lane >= high)
8565 if (exp)
8566 error ("%Klane %ld out of range %ld - %ld", exp, lane, low, high - 1);
8567 else
8568 error ("lane %ld out of range %ld - %ld", lane, low, high - 1);
8572 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
8573 registers). */
8574 void
8575 aarch64_simd_emit_pair_result_insn (machine_mode mode,
8576 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
8577 rtx op1)
8579 rtx mem = gen_rtx_MEM (mode, destaddr);
8580 rtx tmp1 = gen_reg_rtx (mode);
8581 rtx tmp2 = gen_reg_rtx (mode);
8583 emit_insn (intfn (tmp1, op1, tmp2));
8585 emit_move_insn (mem, tmp1);
8586 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
8587 emit_move_insn (mem, tmp2);
8590 /* Return TRUE if OP is a valid vector addressing mode. */
8591 bool
8592 aarch64_simd_mem_operand_p (rtx op)
8594 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
8595 || REG_P (XEXP (op, 0)));
8598 /* Emit a register copy from operand to operand, taking care not to
8599 early-clobber source registers in the process.
8601 COUNT is the number of components into which the copy needs to be
8602 decomposed. */
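/* For example, copying a two-register structure from V1:V2 to V2:V3
   overlaps and the destination starts above the source, so the copy is
   emitted high-to-low (V3 <- V2 first, then V2 <- V1) to avoid
   clobbering V2 before it has been read.  (Illustrative register
   names.)  */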
8603 void
8604 aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
8605 unsigned int count)
8607 unsigned int i;
8608 int rdest = REGNO (operands[0]);
8609 int rsrc = REGNO (operands[1]);
8611 if (!reg_overlap_mentioned_p (operands[0], operands[1])
8612 || rdest < rsrc)
8613 for (i = 0; i < count; i++)
8614 emit_move_insn (gen_rtx_REG (mode, rdest + i),
8615 gen_rtx_REG (mode, rsrc + i));
8616 else
8617 for (i = 0; i < count; i++)
8618 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
8619 gen_rtx_REG (mode, rsrc + count - i - 1));
8622 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8623 one of VSTRUCT modes: OI, CI or XI. */
8625 aarch64_simd_attr_length_move (rtx_insn *insn)
8627 machine_mode mode;
8629 extract_insn_cached (insn);
8631 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8633 mode = GET_MODE (recog_data.operand[0]);
8634 switch (mode)
8636 case OImode:
8637 return 8;
8638 case CImode:
8639 return 12;
8640 case XImode:
8641 return 16;
8642 default:
8643 gcc_unreachable ();
8646 return 4;
8649 /* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
8650 one of VSTRUCT modes: OI, CI, EI, or XI. */
8652 aarch64_simd_attr_length_rglist (enum machine_mode mode)
8654 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
8657 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8658 alignment of a vector to 128 bits. */
8659 static HOST_WIDE_INT
8660 aarch64_simd_vector_alignment (const_tree type)
8662 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
8663 return MIN (align, 128);
8666 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8667 static bool
8668 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8670 if (is_packed)
8671 return false;
8673 /* We guarantee alignment for vectors up to 128 bits. */
8674 if (tree_int_cst_compare (TYPE_SIZE (type),
8675 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8676 return false;
8678 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8679 return true;
8682 /* If VALS is a vector constant that can be loaded into a register
8683 using DUP, generate instructions to do so and return an RTX to
8684 assign to the register. Otherwise return NULL_RTX. */
8685 static rtx
8686 aarch64_simd_dup_constant (rtx vals)
8688 machine_mode mode = GET_MODE (vals);
8689 machine_mode inner_mode = GET_MODE_INNER (mode);
8690 int n_elts = GET_MODE_NUNITS (mode);
8691 bool all_same = true;
8692 rtx x;
8693 int i;
8695 if (GET_CODE (vals) != CONST_VECTOR)
8696 return NULL_RTX;
8698 for (i = 1; i < n_elts; ++i)
8700 x = CONST_VECTOR_ELT (vals, i);
8701 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8702 all_same = false;
8705 if (!all_same)
8706 return NULL_RTX;
8708 /* We can load this constant by using DUP and a constant in a
8709 single ARM register. This will be cheaper than a vector
8710 load. */
8711 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8712 return gen_rtx_VEC_DUPLICATE (mode, x);
8716 /* Generate code to load VALS, which is a PARALLEL containing only
8717 constants (for vec_init) or CONST_VECTOR, efficiently into a
8718 register. Returns an RTX to copy into the register, or NULL_RTX
8719 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8720 static rtx
8721 aarch64_simd_make_constant (rtx vals)
8723 machine_mode mode = GET_MODE (vals);
8724 rtx const_dup;
8725 rtx const_vec = NULL_RTX;
8726 int n_elts = GET_MODE_NUNITS (mode);
8727 int n_const = 0;
8728 int i;
8730 if (GET_CODE (vals) == CONST_VECTOR)
8731 const_vec = vals;
8732 else if (GET_CODE (vals) == PARALLEL)
8734 /* A CONST_VECTOR must contain only CONST_INTs and
8735 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8736 Only store valid constants in a CONST_VECTOR. */
8737 for (i = 0; i < n_elts; ++i)
8739 rtx x = XVECEXP (vals, 0, i);
8740 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8741 n_const++;
8743 if (n_const == n_elts)
8744 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8746 else
8747 gcc_unreachable ();
8749 if (const_vec != NULL_RTX
8750 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
8751 /* Load using MOVI/MVNI. */
8752 return const_vec;
8753 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8754 /* Loaded using DUP. */
8755 return const_dup;
8756 else if (const_vec != NULL_RTX)
8757 /* Load from constant pool. We cannot take advantage of single-cycle
8758 LD1 because we need a PC-relative addressing mode. */
8759 return const_vec;
8760 else
8761 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8762 We cannot construct an initializer. */
8763 return NULL_RTX;
8766 void
8767 aarch64_expand_vector_init (rtx target, rtx vals)
8769 machine_mode mode = GET_MODE (target);
8770 machine_mode inner_mode = GET_MODE_INNER (mode);
8771 int n_elts = GET_MODE_NUNITS (mode);
8772 int n_var = 0, one_var = -1;
8773 bool all_same = true;
8774 rtx x, mem;
8775 int i;
8777 x = XVECEXP (vals, 0, 0);
8778 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8779 n_var = 1, one_var = 0;
8781 for (i = 1; i < n_elts; ++i)
8783 x = XVECEXP (vals, 0, i);
8784 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8785 ++n_var, one_var = i;
8787 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8788 all_same = false;
8791 if (n_var == 0)
8793 rtx constant = aarch64_simd_make_constant (vals);
8794 if (constant != NULL_RTX)
8796 emit_move_insn (target, constant);
8797 return;
8801 /* Splat a single non-constant element if we can. */
8802 if (all_same)
8804 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8805 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8806 return;
8809 /* One field is non-constant. Load constant then overwrite varying
8810 field. This is more efficient than using the stack. */
8811 if (n_var == 1)
8813 rtx copy = copy_rtx (vals);
8814 rtx index = GEN_INT (one_var);
8815 enum insn_code icode;
8817 /* Load constant part of vector, substitute neighboring value for
8818 varying element. */
8819 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8820 aarch64_expand_vector_init (target, copy);
8822 /* Insert variable. */
8823 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8824 icode = optab_handler (vec_set_optab, mode);
8825 gcc_assert (icode != CODE_FOR_nothing);
8826 emit_insn (GEN_FCN (icode) (target, x, index));
8827 return;
8830 /* Construct the vector in memory one field at a time
8831 and load the whole vector. */
8832 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8833 for (i = 0; i < n_elts; i++)
8834 emit_move_insn (adjust_address_nv (mem, inner_mode,
8835 i * GET_MODE_SIZE (inner_mode)),
8836 XVECEXP (vals, 0, i));
8837 emit_move_insn (target, mem);
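/* Return the mask that can be assumed to be applied to shift counts in
   MODE: scalar shifts truncate the count to the operand width (mask of
   GET_MODE_BITSIZE (mode) - 1), while vector and vector-structure shifts
   do not (mask of 0).  Presumably implements
   TARGET_SHIFT_TRUNCATION_MASK.  */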
8841 static unsigned HOST_WIDE_INT
8842 aarch64_shift_truncation_mask (machine_mode mode)
8844 return
8845 (aarch64_vector_mode_supported_p (mode)
8846 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8849 #ifndef TLS_SECTION_ASM_FLAG
8850 #define TLS_SECTION_ASM_FLAG 'T'
8851 #endif
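/* Emit an ELF .section directive for section NAME with the flag string
   derived from FLAGS; for instance, a COMDAT text section comes out
   roughly as
       .section .text.foo,"axG",@progbits,foo,comdat
   (illustrative names; the type prefix depends on TYPE_OPERAND_FMT).
   Presumably installed as the named-section output hook.  */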
8853 void
8854 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8855 tree decl ATTRIBUTE_UNUSED)
8857 char flagchars[10], *f = flagchars;
8859 /* If we have already declared this section, we can use an
8860 abbreviated form to switch back to it -- unless this section is
8861 part of a COMDAT group, in which case GAS requires the full
8862 declaration every time. */
8863 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8864 && (flags & SECTION_DECLARED))
8866 fprintf (asm_out_file, "\t.section\t%s\n", name);
8867 return;
8870 if (!(flags & SECTION_DEBUG))
8871 *f++ = 'a';
8872 if (flags & SECTION_WRITE)
8873 *f++ = 'w';
8874 if (flags & SECTION_CODE)
8875 *f++ = 'x';
8876 if (flags & SECTION_SMALL)
8877 *f++ = 's';
8878 if (flags & SECTION_MERGE)
8879 *f++ = 'M';
8880 if (flags & SECTION_STRINGS)
8881 *f++ = 'S';
8882 if (flags & SECTION_TLS)
8883 *f++ = TLS_SECTION_ASM_FLAG;
8884 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8885 *f++ = 'G';
8886 *f = '\0';
8888 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8890 if (!(flags & SECTION_NOTYPE))
8892 const char *type;
8893 const char *format;
8895 if (flags & SECTION_BSS)
8896 type = "nobits";
8897 else
8898 type = "progbits";
8900 #ifdef TYPE_OPERAND_FMT
8901 format = "," TYPE_OPERAND_FMT;
8902 #else
8903 format = ",@%s";
8904 #endif
8906 fprintf (asm_out_file, format, type);
8908 if (flags & SECTION_ENTSIZE)
8909 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8910 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8912 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8913 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8914 else
8915 fprintf (asm_out_file, ",%s,comdat",
8916 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8920 putc ('\n', asm_out_file);
8923 /* Select a format to encode pointers in exception handling data. */
8925 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8927 int type;
8928 switch (aarch64_cmodel)
8930 case AARCH64_CMODEL_TINY:
8931 case AARCH64_CMODEL_TINY_PIC:
8932 case AARCH64_CMODEL_SMALL:
8933 case AARCH64_CMODEL_SMALL_PIC:
8934 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8935 for everything. */
8936 type = DW_EH_PE_sdata4;
8937 break;
8938 default:
8939 /* No assumptions here. 8-byte relocs required. */
8940 type = DW_EH_PE_sdata8;
8941 break;
8943 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8946 /* Emit load exclusive. */
8948 static void
8949 aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
8950 rtx mem, rtx model_rtx)
8952 rtx (*gen) (rtx, rtx, rtx);
8954 switch (mode)
8956 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8957 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8958 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8959 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8960 default:
8961 gcc_unreachable ();
8964 emit_insn (gen (rval, mem, model_rtx));
8967 /* Emit store exclusive. */
8969 static void
8970 aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
8971 rtx rval, rtx mem, rtx model_rtx)
8973 rtx (*gen) (rtx, rtx, rtx, rtx);
8975 switch (mode)
8977 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8978 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8979 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8980 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8981 default:
8982 gcc_unreachable ();
8985 emit_insn (gen (bval, rval, mem, model_rtx));
8988 /* Mark the previous jump instruction as unlikely. */
8990 static void
8991 aarch64_emit_unlikely_jump (rtx insn)
8993 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8995 insn = emit_jump_insn (insn);
8996 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8999 /* Expand a compare and swap pattern. */
9001 void
9002 aarch64_expand_compare_and_swap (rtx operands[])
9004 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
9005 machine_mode mode, cmp_mode;
9006 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
9008 bval = operands[0];
9009 rval = operands[1];
9010 mem = operands[2];
9011 oldval = operands[3];
9012 newval = operands[4];
9013 is_weak = operands[5];
9014 mod_s = operands[6];
9015 mod_f = operands[7];
9016 mode = GET_MODE (mem);
9017 cmp_mode = mode;
9019 /* Normally the succ memory model must be stronger than fail, but in the
9020 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
9021 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
9023 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
9024 && INTVAL (mod_s) == MEMMODEL_RELEASE)
9025 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
9027 switch (mode)
9029 case QImode:
9030 case HImode:
9031 /* For short modes, we're going to perform the comparison in SImode,
9032 so do the zero-extension now. */
9033 cmp_mode = SImode;
9034 rval = gen_reg_rtx (SImode);
9035 oldval = convert_modes (SImode, mode, oldval, true);
9036 /* Fall through. */
9038 case SImode:
9039 case DImode:
9040 /* Force the value into a register if needed. */
9041 if (!aarch64_plus_operand (oldval, mode))
9042 oldval = force_reg (cmp_mode, oldval);
9043 break;
9045 default:
9046 gcc_unreachable ();
9049 switch (mode)
9051 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
9052 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
9053 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
9054 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
9055 default:
9056 gcc_unreachable ();
9059 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
9061 if (mode == QImode || mode == HImode)
9062 emit_move_insn (operands[1], gen_lowpart (mode, rval));
9064 x = gen_rtx_REG (CCmode, CC_REGNUM);
9065 x = gen_rtx_EQ (SImode, x, const0_rtx);
9066 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
9069 /* Split a compare and swap pattern. */
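/* The sequence emitted below is roughly (strong case):
       .L1: ldxr   rval, [mem]
            cmp    rval, oldval
            b.ne   .L2
            stxr   scratch, newval, [mem]
            cbnz   scratch, .L1
       .L2:
   For the weak variant the retry branch back to .L1 is omitted and the
   STXR status is copied into the condition flags instead.  (Sketch only;
   the exact load/store forms depend on the memory model.)  */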
9071 void
9072 aarch64_split_compare_and_swap (rtx operands[])
9074 rtx rval, mem, oldval, newval, scratch;
9075 machine_mode mode;
9076 bool is_weak;
9077 rtx_code_label *label1, *label2;
9078 rtx x, cond;
9080 rval = operands[0];
9081 mem = operands[1];
9082 oldval = operands[2];
9083 newval = operands[3];
9084 is_weak = (operands[4] != const0_rtx);
9085 scratch = operands[7];
9086 mode = GET_MODE (mem);
9088 label1 = NULL;
9089 if (!is_weak)
9091 label1 = gen_label_rtx ();
9092 emit_label (label1);
9094 label2 = gen_label_rtx ();
9096 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
9098 cond = aarch64_gen_compare_reg (NE, rval, oldval);
9099 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9100 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9101 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
9102 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9104 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
9106 if (!is_weak)
9108 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
9109 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9110 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
9111 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9113 else
9115 cond = gen_rtx_REG (CCmode, CC_REGNUM);
9116 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
9117 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
9120 emit_label (label2);
9123 /* Split an atomic operation. */
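/* The emitted sequence is the usual load-exclusive / operate /
   store-exclusive retry loop, roughly:
       .L1: ldxr   old, [mem]
            <op>   new, old, value
            stxr   cond, new, [mem]
            cbnz   cond, .L1
   NOT is handled as AND followed by NOT (i.e. a NAND), and MINUS of a
   constant is rewritten as PLUS of the negated constant.  (Sketch only;
   the exact load/store forms depend on the memory model.)  */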
9125 void
9126 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9127 rtx value, rtx model_rtx, rtx cond)
9129 machine_mode mode = GET_MODE (mem);
9130 machine_mode wmode = (mode == DImode ? DImode : SImode);
9131 rtx_code_label *label;
9132 rtx x;
9134 label = gen_label_rtx ();
9135 emit_label (label);
9137 if (new_out)
9138 new_out = gen_lowpart (wmode, new_out);
9139 if (old_out)
9140 old_out = gen_lowpart (wmode, old_out);
9141 else
9142 old_out = new_out;
9143 value = simplify_gen_subreg (wmode, value, mode, 0);
9145 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
9147 switch (code)
9149 case SET:
9150 new_out = value;
9151 break;
9153 case NOT:
9154 x = gen_rtx_AND (wmode, old_out, value);
9155 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9156 x = gen_rtx_NOT (wmode, new_out);
9157 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9158 break;
9160 case MINUS:
9161 if (CONST_INT_P (value))
9163 value = GEN_INT (-INTVAL (value));
9164 code = PLUS;
9166 /* Fall through. */
9168 default:
9169 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
9170 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9171 break;
9174 aarch64_emit_store_exclusive (mode, cond, mem,
9175 gen_lowpart (mode, new_out), model_rtx);
9177 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9178 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9179 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
9180 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9183 static void
9184 aarch64_print_extension (void)
9186 const struct aarch64_option_extension *opt = NULL;
9188 for (opt = all_extensions; opt->name != NULL; opt++)
9189 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
9190 asm_fprintf (asm_out_file, "+%s", opt->name);
9192 asm_fprintf (asm_out_file, "\n");
9195 static void
9196 aarch64_start_file (void)
9198 if (selected_arch)
9200 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
9201 aarch64_print_extension ();
9203 else if (selected_cpu)
9205 const char *truncated_name
9206 = aarch64_rewrite_selected_cpu (selected_cpu->name);
9207 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
9208 aarch64_print_extension ();
9210 default_file_start();
9213 /* Target hook for c_mode_for_suffix. */
9214 static machine_mode
9215 aarch64_c_mode_for_suffix (char suffix)
9217 if (suffix == 'q')
9218 return TFmode;
9220 return VOIDmode;
9223 /* We can only represent floating point constants which will fit in
9224 "quarter-precision" values. These values are characterised by
9225 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by
9228 (-1)^s * (n/16) * 2^r
9230 Where:
9231 's' is the sign bit.
9232 'n' is an integer in the range 16 <= n <= 31.
9233 'r' is an integer in the range -3 <= r <= 4. */
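/* For example, 1.0 (n = 16, r = 0) and 2.5 (n = 20, r = 1) are
   representable; the representable magnitudes range from 0.125
   (n = 16, r = -3) up to 31.0 (n = 31, r = 4), while 0.0 is not
   representable at all.  */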
9235 /* Return true iff X can be represented by a quarter-precision
9236 floating point immediate operand. Note, we cannot represent 0.0. */
9237 bool
9238 aarch64_float_const_representable_p (rtx x)
9240 /* This represents our current view of how many bits
9241 make up the mantissa. */
9242 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
9243 int exponent;
9244 unsigned HOST_WIDE_INT mantissa, mask;
9245 REAL_VALUE_TYPE r, m;
9246 bool fail;
9248 if (!CONST_DOUBLE_P (x))
9249 return false;
9251 if (GET_MODE (x) == VOIDmode)
9252 return false;
9254 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9256 /* We cannot represent infinities, NaNs or +/-zero. We won't
9257 know if we have +zero until we analyse the mantissa, but we
9258 can reject the other invalid values. */
9259 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
9260 || REAL_VALUE_MINUS_ZERO (r))
9261 return false;
9263 /* Extract exponent. */
9264 r = real_value_abs (&r);
9265 exponent = REAL_EXP (&r);
9267 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9268 highest (sign) bit, with a fixed binary point at bit point_pos.
9269 m1 holds the low part of the mantissa, m2 the high part.
9270 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
9271 bits for the mantissa, this can fail (low bits will be lost). */
9272 real_ldexp (&m, &r, point_pos - exponent);
9273 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
9275 /* If the low part of the mantissa has bits set we cannot represent
9276 the value. */
9277 if (w.elt (0) != 0)
9278 return false;
9279 /* We have rejected the lower HOST_WIDE_INT, so update our
9280 understanding of how many bits lie in the mantissa and
9281 look only at the high HOST_WIDE_INT. */
9282 mantissa = w.elt (1);
9283 point_pos -= HOST_BITS_PER_WIDE_INT;
9285 /* We can only represent values with a mantissa of the form 1.xxxx. */
9286 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9287 if ((mantissa & mask) != 0)
9288 return false;
9290 /* Having filtered unrepresentable values, we may now remove all
9291 but the highest 5 bits. */
9292 mantissa >>= point_pos - 5;
9294 /* We cannot represent the value 0.0, so reject it. This is handled
9295 elsewhere. */
9296 if (mantissa == 0)
9297 return false;
9299 /* Then, as bit 4 is always set, we can mask it off, leaving
9300 the mantissa in the range [0, 15]. */
9301 mantissa &= ~(1 << 4);
9302 gcc_assert (mantissa <= 15);
9304 /* GCC internally does not use IEEE754-like encoding (where normalized
9305 significands are in the range [1, 2)). GCC uses [0.5, 1) (see real.c).
9306 Our mantissa values are shifted 4 places to the left relative to
9307 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
9308 by 5 places to correct for GCC's representation. */
9309 exponent = 5 - exponent;
9311 return (exponent >= 0 && exponent <= 7);
9314 char*
9315 aarch64_output_simd_mov_immediate (rtx const_vector,
9316 machine_mode mode,
9317 unsigned width)
9319 bool is_valid;
9320 static char templ[40];
9321 const char *mnemonic;
9322 const char *shift_op;
9323 unsigned int lane_count = 0;
9324 char element_char;
9326 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
9328 /* This will return true to show const_vector is legal for use as an
9329 AdvSIMD MOVI (or, implicitly, MVNI) instruction immediate. It will
9330 also update INFO to show how the immediate should be generated. */
9331 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
9332 gcc_assert (is_valid);
9334 element_char = sizetochar (info.element_width);
9335 lane_count = width / info.element_width;
9337 mode = GET_MODE_INNER (mode);
9338 if (mode == SFmode || mode == DFmode)
9340 gcc_assert (info.shift == 0 && ! info.mvn);
9341 if (aarch64_float_const_zero_rtx_p (info.value))
9342 info.value = GEN_INT (0);
9343 else
9345 #define buf_size 20
9346 REAL_VALUE_TYPE r;
9347 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
9348 char float_buf[buf_size] = {'\0'};
9349 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
9350 #undef buf_size
9352 if (lane_count == 1)
9353 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
9354 else
9355 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
9356 lane_count, element_char, float_buf);
9357 return templ;
9361 mnemonic = info.mvn ? "mvni" : "movi";
9362 shift_op = info.msl ? "msl" : "lsl";
9364 if (lane_count == 1)
9365 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
9366 mnemonic, UINTVAL (info.value));
9367 else if (info.shift)
9368 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
9369 ", %s %d", mnemonic, lane_count, element_char,
9370 UINTVAL (info.value), shift_op, info.shift);
9371 else
9372 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
9373 mnemonic, lane_count, element_char, UINTVAL (info.value));
9374 return templ;
9377 char*
9378 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
9379 machine_mode mode)
9381 machine_mode vmode;
9383 gcc_assert (!VECTOR_MODE_P (mode));
9384 vmode = aarch64_simd_container_mode (mode, 64);
9385 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
9386 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
9389 /* Split operands into moves from op[1] + op[2] into op[0]. */
9391 void
9392 aarch64_split_combinev16qi (rtx operands[3])
9394 unsigned int dest = REGNO (operands[0]);
9395 unsigned int src1 = REGNO (operands[1]);
9396 unsigned int src2 = REGNO (operands[2]);
9397 machine_mode halfmode = GET_MODE (operands[1]);
9398 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
9399 rtx destlo, desthi;
9401 gcc_assert (halfmode == V16QImode);
9403 if (src1 == dest && src2 == dest + halfregs)
9405 /* No-op move. Can't split to nothing; emit something. */
9406 emit_note (NOTE_INSN_DELETED);
9407 return;
9410 /* Preserve register attributes for variable tracking. */
9411 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
9412 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
9413 GET_MODE_SIZE (halfmode));
9415 /* Special case of reversed high/low parts. */
9416 if (reg_overlap_mentioned_p (operands[2], destlo)
9417 && reg_overlap_mentioned_p (operands[1], desthi))
9419 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9420 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
9421 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9423 else if (!reg_overlap_mentioned_p (operands[2], destlo))
9425 /* Try to avoid unnecessary moves if part of the result
9426 is in the right place already. */
9427 if (src1 != dest)
9428 emit_move_insn (destlo, operands[1]);
9429 if (src2 != dest + halfregs)
9430 emit_move_insn (desthi, operands[2]);
9432 else
9434 if (src2 != dest + halfregs)
9435 emit_move_insn (desthi, operands[2]);
9436 if (src1 != dest)
9437 emit_move_insn (destlo, operands[1]);
9441 /* vec_perm support. */
9443 #define MAX_VECT_LEN 16
9445 struct expand_vec_perm_d
9447 rtx target, op0, op1;
9448 unsigned char perm[MAX_VECT_LEN];
9449 machine_mode vmode;
9450 unsigned char nelt;
9451 bool one_vector_p;
9452 bool testing_p;
9455 /* Generate a variable permutation. */
9457 static void
9458 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
9460 machine_mode vmode = GET_MODE (target);
9461 bool one_vector_p = rtx_equal_p (op0, op1);
9463 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
9464 gcc_checking_assert (GET_MODE (op0) == vmode);
9465 gcc_checking_assert (GET_MODE (op1) == vmode);
9466 gcc_checking_assert (GET_MODE (sel) == vmode);
9467 gcc_checking_assert (TARGET_SIMD);
9469 if (one_vector_p)
9471 if (vmode == V8QImode)
9473 /* Expand the argument to a V16QI mode by duplicating it. */
9474 rtx pair = gen_reg_rtx (V16QImode);
9475 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
9476 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9478 else
9480 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
9483 else
9485 rtx pair;
9487 if (vmode == V8QImode)
9489 pair = gen_reg_rtx (V16QImode);
9490 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
9491 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9493 else
9495 pair = gen_reg_rtx (OImode);
9496 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
9497 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
9502 void
9503 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
9505 machine_mode vmode = GET_MODE (target);
9506 unsigned int nelt = GET_MODE_NUNITS (vmode);
9507 bool one_vector_p = rtx_equal_p (op0, op1);
9508 rtx mask;
9510 /* The TBL instruction does not use a modulo index, so we must take care
9511 of that ourselves. */
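  /* For example, a two-vector V16QImode permute is masked with 31 so
     selector values wrap modulo 32; on big-endian the XOR with 15 below
     then flips the lane index within each vector while leaving the
     "which vector" bit (bit 4) untouched.  */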
9512 mask = aarch64_simd_gen_const_vector_dup (vmode,
9513 one_vector_p ? nelt - 1 : 2 * nelt - 1);
9514 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
9516 /* For big-endian, we also need to reverse the index within the vector
9517 (but not which vector). */
9518 if (BYTES_BIG_ENDIAN)
9520 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
9521 if (!one_vector_p)
9522 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
9523 sel = expand_simple_binop (vmode, XOR, sel, mask,
9524 NULL, 0, OPTAB_LIB_WIDEN);
9526 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
9529 /* Recognize patterns suitable for the TRN instructions. */
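/* For example, for V4SImode a two-vector permutation of { 0, 4, 2, 6 }
   maps to TRN1 and { 1, 5, 3, 7 } to TRN2 (indices 4-7 select from the
   second operand).  */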
9530 static bool
9531 aarch64_evpc_trn (struct expand_vec_perm_d *d)
9533 unsigned int i, odd, mask, nelt = d->nelt;
9534 rtx out, in0, in1, x;
9535 rtx (*gen) (rtx, rtx, rtx);
9536 machine_mode vmode = d->vmode;
9538 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9539 return false;
9541 /* Note that these are little-endian tests.
9542 We correct for big-endian later. */
9543 if (d->perm[0] == 0)
9544 odd = 0;
9545 else if (d->perm[0] == 1)
9546 odd = 1;
9547 else
9548 return false;
9549 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9551 for (i = 0; i < nelt; i += 2)
9553 if (d->perm[i] != i + odd)
9554 return false;
9555 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
9556 return false;
9559 /* Success! */
9560 if (d->testing_p)
9561 return true;
9563 in0 = d->op0;
9564 in1 = d->op1;
9565 if (BYTES_BIG_ENDIAN)
9567 x = in0, in0 = in1, in1 = x;
9568 odd = !odd;
9570 out = d->target;
9572 if (odd)
9574 switch (vmode)
9576 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
9577 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
9578 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
9579 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
9580 case V4SImode: gen = gen_aarch64_trn2v4si; break;
9581 case V2SImode: gen = gen_aarch64_trn2v2si; break;
9582 case V2DImode: gen = gen_aarch64_trn2v2di; break;
9583 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
9584 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
9585 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
9586 default:
9587 return false;
9590 else
9592 switch (vmode)
9594 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
9595 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
9596 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
9597 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
9598 case V4SImode: gen = gen_aarch64_trn1v4si; break;
9599 case V2SImode: gen = gen_aarch64_trn1v2si; break;
9600 case V2DImode: gen = gen_aarch64_trn1v2di; break;
9601 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
9602 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
9603 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
9604 default:
9605 return false;
9609 emit_insn (gen (out, in0, in1));
9610 return true;
9613 /* Recognize patterns suitable for the UZP instructions. */
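/* For example, for V4SImode a two-vector permutation of { 0, 2, 4, 6 }
   maps to UZP1 and { 1, 3, 5, 7 } to UZP2.  */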
9614 static bool
9615 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
9617 unsigned int i, odd, mask, nelt = d->nelt;
9618 rtx out, in0, in1, x;
9619 rtx (*gen) (rtx, rtx, rtx);
9620 machine_mode vmode = d->vmode;
9622 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9623 return false;
9625 /* Note that these are little-endian tests.
9626 We correct for big-endian later. */
9627 if (d->perm[0] == 0)
9628 odd = 0;
9629 else if (d->perm[0] == 1)
9630 odd = 1;
9631 else
9632 return false;
9633 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9635 for (i = 0; i < nelt; i++)
9637 unsigned elt = (i * 2 + odd) & mask;
9638 if (d->perm[i] != elt)
9639 return false;
9642 /* Success! */
9643 if (d->testing_p)
9644 return true;
9646 in0 = d->op0;
9647 in1 = d->op1;
9648 if (BYTES_BIG_ENDIAN)
9650 x = in0, in0 = in1, in1 = x;
9651 odd = !odd;
9653 out = d->target;
9655 if (odd)
9657 switch (vmode)
9659 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9660 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9661 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9662 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9663 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9664 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9665 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9666 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9667 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9668 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9669 default:
9670 return false;
9673 else
9675 switch (vmode)
9677 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9678 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9679 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9680 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9681 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9682 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9683 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9684 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9685 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9686 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9687 default:
9688 return false;
9692 emit_insn (gen (out, in0, in1));
9693 return true;
9696 /* Recognize patterns suitable for the ZIP instructions. */
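/* For example, for V4SImode a two-vector permutation of { 0, 4, 1, 5 }
   maps to ZIP1 and { 2, 6, 3, 7 } to ZIP2.  */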
9697 static bool
9698 aarch64_evpc_zip (struct expand_vec_perm_d *d)
9700 unsigned int i, high, mask, nelt = d->nelt;
9701 rtx out, in0, in1, x;
9702 rtx (*gen) (rtx, rtx, rtx);
9703 machine_mode vmode = d->vmode;
9705 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9706 return false;
9708 /* Note that these are little-endian tests.
9709 We correct for big-endian later. */
9710 high = nelt / 2;
9711 if (d->perm[0] == high)
9712 /* Do Nothing. */
9714 else if (d->perm[0] == 0)
9715 high = 0;
9716 else
9717 return false;
9718 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9720 for (i = 0; i < nelt / 2; i++)
9722 unsigned elt = (i + high) & mask;
9723 if (d->perm[i * 2] != elt)
9724 return false;
9725 elt = (elt + nelt) & mask;
9726 if (d->perm[i * 2 + 1] != elt)
9727 return false;
9730 /* Success! */
9731 if (d->testing_p)
9732 return true;
9734 in0 = d->op0;
9735 in1 = d->op1;
9736 if (BYTES_BIG_ENDIAN)
9738 x = in0, in0 = in1, in1 = x;
9739 high = !high;
9741 out = d->target;
9743 if (high)
9745 switch (vmode)
9747 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9748 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9749 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9750 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9751 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9752 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9753 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9754 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9755 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9756 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9757 default:
9758 return false;
9761 else
9763 switch (vmode)
9765 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9766 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9767 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9768 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9769 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9770 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9771 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9772 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9773 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9774 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9775 default:
9776 return false;
9780 emit_insn (gen (out, in0, in1));
9781 return true;
9784 /* Recognize patterns for the EXT insn. */
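/* For example, for V8QImode a two-vector permutation of
   { 3, 4, 5, 6, 7, 8, 9, 10 } maps to EXT with an offset of 3; on
   big-endian the operands are swapped and the offset becomes
   nelt - location, as handled below.  */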
9786 static bool
9787 aarch64_evpc_ext (struct expand_vec_perm_d *d)
9789 unsigned int i, nelt = d->nelt;
9790 rtx (*gen) (rtx, rtx, rtx, rtx);
9791 rtx offset;
9793 unsigned int location = d->perm[0]; /* Always < nelt. */
9795 /* Check if the extracted indices are increasing by one. */
9796 for (i = 1; i < nelt; i++)
9798 unsigned int required = location + i;
9799 if (d->one_vector_p)
9801 /* We'll pass the same vector in twice, so allow indices to wrap. */
9802 required &= (nelt - 1);
9804 if (d->perm[i] != required)
9805 return false;
9808 switch (d->vmode)
9810 case V16QImode: gen = gen_aarch64_extv16qi; break;
9811 case V8QImode: gen = gen_aarch64_extv8qi; break;
9812 case V4HImode: gen = gen_aarch64_extv4hi; break;
9813 case V8HImode: gen = gen_aarch64_extv8hi; break;
9814 case V2SImode: gen = gen_aarch64_extv2si; break;
9815 case V4SImode: gen = gen_aarch64_extv4si; break;
9816 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9817 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9818 case V2DImode: gen = gen_aarch64_extv2di; break;
9819 case V2DFmode: gen = gen_aarch64_extv2df; break;
9820 default:
9821 return false;
9824 /* Success! */
9825 if (d->testing_p)
9826 return true;
9828 /* The case where (location == 0) is a no-op for both big- and little-endian,
9829 and is removed by the mid-end at optimization levels -O1 and higher. */
9831 if (BYTES_BIG_ENDIAN && (location != 0))
9833 /* After setup, we want the high elements of the first vector (stored
9834 at the LSB end of the register), and the low elements of the second
9835 vector (stored at the MSB end of the register). So swap. */
9836 std::swap (d->op0, d->op1);
9837 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9838 location = nelt - location;
9841 offset = GEN_INT (location);
9842 emit_insn (gen (d->target, d->op0, d->op1, offset));
9843 return true;
9846 /* Recognize patterns for the REV insns. */
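/* For example, for V8QImode a permutation of { 7, 6, 5, 4, 3, 2, 1, 0 }
   (diff == 7) maps to REV64 and { 3, 2, 1, 0, 7, 6, 5, 4 } (diff == 3)
   maps to REV32.  */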
9848 static bool
9849 aarch64_evpc_rev (struct expand_vec_perm_d *d)
9851 unsigned int i, j, diff, nelt = d->nelt;
9852 rtx (*gen) (rtx, rtx);
9854 if (!d->one_vector_p)
9855 return false;
9857 diff = d->perm[0];
9858 switch (diff)
9860 case 7:
9861 switch (d->vmode)
9863 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9864 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9865 default:
9866 return false;
9868 break;
9869 case 3:
9870 switch (d->vmode)
9872 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9873 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9874 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9875 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9876 default:
9877 return false;
9879 break;
9880 case 1:
9881 switch (d->vmode)
9883 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9884 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9885 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9886 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9887 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9888 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9889 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9890 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9891 default:
9892 return false;
9894 break;
9895 default:
9896 return false;
9899 for (i = 0; i < nelt ; i += diff + 1)
9900 for (j = 0; j <= diff; j += 1)
9902 /* This is guaranteed to be true as the value of diff
9903 is 7, 3, or 1, and we should have enough elements in the
9904 queue to generate this. Getting a vector mask with a
9905 value of diff other than these values implies that
9906 something is wrong by the time we get here. */
9907 gcc_assert (i + j < nelt);
9908 if (d->perm[i + j] != i + diff - j)
9909 return false;
9912 /* Success! */
9913 if (d->testing_p)
9914 return true;
9916 emit_insn (gen (d->target, d->op0));
9917 return true;
9920 static bool
9921 aarch64_evpc_dup (struct expand_vec_perm_d *d)
9923 rtx (*gen) (rtx, rtx, rtx);
9924 rtx out = d->target;
9925 rtx in0;
9926 machine_mode vmode = d->vmode;
9927 unsigned int i, elt, nelt = d->nelt;
9928 rtx lane;
9930 elt = d->perm[0];
9931 for (i = 1; i < nelt; i++)
9933 if (elt != d->perm[i])
9934 return false;
9937 /* The generic preparation in aarch64_expand_vec_perm_const_1
9938 swaps the operand order and the permute indices if it finds
9939 d->perm[0] to be in the second operand. Thus, we can always
9940 use d->op0 and need not do any extra arithmetic to get the
9941 correct lane number. */
9942 in0 = d->op0;
9943 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
9945 switch (vmode)
9947 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9948 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9949 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9950 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9951 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9952 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9953 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9954 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9955 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9956 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9957 default:
9958 return false;
9961 emit_insn (gen (out, in0, lane));
9962 return true;
9965 static bool
9966 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9968 rtx rperm[MAX_VECT_LEN], sel;
9969 machine_mode vmode = d->vmode;
9970 unsigned int i, nelt = d->nelt;
9972 if (d->testing_p)
9973 return true;
9975 /* Generic code will try constant permutation twice. Once with the
9976 original mode and again with the elements lowered to QImode.
9977 So wait and don't do the selector expansion ourselves. */
9978 if (vmode != V8QImode && vmode != V16QImode)
9979 return false;
9981 for (i = 0; i < nelt; ++i)
9983 int nunits = GET_MODE_NUNITS (vmode);
9985 /* If big-endian and two vectors we end up with a weird mixed-endian
9986 mode on NEON. Reverse the index within each word but not the word
9987 itself. */
9988 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9989 : d->perm[i]);
9991 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9992 sel = force_reg (vmode, sel);
9994 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9995 return true;
9998 static bool
9999 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
10001 /* The pattern matching functions above are written to look for a small
10002 number to begin the sequence (0, 1, N/2). If we begin with an index
10003 from the second operand, we can swap the operands. */
10004 if (d->perm[0] >= d->nelt)
10006 unsigned i, nelt = d->nelt;
10008 gcc_assert (nelt == (nelt & -nelt));
10009 for (i = 0; i < nelt; ++i)
10010 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
10012 std::swap (d->op0, d->op1);
10015 if (TARGET_SIMD)
10017 if (aarch64_evpc_rev (d))
10018 return true;
10019 else if (aarch64_evpc_ext (d))
10020 return true;
10021 else if (aarch64_evpc_dup (d))
10022 return true;
10023 else if (aarch64_evpc_zip (d))
10024 return true;
10025 else if (aarch64_evpc_uzp (d))
10026 return true;
10027 else if (aarch64_evpc_trn (d))
10028 return true;
10029 return aarch64_evpc_tbl (d);
10031 return false;
10034 /* Expand a vec_perm_const pattern. */
10036 bool
10037 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
10039 struct expand_vec_perm_d d;
10040 int i, nelt, which;
10042 d.target = target;
10043 d.op0 = op0;
10044 d.op1 = op1;
10046 d.vmode = GET_MODE (target);
10047 gcc_assert (VECTOR_MODE_P (d.vmode));
10048 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
10049 d.testing_p = false;
10051 for (i = which = 0; i < nelt; ++i)
10053 rtx e = XVECEXP (sel, 0, i);
10054 int ei = INTVAL (e) & (2 * nelt - 1);
10055 which |= (ei < nelt ? 1 : 2);
10056 d.perm[i] = ei;
10059 switch (which)
10061 default:
10062 gcc_unreachable ();
10064 case 3:
10065 d.one_vector_p = false;
10066 if (!rtx_equal_p (op0, op1))
10067 break;
10069 /* The elements of PERM do not suggest that only the first operand
10070 is used, but both operands are identical. Allow easier matching
10071 of the permutation by folding the permutation into the single
10072 input vector. */
10073 /* Fall Through. */
10074 case 2:
10075 for (i = 0; i < nelt; ++i)
10076 d.perm[i] &= nelt - 1;
10077 d.op0 = op1;
10078 d.one_vector_p = true;
10079 break;
10081 case 1:
10082 d.op1 = op0;
10083 d.one_vector_p = true;
10084 break;
10087 return aarch64_expand_vec_perm_const_1 (&d);
10090 static bool
10091 aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
10092 const unsigned char *sel)
10094 struct expand_vec_perm_d d;
10095 unsigned int i, nelt, which;
10096 bool ret;
10098 d.vmode = vmode;
10099 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
10100 d.testing_p = true;
10101 memcpy (d.perm, sel, nelt);
10103 /* Calculate whether all elements are in one vector. */
10104 for (i = which = 0; i < nelt; ++i)
10106 unsigned char e = d.perm[i];
10107 gcc_assert (e < 2 * nelt);
10108 which |= (e < nelt ? 1 : 2);
10111 /* If all elements are from the second vector, reindex as if from the
10112 first vector. */
10113 if (which == 2)
10114 for (i = 0; i < nelt; ++i)
10115 d.perm[i] -= nelt;
10117 /* Check whether the mask can be applied to a single vector. */
10118 d.one_vector_p = (which != 3);
10120 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
10121 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
10122 if (!d.one_vector_p)
10123 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
10125 start_sequence ();
10126 ret = aarch64_expand_vec_perm_const_1 (&d);
10127 end_sequence ();
10129 return ret;
10133 aarch64_reverse_mask (enum machine_mode mode)
10135 /* We have to reverse each vector because we don't have
10136 a permuted load that can reverse-load according to ABI rules. */
10137 rtx mask;
10138 rtvec v = rtvec_alloc (16);
10139 int i, j;
10140 int nunits = GET_MODE_NUNITS (mode);
10141 int usize = GET_MODE_UNIT_SIZE (mode);
10143 gcc_assert (BYTES_BIG_ENDIAN);
10144 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
10146 for (i = 0; i < nunits; i++)
10147 for (j = 0; j < usize; j++)
10148 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
10149 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
10150 return force_reg (V16QImode, mask);
10153 /* Implement MODES_TIEABLE_P. */
10155 bool
10156 aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10158 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
10159 return true;
10161 /* We specifically want to allow elements of "structure" modes to
10162 be tieable to the structure. This more general condition allows
10163 other rarer situations too. */
10164 if (TARGET_SIMD
10165 && aarch64_vector_mode_p (mode1)
10166 && aarch64_vector_mode_p (mode2))
10167 return true;
10169 return false;
10172 /* Return a new RTX holding the result of moving POINTER forward by
10173 AMOUNT bytes. */
10175 static rtx
10176 aarch64_move_pointer (rtx pointer, int amount)
10178 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
10180 return adjust_automodify_address (pointer, GET_MODE (pointer),
10181 next, amount);
10184 /* Return a new RTX holding the result of moving POINTER forward by the
10185 size of the mode it points to. */
10187 static rtx
10188 aarch64_progress_pointer (rtx pointer)
10190 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
10192 return aarch64_move_pointer (pointer, amount);
10195 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
10196 MODE bytes. */
10198 static void
10199 aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
10200 machine_mode mode)
10202 rtx reg = gen_reg_rtx (mode);
10204 /* "Cast" the pointers to the correct mode. */
10205 *src = adjust_address (*src, mode, 0);
10206 *dst = adjust_address (*dst, mode, 0);
10207 /* Emit the memcpy. */
10208 emit_move_insn (reg, *src);
10209 emit_move_insn (*dst, reg);
10210 /* Move the pointers forward. */
10211 *src = aarch64_progress_pointer (*src);
10212 *dst = aarch64_progress_pointer (*dst);
10215 /* Expand movmem, as if from a __builtin_memcpy. Return true if
10216 we succeed, otherwise return false. */
10218 bool
10219 aarch64_expand_movmem (rtx *operands)
10221 unsigned int n;
10222 rtx dst = operands[0];
10223 rtx src = operands[1];
10224 rtx base;
10225 bool speed_p = !optimize_function_for_size_p (cfun);
10227 /* When optimizing for size, give a better estimate of the length of a
10228 memcpy call, but use the default otherwise. */
10229 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
10231 /* We can't do anything smart if the amount to copy is not constant. */
10232 if (!CONST_INT_P (operands[2]))
10233 return false;
10235 n = UINTVAL (operands[2]);
10237 /* Try to keep the number of instructions low. For cases below 16 bytes we
10238 need to make at most two moves. For cases above 16 bytes it will be one
10239 move for each 16 byte chunk, then at most two additional moves. */
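  /* For example, n == 35 gives an estimate of 35/16 + 2 == 4, which is
     below the speed limit of (15 / 2) == 7, so the copy is expanded
     inline as two 16-byte chunks plus one overlapping 4-byte tail.  */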
10240 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
10241 return false;
10243 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10244 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
10246 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
10247 src = adjust_automodify_address (src, VOIDmode, base, 0);
10249 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
10250 1-byte chunk. */
10251 if (n < 4)
10253 if (n >= 2)
10255 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10256 n -= 2;
10259 if (n == 1)
10260 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10262 return true;
10265 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
10266 4-byte chunk, partially overlapping with the previously copied chunk. */
10267 if (n < 8)
10269 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10270 n -= 4;
10271 if (n > 0)
10273 int move = n - 4;
10275 src = aarch64_move_pointer (src, move);
10276 dst = aarch64_move_pointer (dst, move);
10277 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10279 return true;
10282 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
10283 them, then (if applicable) an 8-byte chunk. */
10284 while (n >= 8)
10286 if (n / 16)
10288 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
10289 n -= 16;
10291 else
10293 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10294 n -= 8;
10298 /* Finish the final bytes of the copy. We can always do this in one
10299 instruction. We either copy the exact amount we need, or partially
10300 overlap with the previous chunk we copied and copy 8 bytes. */
10301 if (n == 0)
10302 return true;
10303 else if (n == 1)
10304 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10305 else if (n == 2)
10306 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10307 else if (n == 4)
10308 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10309 else
10311 if (n == 3)
10313 src = aarch64_move_pointer (src, -1);
10314 dst = aarch64_move_pointer (dst, -1);
10315 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10317 else
10319 int move = n - 8;
10321 src = aarch64_move_pointer (src, move);
10322 dst = aarch64_move_pointer (dst, move);
10323 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10327 return true;
10330 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
10332 static unsigned HOST_WIDE_INT
10333 aarch64_asan_shadow_offset (void)
10335 return (HOST_WIDE_INT_1 << 36);
10338 static bool
10339 aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
10340 unsigned int align,
10341 enum by_pieces_operation op,
10342 bool speed_p)
10344 /* STORE_BY_PIECES can be used when copying a constant string, but
10345 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
10346 For now we always fail this and let the move_by_pieces code copy
10347 the string from read-only memory. */
10348 if (op == STORE_BY_PIECES)
10349 return false;
10351 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
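/* Map comparison code CODE to the CC_D* condition mode used by the
   conditional-compare patterns, or return CCmode if there is no direct
   mapping (callers treat CCmode as "not handled").  */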
10354 static enum machine_mode
10355 aarch64_code_to_ccmode (enum rtx_code code)
10357 switch (code)
10359 case NE:
10360 return CC_DNEmode;
10362 case EQ:
10363 return CC_DEQmode;
10365 case LE:
10366 return CC_DLEmode;
10368 case LT:
10369 return CC_DLTmode;
10371 case GE:
10372 return CC_DGEmode;
10374 case GT:
10375 return CC_DGTmode;
10377 case LEU:
10378 return CC_DLEUmode;
10380 case LTU:
10381 return CC_DLTUmode;
10383 case GEU:
10384 return CC_DGEUmode;
10386 case GTU:
10387 return CC_DGTUmode;
10389 default:
10390 return CCmode;
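/* Expand the first comparison of a conditional-compare (CCMP) chain:
   *PREP_SEQ receives the insns that prepare the operands and *GEN_SEQ
   the comparison itself.  Returns the CC register holding the result,
   or NULL_RTX if the comparison cannot be handled.  Installed as
   TARGET_GEN_CCMP_FIRST below.  */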
10394 static rtx
10395 aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
10396 int code, tree treeop0, tree treeop1)
10398 enum machine_mode op_mode, cmp_mode, cc_mode;
10399 rtx op0, op1, cmp, target;
10400 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
10401 enum insn_code icode;
10402 struct expand_operand ops[4];
10404 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
10405 if (cc_mode == CCmode)
10406 return NULL_RTX;
10408 start_sequence ();
10409 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
10411 op_mode = GET_MODE (op0);
10412 if (op_mode == VOIDmode)
10413 op_mode = GET_MODE (op1);
10415 switch (op_mode)
10417 case QImode:
10418 case HImode:
10419 case SImode:
10420 cmp_mode = SImode;
10421 icode = CODE_FOR_cmpsi;
10422 break;
10424 case DImode:
10425 cmp_mode = DImode;
10426 icode = CODE_FOR_cmpdi;
10427 break;
10429 default:
10430 end_sequence ();
10431 return NULL_RTX;
10434 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
10435 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
10436 if (!op0 || !op1)
10438 end_sequence ();
10439 return NULL_RTX;
10441 *prep_seq = get_insns ();
10442 end_sequence ();
10444 cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
10445 target = gen_rtx_REG (CCmode, CC_REGNUM);
10447 create_output_operand (&ops[0], target, CCmode);
10448 create_fixed_operand (&ops[1], cmp);
10449 create_fixed_operand (&ops[2], op0);
10450 create_fixed_operand (&ops[3], op1);
10452 start_sequence ();
10453 if (!maybe_expand_insn (icode, 4, ops))
10455 end_sequence ();
10456 return NULL_RTX;
10458 *gen_seq = get_insns ();
10459 end_sequence ();
10461 return gen_rtx_REG (cc_mode, CC_REGNUM);
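/* Expand a subsequent comparison of a CCMP chain, combined with the
   result PREV of the previous comparison according to BIT_CODE (AND or
   IOR).  Appends to *PREP_SEQ and *GEN_SEQ and returns the CC register,
   or NULL_RTX on failure.  Installed as TARGET_GEN_CCMP_NEXT below.  */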
10464 static rtx
10465 aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
10466 tree treeop0, tree treeop1, int bit_code)
10468 rtx op0, op1, cmp0, cmp1, target;
10469 enum machine_mode op_mode, cmp_mode, cc_mode;
10470 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
10471 enum insn_code icode = CODE_FOR_ccmp_andsi;
10472 struct expand_operand ops[6];
10474 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
10475 if (cc_mode == CCmode)
10476 return NULL_RTX;
10478 push_to_sequence ((rtx_insn*) *prep_seq);
10479 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
10481 op_mode = GET_MODE (op0);
10482 if (op_mode == VOIDmode)
10483 op_mode = GET_MODE (op1);
10485 switch (op_mode)
10487 case QImode:
10488 case HImode:
10489 case SImode:
10490 cmp_mode = SImode;
10491 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
10492 : CODE_FOR_ccmp_iorsi;
10493 break;
10495 case DImode:
10496 cmp_mode = DImode;
10497 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
10498 : CODE_FOR_ccmp_iordi;
10499 break;
10501 default:
10502 end_sequence ();
10503 return NULL_RTX;
10506 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
10507 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
10508 if (!op0 || !op1)
10510 end_sequence ();
10511 return NULL_RTX;
10513 *prep_seq = get_insns ();
10514 end_sequence ();
10516 target = gen_rtx_REG (cc_mode, CC_REGNUM);
10517 cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
10518 cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);
10520 create_fixed_operand (&ops[0], prev);
10521 create_fixed_operand (&ops[1], target);
10522 create_fixed_operand (&ops[2], op0);
10523 create_fixed_operand (&ops[3], op1);
10524 create_fixed_operand (&ops[4], cmp0);
10525 create_fixed_operand (&ops[5], cmp1);
10527 push_to_sequence ((rtx_insn*) *gen_seq);
10528 if (!maybe_expand_insn (icode, 6, ops))
10530 end_sequence ();
10531 return NULL_RTX;
10534 *gen_seq = get_insns ();
10535 end_sequence ();
10537 return target;
10540 #undef TARGET_GEN_CCMP_FIRST
10541 #define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
10543 #undef TARGET_GEN_CCMP_NEXT
10544 #define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
10546 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
10547 instruction fusion of some sort. */
10549 static bool
10550 aarch64_macro_fusion_p (void)
10552 return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
10556 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
10557 should be kept together during scheduling. */
10559 static bool
10560 aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
10562 rtx set_dest;
10563 rtx prev_set = single_set (prev);
10564 rtx curr_set = single_set (curr);
10565 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
10566 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
10568 if (!aarch64_macro_fusion_p ())
10569 return false;
10571 if (simple_sets_p
10572 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
10574 /* We are trying to match:
10575 prev (mov) == (set (reg r0) (const_int imm16))
10576 curr (movk) == (set (zero_extract (reg r0)
10577 (const_int 16)
10578 (const_int 16))
10579 (const_int imm16_1)) */
10581 set_dest = SET_DEST (curr_set);
10583 if (GET_CODE (set_dest) == ZERO_EXTRACT
10584 && CONST_INT_P (SET_SRC (curr_set))
10585 && CONST_INT_P (SET_SRC (prev_set))
10586 && CONST_INT_P (XEXP (set_dest, 2))
10587 && INTVAL (XEXP (set_dest, 2)) == 16
10588 && REG_P (XEXP (set_dest, 0))
10589 && REG_P (SET_DEST (prev_set))
10590 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
10592 return true;
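/* Illustrative example, added for exposition (not in the upstream sources):
   the MOV/MOVK pair kept together here corresponds to assembly such as

       mov  w0, #0x1234
       movk w0, #0x5678, lsl 16

   i.e. building the 32-bit immediate 0x56781234 in two halves, which a
   fusing core can treat as a single move; the immediates are arbitrary.  */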
10596 if (simple_sets_p
10597 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
10600 /* We're trying to match:
10601 prev (adrp) == (set (reg r1)
10602 (high (symbol_ref ("SYM"))))
10603 curr (add) == (set (reg r0)
10604 (lo_sum (reg r1)
10605 (symbol_ref ("SYM"))))
10606 Note that r0 need not necessarily be the same as r1, especially
10607 during pre-regalloc scheduling. */
10609 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10610 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10612 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
10613 && REG_P (XEXP (SET_SRC (curr_set), 0))
10614 && REGNO (XEXP (SET_SRC (curr_set), 0))
10615 == REGNO (SET_DEST (prev_set))
10616 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
10617 XEXP (SET_SRC (curr_set), 1)))
10618 return true;
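/* Illustrative example, added for exposition (not in the upstream sources):
   the ADRP/ADD pair matched above corresponds to assembly such as

       adrp x1, sym
       add  x0, x1, :lo12:sym

   which together materialise the address of "sym"; keeping the two
   instructions adjacent lets cores that fuse the pair execute it as one
   operation.  */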
10622 if (simple_sets_p
10623 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
10626 /* We're trying to match:
10627 prev (movk) == (set (zero_extract (reg r0)
10628 (const_int 16)
10629 (const_int 32))
10630 (const_int imm16_1))
10631 curr (movk) == (set (zero_extract (reg r0)
10632 (const_int 16)
10633 (const_int 48))
10634 (const_int imm16_2)) */
10636 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
10637 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
10638 && REG_P (XEXP (SET_DEST (prev_set), 0))
10639 && REG_P (XEXP (SET_DEST (curr_set), 0))
10640 && REGNO (XEXP (SET_DEST (prev_set), 0))
10641 == REGNO (XEXP (SET_DEST (curr_set), 0))
10642 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
10643 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
10644 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
10645 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
10646 && CONST_INT_P (SET_SRC (prev_set))
10647 && CONST_INT_P (SET_SRC (curr_set)))
10648 return true;
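/* Illustrative example, added for exposition (not in the upstream sources):
   the MOVK/MOVK pair matched above corresponds to the last two instructions
   of a 64-bit immediate build, e.g.

       movk x0, #0x9abc, lsl 32
       movk x0, #0xdef0, lsl 48

   where the 16-bit immediates are arbitrary values chosen for the example.  */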
10651 if (simple_sets_p
10652 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR))
10654 /* We're trying to match:
10655 prev (adrp) == (set (reg r0)
10656 (high (symbol_ref ("SYM"))))
10657 curr (ldr) == (set (reg r1)
10658 (mem (lo_sum (reg r0)
10659 (symbol_ref ("SYM")))))
10661 curr (ldr) == (set (reg r1)
10662 (zero_extend (mem
10663 (lo_sum (reg r0)
10664 (symbol_ref ("SYM")))))) */
10665 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10666 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10668 rtx curr_src = SET_SRC (curr_set);
10670 if (GET_CODE (curr_src) == ZERO_EXTEND)
10671 curr_src = XEXP (curr_src, 0);
10673 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
10674 && REG_P (XEXP (XEXP (curr_src, 0), 0))
10675 && REGNO (XEXP (XEXP (curr_src, 0), 0))
10676 == REGNO (SET_DEST (prev_set))
10677 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
10678 XEXP (SET_SRC (prev_set), 0)))
10679 return true;
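/* Illustrative example, added for exposition (not in the upstream sources):
   the ADRP/LDR pair matched above corresponds to assembly such as

       adrp x0, sym
       ldr  w1, [x0, :lo12:sym]

   i.e. a PC-relative page address followed by a load from that page.  */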
10683 if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
10684 && any_condjump_p (curr))
10686 enum attr_type prev_type = get_attr_type (prev);
10688 /* FIXME: this misses some cases that are considered simple arithmetic
10689 instructions for ThunderX. Simple shifts are missed here. */
10690 if (prev_type == TYPE_ALUS_SREG
10691 || prev_type == TYPE_ALUS_IMM
10692 || prev_type == TYPE_LOGICS_REG
10693 || prev_type == TYPE_LOGICS_IMM)
10694 return true;
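/* Illustrative example, added for exposition (not in the upstream sources):
   a flag-setting ALU instruction followed by the conditional branch that
   consumes the flags, e.g.

       subs w0, w0, #1
       b.ne .Lloop

   is the kind of pair the AARCH64_FUSE_CMP_BRANCH check above keeps
   together.  */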
10697 return false;
10700 /* If MEM is in the form of [base+offset], extract the two parts
10701 of the address into BASE and OFFSET and return true; otherwise
10702 return false after clearing BASE and OFFSET. */
10704 bool
10705 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
10707 rtx addr;
10709 gcc_assert (MEM_P (mem));
10711 addr = XEXP (mem, 0);
10713 if (REG_P (addr))
10715 *base = addr;
10716 *offset = const0_rtx;
10717 return true;
10720 if (GET_CODE (addr) == PLUS
10721 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
10723 *base = XEXP (addr, 0);
10724 *offset = XEXP (addr, 1);
10725 return true;
10728 *base = NULL_RTX;
10729 *offset = NULL_RTX;
10731 return false;
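/* Illustrative examples, added for exposition (not in the upstream sources):
   for a MEM whose address is (reg x2) the function above returns BASE = x2
   and OFFSET = 0; for (plus (reg x2) (const_int 8)) it returns BASE = x2 and
   OFFSET = 8; any other address form, e.g. a register-indexed one, makes it
   return false.  */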
10734 /* Types for scheduling fusion. */
10735 enum sched_fusion_type
10737 SCHED_FUSION_NONE = 0,
10738 SCHED_FUSION_LD_SIGN_EXTEND,
10739 SCHED_FUSION_LD_ZERO_EXTEND,
10740 SCHED_FUSION_LD,
10741 SCHED_FUSION_ST,
10742 SCHED_FUSION_NUM
10745 /* If INSN is a load or store whose address is in the form of [base+offset],
10746 extract the two parts into BASE and OFFSET. Return the scheduling
10747 fusion type of this INSN. */
10749 static enum sched_fusion_type
10750 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
10752 rtx x, dest, src;
10753 enum sched_fusion_type fusion = SCHED_FUSION_LD;
10755 gcc_assert (INSN_P (insn));
10756 x = PATTERN (insn);
10757 if (GET_CODE (x) != SET)
10758 return SCHED_FUSION_NONE;
10760 src = SET_SRC (x);
10761 dest = SET_DEST (x);
10763 if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
10764 && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
10765 return SCHED_FUSION_NONE;
10767 if (GET_CODE (src) == SIGN_EXTEND)
10769 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
10770 src = XEXP (src, 0);
10771 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10772 return SCHED_FUSION_NONE;
10774 else if (GET_CODE (src) == ZERO_EXTEND)
10776 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
10777 src = XEXP (src, 0);
10778 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10779 return SCHED_FUSION_NONE;
10782 if (GET_CODE (src) == MEM && REG_P (dest))
10783 extract_base_offset_in_addr (src, base, offset);
10784 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
10786 fusion = SCHED_FUSION_ST;
10787 extract_base_offset_in_addr (dest, base, offset);
10789 else
10790 return SCHED_FUSION_NONE;
10792 if (*base == NULL_RTX || *offset == NULL_RTX)
10793 fusion = SCHED_FUSION_NONE;
10795 return fusion;
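/* Illustrative examples, added for exposition (not in the upstream sources):
   under the classification above, "ldr w0, [x1, 8]" is SCHED_FUSION_LD,
   "ldrsw x0, [x1, 8]" is SCHED_FUSION_LD_SIGN_EXTEND, and "str wzr, [x1, 8]"
   (a store of constant zero) is SCHED_FUSION_ST, each with BASE = x1 and
   OFFSET = 8.  */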
10798 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
10800 Currently we only support fusing ldr and str instructions, so FUSION_PRI
10801 and PRI are only calculated for these instructions. For other instructions,
10802 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
10803 types of instruction fusion can be added by returning different priorities.
10805 It's important that irrelevant instructions get the largest FUSION_PRI. */
10807 static void
10808 aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
10809 int *fusion_pri, int *pri)
10811 int tmp, off_val;
10812 rtx base, offset;
10813 enum sched_fusion_type fusion;
10815 gcc_assert (INSN_P (insn));
10817 tmp = max_pri - 1;
10818 fusion = fusion_load_store (insn, &base, &offset);
10819 if (fusion == SCHED_FUSION_NONE)
10821 *pri = tmp;
10822 *fusion_pri = tmp;
10823 return;
10826 /* Set FUSION_PRI according to fusion type and base register. */
10827 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
10829 /* Calculate PRI. */
10830 tmp /= 2;
10832 /* The INSN with the smaller offset goes first. */
10833 off_val = (int)(INTVAL (offset));
10834 if (off_val >= 0)
10835 tmp -= (off_val & 0xfffff);
10836 else
10837 tmp += ((- off_val) & 0xfffff);
10839 *pri = tmp;
10840 return;
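/* Illustrative example, added for exposition (not in the upstream sources):
   two SImode loads "ldr w0, [x1, 4]" and "ldr w2, [x1, 8]" get the same
   FUSION_PRI (same fusion type, same base register x1), and the load with
   the smaller offset receives the larger PRI, so the scheduler prefers to
   issue it first and keep the pair adjacent for a later ldp.  */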
10843 /* Given OPERANDS of consecutive load/store, check if we can merge
10844 them into ldp/stp. LOAD is true if they are load instructions.
10845 MODE is the mode of memory operands. */
10847 bool
10848 aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
10849 enum machine_mode mode)
10851 HOST_WIDE_INT offval_1, offval_2, msize;
10852 enum reg_class rclass_1, rclass_2;
10853 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
10855 if (load)
10857 mem_1 = operands[1];
10858 mem_2 = operands[3];
10859 reg_1 = operands[0];
10860 reg_2 = operands[2];
10861 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
10862 if (REGNO (reg_1) == REGNO (reg_2))
10863 return false;
10865 else
10867 mem_1 = operands[0];
10868 mem_2 = operands[2];
10869 reg_1 = operands[1];
10870 reg_2 = operands[3];
10873 /* The mems cannot be volatile. */
10874 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
10875 return false;
10877 /* Check if the addresses are in the form of [base+offset]. */
10878 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
10879 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
10880 return false;
10881 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
10882 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
10883 return false;
10885 /* Check if the bases are the same. */
10886 if (!rtx_equal_p (base_1, base_2))
10887 return false;
10889 offval_1 = INTVAL (offset_1);
10890 offval_2 = INTVAL (offset_2);
10891 msize = GET_MODE_SIZE (mode);
10892 /* Check if the offsets are consecutive. */
10893 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
10894 return false;
10896 /* Check if the addresses are clobbered by the load. */
10897 if (load)
10899 if (reg_mentioned_p (reg_1, mem_1))
10900 return false;
10902 /* In increasing order, the last load can clobber the address. */
10903 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
10904 return false;
10907 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
10908 rclass_1 = FP_REGS;
10909 else
10910 rclass_1 = GENERAL_REGS;
10912 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
10913 rclass_2 = FP_REGS;
10914 else
10915 rclass_2 = GENERAL_REGS;
10917 /* Check if the registers are of the same class. */
10918 if (rclass_1 != rclass_2)
10919 return false;
10921 return true;
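/* Illustrative example, added for exposition (not in the upstream sources):
   the checks above accept a pair such as

       ldr w0, [x3, 4]
       ldr w1, [x3, 8]

   (non-volatile, same base, consecutive offsets, both destinations in
   GENERAL_REGS), which the ldp/stp peepholes can then rewrite as
   "ldp w0, w1, [x3, 4]".  */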
10924 /* Given OPERANDS of consecutive load/store, check if we can merge
10925 them into ldp/stp by adjusting the offset. LOAD is true if they
10926 are load instructions. MODE is the mode of memory operands.
10928 Given the consecutive stores below:
10930 str w1, [xb, 0x100]
10931 str w1, [xb, 0x104]
10932 str w1, [xb, 0x108]
10933 str w1, [xb, 0x10c]
10935 Though the offsets are out of the range supported by stp, we can
10936 still pair them after adjusting the offset, like:
10938 add scratch, xb, 0x100
10939 stp w1, w1, [scratch]
10940 stp w1, w1, [scratch, 0x8]
10942 The peephole patterns detecting this opportunity should guarantee
10943 the scratch register is available. */
10945 bool
10946 aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
10947 enum machine_mode mode)
10949 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
10950 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
10951 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
10952 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
10954 if (load)
10956 reg_1 = operands[0];
10957 mem_1 = operands[1];
10958 reg_2 = operands[2];
10959 mem_2 = operands[3];
10960 reg_3 = operands[4];
10961 mem_3 = operands[5];
10962 reg_4 = operands[6];
10963 mem_4 = operands[7];
10964 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
10965 && REG_P (reg_3) && REG_P (reg_4));
10966 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
10967 return false;
10969 else
10971 mem_1 = operands[0];
10972 reg_1 = operands[1];
10973 mem_2 = operands[2];
10974 reg_2 = operands[3];
10975 mem_3 = operands[4];
10976 reg_3 = operands[5];
10977 mem_4 = operands[6];
10978 reg_4 = operands[7];
10980 /* Skip if the memory operand is by itself valid for ldp/stp. */
10981 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
10982 return false;
10984 /* The mems cannot be volatile. */
10985 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
10986 || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
10987 return false;
10989 /* Check if the addresses are in the form of [base+offset]. */
10990 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
10991 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
10992 return false;
10993 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
10994 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
10995 return false;
10996 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
10997 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
10998 return false;
10999 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
11000 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
11001 return false;
11003 /* Check if the bases are the same. */
11004 if (!rtx_equal_p (base_1, base_2)
11005 || !rtx_equal_p (base_2, base_3)
11006 || !rtx_equal_p (base_3, base_4))
11007 return false;
11009 offval_1 = INTVAL (offset_1);
11010 offval_2 = INTVAL (offset_2);
11011 offval_3 = INTVAL (offset_3);
11012 offval_4 = INTVAL (offset_4);
11013 msize = GET_MODE_SIZE (mode);
11014 /* Check if the offsets are consecutive. */
11015 if ((offval_1 != (offval_2 + msize)
11016 || offval_1 != (offval_3 + msize * 2)
11017 || offval_1 != (offval_4 + msize * 3))
11018 && (offval_4 != (offval_3 + msize)
11019 || offval_4 != (offval_2 + msize * 2)
11020 || offval_4 != (offval_1 + msize * 3)))
11021 return false;
11023 /* Check if the addresses are clobbered by the load. */
11024 if (load)
11026 if (reg_mentioned_p (reg_1, mem_1)
11027 || reg_mentioned_p (reg_2, mem_2)
11028 || reg_mentioned_p (reg_3, mem_3))
11029 return false;
11031 /* In increasing order, the last load can clobber the address. */
11032 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
11033 return false;
11036 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
11037 rclass_1 = FP_REGS;
11038 else
11039 rclass_1 = GENERAL_REGS;
11041 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
11042 rclass_2 = FP_REGS;
11043 else
11044 rclass_2 = GENERAL_REGS;
11046 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
11047 rclass_3 = FP_REGS;
11048 else
11049 rclass_3 = GENERAL_REGS;
11051 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
11052 rclass_4 = FP_REGS;
11053 else
11054 rclass_4 = GENERAL_REGS;
11056 /* Check if the registers are of the same class. */
11057 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
11058 return false;
11060 return true;
11063 /* Given OPERANDS of consecutive load/store, this function pairs them
11064 into ldp/stp after adjusting the offset. It depends on the fact
11065 that addresses of load/store instructions are in increasing order.
11066 MODE is the mode of memory operands. CODE is the rtl operator
11067 which should be applied to all memory operands, it's SIGN_EXTEND,
11068 ZERO_EXTEND or UNKNOWN. */
11070 bool
11071 aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
11072 enum machine_mode mode, RTX_CODE code)
11074 rtx base, offset, t1, t2;
11075 rtx mem_1, mem_2, mem_3, mem_4;
11076 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
11078 if (load)
11080 mem_1 = operands[1];
11081 mem_2 = operands[3];
11082 mem_3 = operands[5];
11083 mem_4 = operands[7];
11085 else
11087 mem_1 = operands[0];
11088 mem_2 = operands[2];
11089 mem_3 = operands[4];
11090 mem_4 = operands[6];
11091 gcc_assert (code == UNKNOWN);
11094 extract_base_offset_in_addr (mem_1, &base, &offset);
11095 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
11097 /* Adjust the offset so that it fits in an ldp/stp instruction. */
11098 msize = GET_MODE_SIZE (mode);
11099 stp_off_limit = msize * 0x40;
11100 off_val = INTVAL (offset);
11101 abs_off = (off_val < 0) ? -off_val : off_val;
11102 new_off = abs_off % stp_off_limit;
11103 adj_off = abs_off - new_off;
11105 /* Further adjust to make sure all offsets are OK. */
11106 if ((new_off + msize * 2) >= stp_off_limit)
11108 adj_off += stp_off_limit;
11109 new_off -= stp_off_limit;
11112 /* Make sure the adjustment can be done with ADD/SUB instructions. */
11113 if (adj_off >= 0x1000)
11114 return false;
11116 if (off_val < 0)
11118 adj_off = -adj_off;
11119 new_off = -new_off;
11122 /* Create new memory references. */
11123 mem_1 = change_address (mem_1, VOIDmode,
11124 plus_constant (DImode, operands[8], new_off));
11126 /* Check if the adjusted address is OK for ldp/stp. */
11127 if (!aarch64_mem_pair_operand (mem_1, mode))
11128 return false;
11130 msize = GET_MODE_SIZE (mode);
11131 mem_2 = change_address (mem_2, VOIDmode,
11132 plus_constant (DImode,
11133 operands[8],
11134 new_off + msize));
11135 mem_3 = change_address (mem_3, VOIDmode,
11136 plus_constant (DImode,
11137 operands[8],
11138 new_off + msize * 2));
11139 mem_4 = change_address (mem_4, VOIDmode,
11140 plus_constant (DImode,
11141 operands[8],
11142 new_off + msize * 3));
11144 if (code == ZERO_EXTEND)
11146 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
11147 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
11148 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
11149 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
11151 else if (code == SIGN_EXTEND)
11153 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
11154 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
11155 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
11156 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
11159 if (load)
11161 operands[1] = mem_1;
11162 operands[3] = mem_2;
11163 operands[5] = mem_3;
11164 operands[7] = mem_4;
11166 else
11168 operands[0] = mem_1;
11169 operands[2] = mem_2;
11170 operands[4] = mem_3;
11171 operands[6] = mem_4;
11174 /* Emit adjusting instruction. */
11175 emit_insn (gen_rtx_SET (VOIDmode, operands[8],
11176 plus_constant (DImode, base, adj_off)));
11177 /* Emit ldp/stp instructions. */
11178 t1 = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
11179 t2 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
11180 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11181 t1 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
11182 t2 = gen_rtx_SET (VOIDmode, operands[6], operands[7]);
11183 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
11184 return true;
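/* Worked example, added for exposition (not in the upstream sources), using
   the four SImode stores at offsets 0x100..0x10c quoted in the comment
   before aarch64_operands_adjust_ok_for_ldpstp: msize = 4, so
   stp_off_limit = 0x100, new_off = 0x100 % 0x100 = 0 and adj_off = 0x100.
   The code above therefore emits

       add scratch, xb, 0x100
       stp w1, w1, [scratch]
       stp w1, w1, [scratch, 8]

   matching the adjusted sequence shown in that comment.  */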
11187 #undef TARGET_ADDRESS_COST
11188 #define TARGET_ADDRESS_COST aarch64_address_cost
11190 /* This hook determines whether unnamed bitfields affect the alignment
11191 of the containing structure. The hook returns true if the structure
11192 should inherit the alignment requirements of an unnamed bitfield's
11193 type. */
11194 #undef TARGET_ALIGN_ANON_BITFIELD
11195 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
11197 #undef TARGET_ASM_ALIGNED_DI_OP
11198 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
11200 #undef TARGET_ASM_ALIGNED_HI_OP
11201 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
11203 #undef TARGET_ASM_ALIGNED_SI_OP
11204 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
11206 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
11207 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
11208 hook_bool_const_tree_hwi_hwi_const_tree_true
11210 #undef TARGET_ASM_FILE_START
11211 #define TARGET_ASM_FILE_START aarch64_start_file
11213 #undef TARGET_ASM_OUTPUT_MI_THUNK
11214 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
11216 #undef TARGET_ASM_SELECT_RTX_SECTION
11217 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
11219 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
11220 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
11222 #undef TARGET_BUILD_BUILTIN_VA_LIST
11223 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
11225 #undef TARGET_CALLEE_COPIES
11226 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
11228 #undef TARGET_CAN_ELIMINATE
11229 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
11231 #undef TARGET_CANNOT_FORCE_CONST_MEM
11232 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
11234 #undef TARGET_CONDITIONAL_REGISTER_USAGE
11235 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
11237 /* Only the least significant bit is used for initialization guard
11238 variables. */
11239 #undef TARGET_CXX_GUARD_MASK_BIT
11240 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
11242 #undef TARGET_C_MODE_FOR_SUFFIX
11243 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
11245 #ifdef TARGET_BIG_ENDIAN_DEFAULT
11246 #undef TARGET_DEFAULT_TARGET_FLAGS
11247 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
11248 #endif
11250 #undef TARGET_CLASS_MAX_NREGS
11251 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
11253 #undef TARGET_BUILTIN_DECL
11254 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
11256 #undef TARGET_EXPAND_BUILTIN
11257 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
11259 #undef TARGET_EXPAND_BUILTIN_VA_START
11260 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
11262 #undef TARGET_FOLD_BUILTIN
11263 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
11265 #undef TARGET_FUNCTION_ARG
11266 #define TARGET_FUNCTION_ARG aarch64_function_arg
11268 #undef TARGET_FUNCTION_ARG_ADVANCE
11269 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
11271 #undef TARGET_FUNCTION_ARG_BOUNDARY
11272 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
11274 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
11275 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
11277 #undef TARGET_FUNCTION_VALUE
11278 #define TARGET_FUNCTION_VALUE aarch64_function_value
11280 #undef TARGET_FUNCTION_VALUE_REGNO_P
11281 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
11283 #undef TARGET_FRAME_POINTER_REQUIRED
11284 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
11286 #undef TARGET_GIMPLE_FOLD_BUILTIN
11287 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
11289 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
11290 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
11292 #undef TARGET_INIT_BUILTINS
11293 #define TARGET_INIT_BUILTINS aarch64_init_builtins
11295 #undef TARGET_LEGITIMATE_ADDRESS_P
11296 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
11298 #undef TARGET_LEGITIMATE_CONSTANT_P
11299 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
11301 #undef TARGET_LIBGCC_CMP_RETURN_MODE
11302 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
11304 #undef TARGET_LRA_P
11305 #define TARGET_LRA_P hook_bool_void_true
11307 #undef TARGET_MANGLE_TYPE
11308 #define TARGET_MANGLE_TYPE aarch64_mangle_type
11310 #undef TARGET_MEMORY_MOVE_COST
11311 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
11313 #undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
11314 #define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
11316 #undef TARGET_MUST_PASS_IN_STACK
11317 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
11319 /* This target hook should return true if accesses to volatile bitfields
11320 should use the narrowest mode possible. It should return false if these
11321 accesses should use the bitfield container type. */
11322 #undef TARGET_NARROW_VOLATILE_BITFIELD
11323 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
11325 #undef TARGET_OPTION_OVERRIDE
11326 #define TARGET_OPTION_OVERRIDE aarch64_override_options
11328 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
11329 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
11330 aarch64_override_options_after_change
11332 #undef TARGET_PASS_BY_REFERENCE
11333 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
11335 #undef TARGET_PREFERRED_RELOAD_CLASS
11336 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
11338 #undef TARGET_SCHED_REASSOCIATION_WIDTH
11339 #define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
11341 #undef TARGET_SECONDARY_RELOAD
11342 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
11344 #undef TARGET_SHIFT_TRUNCATION_MASK
11345 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
11347 #undef TARGET_SETUP_INCOMING_VARARGS
11348 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
11350 #undef TARGET_STRUCT_VALUE_RTX
11351 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
11353 #undef TARGET_REGISTER_MOVE_COST
11354 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
11356 #undef TARGET_RETURN_IN_MEMORY
11357 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
11359 #undef TARGET_RETURN_IN_MSB
11360 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
11362 #undef TARGET_RTX_COSTS
11363 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
11365 #undef TARGET_SCHED_ISSUE_RATE
11366 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
11368 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
11369 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
11370 aarch64_sched_first_cycle_multipass_dfa_lookahead
11372 #undef TARGET_TRAMPOLINE_INIT
11373 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
11375 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
11376 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
11378 #undef TARGET_VECTOR_MODE_SUPPORTED_P
11379 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
11381 #undef TARGET_ARRAY_MODE_SUPPORTED_P
11382 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
11384 #undef TARGET_VECTORIZE_ADD_STMT_COST
11385 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
11387 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
11388 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
11389 aarch64_builtin_vectorization_cost
11391 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
11392 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
11394 #undef TARGET_VECTORIZE_BUILTINS
11395 #define TARGET_VECTORIZE_BUILTINS
11397 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
11398 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
11399 aarch64_builtin_vectorized_function
11401 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
11402 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
11403 aarch64_autovectorize_vector_sizes
11405 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
11406 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
11407 aarch64_atomic_assign_expand_fenv
11409 /* Section anchor support. */
11411 #undef TARGET_MIN_ANCHOR_OFFSET
11412 #define TARGET_MIN_ANCHOR_OFFSET -256
11414 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
11415 byte offset; we can do much more for larger data types, but have no way
11416 to determine the size of the access. We assume accesses are aligned. */
11417 #undef TARGET_MAX_ANCHOR_OFFSET
11418 #define TARGET_MAX_ANCHOR_OFFSET 4095
11420 #undef TARGET_VECTOR_ALIGNMENT
11421 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
11423 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
11424 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
11425 aarch64_simd_vector_alignment_reachable
11427 /* vec_perm support. */
11429 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
11430 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
11431 aarch64_vectorize_vec_perm_const_ok
11434 #undef TARGET_FIXED_CONDITION_CODE_REGS
11435 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
11437 #undef TARGET_FLAGS_REGNUM
11438 #define TARGET_FLAGS_REGNUM CC_REGNUM
11440 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
11441 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
11443 #undef TARGET_ASAN_SHADOW_OFFSET
11444 #define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
11446 #undef TARGET_LEGITIMIZE_ADDRESS
11447 #define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
11449 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
11450 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
11451 aarch64_use_by_pieces_infrastructure_p
11453 #undef TARGET_CAN_USE_DOLOOP_P
11454 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
11456 #undef TARGET_SCHED_MACRO_FUSION_P
11457 #define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
11459 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
11460 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
11462 #undef TARGET_SCHED_FUSION_PRIORITY
11463 #define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
11465 struct gcc_target targetm = TARGET_INITIALIZER;
11467 #include "gt-aarch64.h"