1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2015 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "cfghooks.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "rtl.h"
29 #include "df.h"
30 #include "insn-codes.h"
31 #include "insn-attr.h"
32 #include "alias.h"
33 #include "fold-const.h"
34 #include "stringpool.h"
35 #include "stor-layout.h"
36 #include "calls.h"
37 #include "varasm.h"
38 #include "regs.h"
39 #include "cfgrtl.h"
40 #include "cfganal.h"
41 #include "lcm.h"
42 #include "cfgbuild.h"
43 #include "cfgcleanup.h"
44 #include "output.h"
45 #include "flags.h"
46 #include "insn-config.h"
47 #include "expmed.h"
48 #include "dojump.h"
49 #include "explow.h"
50 #include "emit-rtl.h"
51 #include "stmt.h"
52 #include "expr.h"
53 #include "reload.h"
54 #include "toplev.h"
55 #include "target.h"
56 #include "targhooks.h"
57 #include "tm_p.h"
58 #include "recog.h"
59 #include "langhooks.h"
60 #include "opts.h"
61 #include "diagnostic.h"
62 #include "diagnostic-core.h"
63 #include "internal-fn.h"
64 #include "gimple-fold.h"
65 #include "tree-eh.h"
66 #include "gimplify.h"
67 #include "optabs.h"
68 #include "dwarf2.h"
69 #include "cfgloop.h"
70 #include "tree-vectorizer.h"
71 #include "aarch64-cost-tables.h"
72 #include "dumpfile.h"
73 #include "builtins.h"
74 #include "rtl-iter.h"
75 #include "tm-constrs.h"
76 #include "sched-int.h"
77 #include "cortex-a57-fma-steering.h"
78 #include "target-globals.h"
80 /* This file should be included last. */
81 #include "target-def.h"
83 /* Defined for convenience. */
84 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
86 /* Classifies an address.
88 ADDRESS_REG_IMM
89 A simple base register plus immediate offset.
91 ADDRESS_REG_WB
92 A base register indexed by immediate offset with writeback.
94 ADDRESS_REG_REG
95 A base register indexed by (optionally scaled) register.
97 ADDRESS_REG_UXTW
98 A base register indexed by (optionally scaled) zero-extended register.
100 ADDRESS_REG_SXTW
101 A base register indexed by (optionally scaled) sign-extended register.
103 ADDRESS_LO_SUM
104 A LO_SUM rtx with a base register and "LO12" symbol relocation.
106 ADDRESS_SYMBOLIC:
107 A constant symbolic address, in pc-relative literal pool. */
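/* Roughly, these classes correspond to the following assembly forms:
     ADDRESS_REG_IMM    [x0, #16]
     ADDRESS_REG_WB     [x0, #16]! or [x0], #16
     ADDRESS_REG_REG    [x0, x1, lsl #3]
     ADDRESS_REG_UXTW   [x0, w1, uxtw #2]
     ADDRESS_REG_SXTW   [x0, w1, sxtw #2]
     ADDRESS_LO_SUM     [x0, #:lo12:symbol]
     ADDRESS_SYMBOLIC   a pc-relative literal-pool reference, e.g. ldr x0, .LC0.  */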
109 enum aarch64_address_type {
110 ADDRESS_REG_IMM,
111 ADDRESS_REG_WB,
112 ADDRESS_REG_REG,
113 ADDRESS_REG_UXTW,
114 ADDRESS_REG_SXTW,
115 ADDRESS_LO_SUM,
116 ADDRESS_SYMBOLIC
119 struct aarch64_address_info {
120 enum aarch64_address_type type;
121 rtx base;
122 rtx offset;
123 int shift;
124 enum aarch64_symbol_type symbol_type;
127 struct simd_immediate_info
129 rtx value;
130 int shift;
131 int element_width;
132 bool mvn;
133 bool msl;
136 /* The current code model. */
137 enum aarch64_code_model aarch64_cmodel;
139 #ifdef HAVE_AS_TLS
140 #undef TARGET_HAVE_TLS
141 #define TARGET_HAVE_TLS 1
142 #endif
144 static bool aarch64_composite_type_p (const_tree, machine_mode);
145 static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
146 const_tree,
147 machine_mode *, int *,
148 bool *);
149 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
150 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
151 static void aarch64_override_options_after_change (void);
152 static bool aarch64_vector_mode_supported_p (machine_mode);
153 static unsigned bit_count (unsigned HOST_WIDE_INT);
154 static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
155 const unsigned char *sel);
156 static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
158 /* Major revision number of the ARM Architecture implemented by the target. */
159 unsigned aarch64_architecture_version;
161 /* The processor for which instructions should be scheduled. */
162 enum aarch64_processor aarch64_tune = cortexa53;
164 /* Mask to specify which instruction scheduling options should be used. */
165 unsigned long aarch64_tune_flags = 0;
167 /* Support for command line parsing of boolean flags in the tuning
168 structures. */
169 struct aarch64_flag_desc
171 const char* name;
172 unsigned int flag;
175 #define AARCH64_FUSION_PAIR(name, internal_name, y) \
176 { name, AARCH64_FUSE_##internal_name },
177 static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
179 { "none", AARCH64_FUSE_NOTHING },
180 #include "aarch64-fusion-pairs.def"
181 { "all", AARCH64_FUSE_ALL },
182 { NULL, AARCH64_FUSE_NOTHING }
 184 #undef AARCH64_FUSION_PAIR
186 #define AARCH64_EXTRA_TUNING_OPTION(name, internal_name, y) \
187 { name, AARCH64_EXTRA_TUNE_##internal_name },
188 static const struct aarch64_flag_desc aarch64_tuning_flags[] =
190 { "none", AARCH64_EXTRA_TUNE_NONE },
191 #include "aarch64-tuning-flags.def"
192 { "all", AARCH64_EXTRA_TUNE_ALL },
193 { NULL, AARCH64_EXTRA_TUNE_NONE }
195 #undef AARCH64_EXTRA_TUNING_OPTION
197 /* Tuning parameters. */
199 static const struct cpu_addrcost_table generic_addrcost_table =
202 0, /* hi */
203 0, /* si */
204 0, /* di */
205 0, /* ti */
207 0, /* pre_modify */
208 0, /* post_modify */
209 0, /* register_offset */
210 0, /* register_extend */
211 0 /* imm_offset */
214 static const struct cpu_addrcost_table cortexa57_addrcost_table =
217 1, /* hi */
218 0, /* si */
219 0, /* di */
220 1, /* ti */
222 0, /* pre_modify */
223 0, /* post_modify */
224 0, /* register_offset */
225 0, /* register_extend */
226 0, /* imm_offset */
229 static const struct cpu_addrcost_table xgene1_addrcost_table =
232 1, /* hi */
233 0, /* si */
234 0, /* di */
235 1, /* ti */
237 1, /* pre_modify */
238 0, /* post_modify */
239 0, /* register_offset */
240 1, /* register_extend */
241 0, /* imm_offset */
244 static const struct cpu_regmove_cost generic_regmove_cost =
246 1, /* GP2GP */
247 /* Avoid the use of slow int<->fp moves for spilling by setting
248 their cost higher than memmov_cost. */
249 5, /* GP2FP */
250 5, /* FP2GP */
251 2 /* FP2FP */
254 static const struct cpu_regmove_cost cortexa57_regmove_cost =
256 1, /* GP2GP */
257 /* Avoid the use of slow int<->fp moves for spilling by setting
258 their cost higher than memmov_cost. */
259 5, /* GP2FP */
260 5, /* FP2GP */
261 2 /* FP2FP */
264 static const struct cpu_regmove_cost cortexa53_regmove_cost =
266 1, /* GP2GP */
267 /* Avoid the use of slow int<->fp moves for spilling by setting
268 their cost higher than memmov_cost. */
269 5, /* GP2FP */
270 5, /* FP2GP */
271 2 /* FP2FP */
274 static const struct cpu_regmove_cost thunderx_regmove_cost =
276 2, /* GP2GP */
277 2, /* GP2FP */
278 6, /* FP2GP */
279 4 /* FP2FP */
282 static const struct cpu_regmove_cost xgene1_regmove_cost =
284 1, /* GP2GP */
285 /* Avoid the use of slow int<->fp moves for spilling by setting
286 their cost higher than memmov_cost. */
287 8, /* GP2FP */
288 8, /* FP2GP */
289 2 /* FP2FP */
292 /* Generic costs for vector insn classes. */
293 static const struct cpu_vector_cost generic_vector_cost =
295 1, /* scalar_stmt_cost */
296 1, /* scalar_load_cost */
297 1, /* scalar_store_cost */
298 1, /* vec_stmt_cost */
299 1, /* vec_to_scalar_cost */
300 1, /* scalar_to_vec_cost */
301 1, /* vec_align_load_cost */
302 1, /* vec_unalign_load_cost */
303 1, /* vec_unalign_store_cost */
304 1, /* vec_store_cost */
305 3, /* cond_taken_branch_cost */
306 1 /* cond_not_taken_branch_cost */
 309 /* Costs for vector insn classes for Cortex-A57.  */
310 static const struct cpu_vector_cost cortexa57_vector_cost =
312 1, /* scalar_stmt_cost */
313 4, /* scalar_load_cost */
314 1, /* scalar_store_cost */
315 3, /* vec_stmt_cost */
316 8, /* vec_to_scalar_cost */
317 8, /* scalar_to_vec_cost */
318 5, /* vec_align_load_cost */
319 5, /* vec_unalign_load_cost */
320 1, /* vec_unalign_store_cost */
321 1, /* vec_store_cost */
322 1, /* cond_taken_branch_cost */
323 1 /* cond_not_taken_branch_cost */
 326 /* Costs for vector insn classes for X-Gene 1.  */
327 static const struct cpu_vector_cost xgene1_vector_cost =
329 1, /* scalar_stmt_cost */
330 5, /* scalar_load_cost */
331 1, /* scalar_store_cost */
332 2, /* vec_stmt_cost */
333 4, /* vec_to_scalar_cost */
334 4, /* scalar_to_vec_cost */
335 10, /* vec_align_load_cost */
336 10, /* vec_unalign_load_cost */
337 2, /* vec_unalign_store_cost */
338 2, /* vec_store_cost */
339 2, /* cond_taken_branch_cost */
340 1 /* cond_not_taken_branch_cost */
343 /* Generic costs for branch instructions. */
344 static const struct cpu_branch_cost generic_branch_cost =
346 2, /* Predictable. */
347 2 /* Unpredictable. */
350 static const struct tune_params generic_tunings =
352 &cortexa57_extra_costs,
353 &generic_addrcost_table,
354 &generic_regmove_cost,
355 &generic_vector_cost,
356 &generic_branch_cost,
357 4, /* memmov_cost */
358 2, /* issue_rate */
359 AARCH64_FUSE_NOTHING, /* fusible_ops */
360 8, /* function_align. */
361 8, /* jump_align. */
362 4, /* loop_align. */
363 2, /* int_reassoc_width. */
364 4, /* fp_reassoc_width. */
365 1, /* vec_reassoc_width. */
366 2, /* min_div_recip_mul_sf. */
367 2, /* min_div_recip_mul_df. */
368 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
371 static const struct tune_params cortexa53_tunings =
373 &cortexa53_extra_costs,
374 &generic_addrcost_table,
375 &cortexa53_regmove_cost,
376 &generic_vector_cost,
377 &generic_branch_cost,
378 4, /* memmov_cost */
379 2, /* issue_rate */
380 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
381 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
382 8, /* function_align. */
383 8, /* jump_align. */
384 4, /* loop_align. */
385 2, /* int_reassoc_width. */
386 4, /* fp_reassoc_width. */
387 1, /* vec_reassoc_width. */
388 2, /* min_div_recip_mul_sf. */
389 2, /* min_div_recip_mul_df. */
390 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
393 static const struct tune_params cortexa57_tunings =
395 &cortexa57_extra_costs,
396 &cortexa57_addrcost_table,
397 &cortexa57_regmove_cost,
398 &cortexa57_vector_cost,
399 &generic_branch_cost,
400 4, /* memmov_cost */
401 3, /* issue_rate */
402 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
403 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
404 16, /* function_align. */
405 8, /* jump_align. */
406 4, /* loop_align. */
407 2, /* int_reassoc_width. */
408 4, /* fp_reassoc_width. */
409 1, /* vec_reassoc_width. */
410 2, /* min_div_recip_mul_sf. */
411 2, /* min_div_recip_mul_df. */
412 (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags. */
415 static const struct tune_params cortexa72_tunings =
417 &cortexa57_extra_costs,
418 &cortexa57_addrcost_table,
419 &cortexa57_regmove_cost,
420 &cortexa57_vector_cost,
421 &generic_branch_cost,
422 4, /* memmov_cost */
423 3, /* issue_rate */
424 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
425 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
426 16, /* function_align. */
427 8, /* jump_align. */
428 4, /* loop_align. */
429 2, /* int_reassoc_width. */
430 4, /* fp_reassoc_width. */
431 1, /* vec_reassoc_width. */
432 2, /* min_div_recip_mul_sf. */
433 2, /* min_div_recip_mul_df. */
434 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
437 static const struct tune_params thunderx_tunings =
439 &thunderx_extra_costs,
440 &generic_addrcost_table,
441 &thunderx_regmove_cost,
442 &generic_vector_cost,
443 &generic_branch_cost,
444 6, /* memmov_cost */
445 2, /* issue_rate */
446 AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
447 8, /* function_align. */
448 8, /* jump_align. */
449 8, /* loop_align. */
450 2, /* int_reassoc_width. */
451 4, /* fp_reassoc_width. */
452 1, /* vec_reassoc_width. */
453 2, /* min_div_recip_mul_sf. */
454 2, /* min_div_recip_mul_df. */
455 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
458 static const struct tune_params xgene1_tunings =
460 &xgene1_extra_costs,
461 &xgene1_addrcost_table,
462 &xgene1_regmove_cost,
463 &xgene1_vector_cost,
464 &generic_branch_cost,
465 6, /* memmov_cost */
466 4, /* issue_rate */
467 AARCH64_FUSE_NOTHING, /* fusible_ops */
468 16, /* function_align. */
469 8, /* jump_align. */
470 16, /* loop_align. */
471 2, /* int_reassoc_width. */
472 4, /* fp_reassoc_width. */
473 1, /* vec_reassoc_width. */
474 2, /* min_div_recip_mul_sf. */
475 2, /* min_div_recip_mul_df. */
476 (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
479 /* Support for fine-grained override of the tuning structures. */
480 struct aarch64_tuning_override_function
482 const char* name;
483 void (*parse_override)(const char*, struct tune_params*);
486 static void aarch64_parse_fuse_string (const char*, struct tune_params*);
487 static void aarch64_parse_tune_string (const char*, struct tune_params*);
489 static const struct aarch64_tuning_override_function
490 aarch64_tuning_override_functions[] =
492 { "fuse", aarch64_parse_fuse_string },
493 { "tune", aarch64_parse_tune_string },
494 { NULL, NULL }
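/* The tables above and these parse functions back the -moverride option,
   which lets the individual fusion pairs and extra tuning flags listed in
   aarch64-fusion-pairs.def and aarch64-tuning-flags.def be overridden by
   name on the command line.  */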
497 /* A processor implementing AArch64. */
498 struct processor
500 const char *const name;
501 enum aarch64_processor ident;
502 enum aarch64_processor sched_core;
503 enum aarch64_arch arch;
504 unsigned architecture_version;
505 const unsigned long flags;
506 const struct tune_params *const tune;
509 /* Architectures implementing AArch64. */
510 static const struct processor all_architectures[] =
512 #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
513 {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
514 #include "aarch64-arches.def"
515 #undef AARCH64_ARCH
516 {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
519 /* Processor cores implementing AArch64. */
520 static const struct processor all_cores[] =
522 #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
523 {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
524 all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
525 FLAGS, &COSTS##_tunings},
526 #include "aarch64-cores.def"
527 #undef AARCH64_CORE
528 {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
529 AARCH64_FL_FOR_ARCH8, &generic_tunings},
530 {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
534 /* Target specification. These are populated by the -march, -mtune, -mcpu
535 handling code or by target attributes. */
536 static const struct processor *selected_arch;
537 static const struct processor *selected_cpu;
538 static const struct processor *selected_tune;
540 /* The current tuning set. */
541 struct tune_params aarch64_tune_params = generic_tunings;
543 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
545 /* An ISA extension in the co-processor and main instruction set space. */
546 struct aarch64_option_extension
548 const char *const name;
549 const unsigned long flags_on;
550 const unsigned long flags_off;
553 /* ISA extensions in AArch64. */
554 static const struct aarch64_option_extension all_extensions[] =
556 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \
557 {NAME, FLAGS_ON, FLAGS_OFF},
558 #include "aarch64-option-extensions.def"
559 #undef AARCH64_OPT_EXTENSION
560 {NULL, 0, 0}
563 /* Used to track the size of an address when generating a pre/post
564 increment address. */
565 static machine_mode aarch64_memory_reference_mode;
567 /* A table of valid AArch64 "bitmask immediate" values for
568 logical instructions. */
570 #define AARCH64_NUM_BITMASKS 5334
571 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
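/* A bitmask immediate is built from a 2, 4, 8, 16, 32 or 64-bit element
   containing a rotated contiguous run of set bits, replicated to fill
   64 bits; 5334 is the number of distinct such values (0 and ~0 are not
   representable, 0x00ff00ff00ff00ff is).  */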
573 typedef enum aarch64_cond_code
575 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
576 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
577 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
579 aarch64_cc;
581 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
583 /* The condition codes of the processor, and the inverse function. */
584 static const char * const aarch64_condition_codes[] =
586 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
587 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
590 void
591 aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
593 const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
594 if (TARGET_GENERAL_REGS_ONLY)
595 error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
596 else
597 error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
600 static unsigned int
601 aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
603 if (GET_MODE_UNIT_SIZE (mode) == 4)
604 return aarch64_tune_params.min_div_recip_mul_sf;
605 return aarch64_tune_params.min_div_recip_mul_df;
608 static int
609 aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
610 enum machine_mode mode)
612 if (VECTOR_MODE_P (mode))
613 return aarch64_tune_params.vec_reassoc_width;
614 if (INTEGRAL_MODE_P (mode))
615 return aarch64_tune_params.int_reassoc_width;
616 if (FLOAT_MODE_P (mode))
617 return aarch64_tune_params.fp_reassoc_width;
618 return 1;
621 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
622 unsigned
623 aarch64_dbx_register_number (unsigned regno)
625 if (GP_REGNUM_P (regno))
626 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
627 else if (regno == SP_REGNUM)
628 return AARCH64_DWARF_SP;
629 else if (FP_REGNUM_P (regno))
630 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
632 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
633 equivalent DWARF register. */
634 return DWARF_FRAME_REGISTERS;
637 /* Return TRUE if MODE is any of the large INT modes. */
638 static bool
639 aarch64_vect_struct_mode_p (machine_mode mode)
641 return mode == OImode || mode == CImode || mode == XImode;
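/* OImode, CImode and XImode are the 256-, 384- and 512-bit opaque integer
   modes used for the 2-, 3- and 4-register tuples of the Advanced SIMD
   structure load/store instructions (LD2/ST2, LD3/ST3, LD4/ST4).  */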
644 /* Return TRUE if MODE is any of the vector modes. */
645 static bool
646 aarch64_vector_mode_p (machine_mode mode)
648 return aarch64_vector_mode_supported_p (mode)
649 || aarch64_vect_struct_mode_p (mode);
652 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
653 static bool
654 aarch64_array_mode_supported_p (machine_mode mode,
655 unsigned HOST_WIDE_INT nelems)
657 if (TARGET_SIMD
658 && AARCH64_VALID_SIMD_QREG_MODE (mode)
659 && (nelems >= 2 && nelems <= 4))
660 return true;
662 return false;
665 /* Implement HARD_REGNO_NREGS. */
668 aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
670 switch (aarch64_regno_regclass (regno))
672 case FP_REGS:
673 case FP_LO_REGS:
674 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
675 default:
676 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
678 gcc_unreachable ();
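/* For example, a 128-bit TImode value needs one 128-bit vector register
   but two 64-bit general registers, hence the split between
   UNITS_PER_VREG (16) for FP_REGS/FP_LO_REGS and UNITS_PER_WORD (8) for
   everything else.  */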
681 /* Implement HARD_REGNO_MODE_OK. */
684 aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
686 if (GET_MODE_CLASS (mode) == MODE_CC)
687 return regno == CC_REGNUM;
689 if (regno == SP_REGNUM)
690 /* The purpose of comparing with ptr_mode is to support the
691 global register variable associated with the stack pointer
692 register via the syntax of asm ("wsp") in ILP32. */
693 return mode == Pmode || mode == ptr_mode;
695 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
696 return mode == Pmode;
698 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
699 return 1;
701 if (FP_REGNUM_P (regno))
703 if (aarch64_vect_struct_mode_p (mode))
704 return
705 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
706 else
707 return 1;
710 return 0;
713 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
714 machine_mode
715 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
716 machine_mode mode)
718 /* Handle modes that fit within single registers. */
719 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
721 if (GET_MODE_SIZE (mode) >= 4)
722 return mode;
723 else
724 return SImode;
726 /* Fall back to generic for multi-reg and very large modes. */
727 else
728 return choose_hard_reg_mode (regno, nregs, false);
731 /* Return true if calls to DECL should be treated as
 732 long-calls (i.e. called via a register).  */
733 static bool
734 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
736 return false;
739 /* Return true if calls to symbol-ref SYM should be treated as
 740 long-calls (i.e. called via a register).  */
741 bool
742 aarch64_is_long_call_p (rtx sym)
744 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
747 /* Return true if the offsets to a zero/sign-extract operation
748 represent an expression that matches an extend operation. The
 749 operands represent the parameters from
751 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
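/* For example, in DImode, MULT_IMM == 4 and EXTRACT_IMM == 34 satisfy the
   checks below: taking the low 34 bits of (reg * 4) yields the low 32 bits
   of REG shifted left by 2, i.e. a 32-bit value extended and scaled by 4.  */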
752 bool
753 aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
754 rtx extract_imm)
756 HOST_WIDE_INT mult_val, extract_val;
758 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
759 return false;
761 mult_val = INTVAL (mult_imm);
762 extract_val = INTVAL (extract_imm);
764 if (extract_val > 8
765 && extract_val < GET_MODE_BITSIZE (mode)
766 && exact_log2 (extract_val & ~7) > 0
767 && (extract_val & 7) <= 4
768 && mult_val == (1 << (extract_val & 7)))
769 return true;
771 return false;
774 /* Emit an insn that's a simple single-set. Both the operands must be
775 known to be valid. */
776 inline static rtx
777 emit_set_insn (rtx x, rtx y)
779 return emit_insn (gen_rtx_SET (x, y));
782 /* X and Y are two things to compare using CODE. Emit the compare insn and
783 return the rtx for register 0 in the proper mode. */
785 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
787 machine_mode mode = SELECT_CC_MODE (code, x, y);
788 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
790 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
791 return cc_reg;
794 /* Build the SYMBOL_REF for __tls_get_addr. */
796 static GTY(()) rtx tls_get_addr_libfunc;
799 aarch64_tls_get_addr (void)
801 if (!tls_get_addr_libfunc)
802 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
803 return tls_get_addr_libfunc;
806 /* Return the TLS model to use for ADDR. */
808 static enum tls_model
809 tls_symbolic_operand_type (rtx addr)
811 enum tls_model tls_kind = TLS_MODEL_NONE;
812 rtx sym, addend;
814 if (GET_CODE (addr) == CONST)
816 split_const (addr, &sym, &addend);
817 if (GET_CODE (sym) == SYMBOL_REF)
818 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
820 else if (GET_CODE (addr) == SYMBOL_REF)
821 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
823 return tls_kind;
 826 /* We allow LO_SUMs in our legitimate addresses so that combine
 827 can take care of merging addresses where necessary, but for
 828 code-generation purposes we generate the address
829 as :
830 RTL Absolute
831 tmp = hi (symbol_ref); adrp x1, foo
832 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
835 PIC TLS
836 adrp x1, :got:foo adrp tmp, :tlsgd:foo
837 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
838 bl __tls_get_addr
841 Load TLS symbol, depending on TLS mechanism and TLS access model.
843 Global Dynamic - Traditional TLS:
844 adrp tmp, :tlsgd:imm
845 add dest, tmp, #:tlsgd_lo12:imm
846 bl __tls_get_addr
848 Global Dynamic - TLS Descriptors:
849 adrp dest, :tlsdesc:imm
850 ldr tmp, [dest, #:tlsdesc_lo12:imm]
851 add dest, dest, #:tlsdesc_lo12:imm
852 blr tmp
853 mrs tp, tpidr_el0
854 add dest, dest, tp
856 Initial Exec:
857 mrs tp, tpidr_el0
858 adrp tmp, :gottprel:imm
859 ldr dest, [tmp, #:gottprel_lo12:imm]
860 add dest, dest, tp
862 Local Exec:
863 mrs tp, tpidr_el0
864 add t0, tp, #:tprel_hi12:imm, lsl #12
865 add t0, t0, #:tprel_lo12_nc:imm
868 static void
869 aarch64_load_symref_appropriately (rtx dest, rtx imm,
870 enum aarch64_symbol_type type)
872 switch (type)
874 case SYMBOL_SMALL_ABSOLUTE:
876 /* In ILP32, the mode of dest can be either SImode or DImode. */
877 rtx tmp_reg = dest;
878 machine_mode mode = GET_MODE (dest);
880 gcc_assert (mode == Pmode || mode == ptr_mode);
882 if (can_create_pseudo_p ())
883 tmp_reg = gen_reg_rtx (mode);
885 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
886 emit_insn (gen_add_losym (dest, tmp_reg, imm));
887 return;
890 case SYMBOL_TINY_ABSOLUTE:
891 emit_insn (gen_rtx_SET (dest, imm));
892 return;
894 case SYMBOL_SMALL_GOT_28K:
896 machine_mode mode = GET_MODE (dest);
897 rtx gp_rtx = pic_offset_table_rtx;
898 rtx insn;
899 rtx mem;
901 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
902 here before rtl expand. Tree IVOPT will generate rtl pattern to
903 decide rtx costs, in which case pic_offset_table_rtx is not
904 initialized. For that case no need to generate the first adrp
905 instruction as the final cost for global variable access is
906 one instruction. */
907 if (gp_rtx != NULL)
 909 /* -fpic for -mcmodel=small allows a 32K GOT table size (but since
 910 we use the page base as the GOT base, the first page may be wasted;
 911 in the worst case only 28K of space is left for the GOT).
 913 The generated instruction sequence for accessing a global variable is:
916 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
 918 Only one instruction is needed, but we must initialize
 919 pic_offset_table_rtx properly. We generate an initialization insn
 920 for every global access and rely on CSE to remove the redundant ones.
 922 The final instruction sequence will look like the following
 923 for multiple global variable accesses.
925 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
927 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
928 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
929 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
930 ... */
932 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
933 crtl->uses_pic_offset_table = 1;
934 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
936 if (mode != GET_MODE (gp_rtx))
937 gp_rtx = simplify_gen_subreg (mode, gp_rtx, GET_MODE (gp_rtx), 0);
940 if (mode == ptr_mode)
942 if (mode == DImode)
943 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
944 else
945 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
947 mem = XVECEXP (SET_SRC (insn), 0, 0);
949 else
951 gcc_assert (mode == Pmode);
953 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
954 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
 957 /* The operand is expected to be a MEM.  Whenever the related insn
 958 pattern changes, the code above that computes MEM must be
 959 updated as well.  */
960 gcc_assert (GET_CODE (mem) == MEM);
961 MEM_READONLY_P (mem) = 1;
962 MEM_NOTRAP_P (mem) = 1;
963 emit_insn (insn);
964 return;
967 case SYMBOL_SMALL_GOT_4G:
969 /* In ILP32, the mode of dest can be either SImode or DImode,
970 while the got entry is always of SImode size. The mode of
971 dest depends on how dest is used: if dest is assigned to a
972 pointer (e.g. in the memory), it has SImode; it may have
 973 DImode if dest is dereferenced to access the memory.
974 This is why we have to handle three different ldr_got_small
975 patterns here (two patterns for ILP32). */
977 rtx insn;
978 rtx mem;
979 rtx tmp_reg = dest;
980 machine_mode mode = GET_MODE (dest);
982 if (can_create_pseudo_p ())
983 tmp_reg = gen_reg_rtx (mode);
985 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
986 if (mode == ptr_mode)
988 if (mode == DImode)
989 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
990 else
991 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
993 mem = XVECEXP (SET_SRC (insn), 0, 0);
995 else
997 gcc_assert (mode == Pmode);
999 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
1000 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1003 gcc_assert (GET_CODE (mem) == MEM);
1004 MEM_READONLY_P (mem) = 1;
1005 MEM_NOTRAP_P (mem) = 1;
1006 emit_insn (insn);
1007 return;
1010 case SYMBOL_SMALL_TLSGD:
1012 rtx_insn *insns;
1013 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
1015 start_sequence ();
1016 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
1017 insns = get_insns ();
1018 end_sequence ();
1020 RTL_CONST_CALL_P (insns) = 1;
1021 emit_libcall_block (insns, dest, result, imm);
1022 return;
1025 case SYMBOL_SMALL_TLSDESC:
1027 machine_mode mode = GET_MODE (dest);
1028 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
1029 rtx tp;
1031 gcc_assert (mode == Pmode || mode == ptr_mode);
1033 /* In ILP32, the got entry is always of SImode size. Unlike
1034 small GOT, the dest is fixed at reg 0. */
1035 if (TARGET_ILP32)
1036 emit_insn (gen_tlsdesc_small_si (imm));
1037 else
1038 emit_insn (gen_tlsdesc_small_di (imm));
1039 tp = aarch64_load_tp (NULL);
1041 if (mode != Pmode)
1042 tp = gen_lowpart (mode, tp);
1044 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
1045 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1046 return;
1049 case SYMBOL_SMALL_GOTTPREL:
1051 /* In ILP32, the mode of dest can be either SImode or DImode,
1052 while the got entry is always of SImode size. The mode of
1053 dest depends on how dest is used: if dest is assigned to a
1054 pointer (e.g. in the memory), it has SImode; it may have
 1055 DImode if dest is dereferenced to access the memory.
1056 This is why we have to handle three different tlsie_small
1057 patterns here (two patterns for ILP32). */
1058 machine_mode mode = GET_MODE (dest);
1059 rtx tmp_reg = gen_reg_rtx (mode);
1060 rtx tp = aarch64_load_tp (NULL);
1062 if (mode == ptr_mode)
1064 if (mode == DImode)
1065 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
1066 else
1068 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
1069 tp = gen_lowpart (mode, tp);
1072 else
1074 gcc_assert (mode == Pmode);
1075 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
1078 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
1079 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1080 return;
1083 case SYMBOL_TLSLE:
1085 rtx tp = aarch64_load_tp (NULL);
1087 if (GET_MODE (dest) != Pmode)
1088 tp = gen_lowpart (GET_MODE (dest), tp);
1090 emit_insn (gen_tlsle (dest, tp, imm));
1091 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1092 return;
1095 case SYMBOL_TINY_GOT:
1096 emit_insn (gen_ldr_got_tiny (dest, imm));
1097 return;
1099 default:
1100 gcc_unreachable ();
1104 /* Emit a move from SRC to DEST. Assume that the move expanders can
1105 handle all moves if !can_create_pseudo_p (). The distinction is
1106 important because, unlike emit_move_insn, the move expanders know
1107 how to force Pmode objects into the constant pool even when the
1108 constant pool address is not itself legitimate. */
1109 static rtx
1110 aarch64_emit_move (rtx dest, rtx src)
1112 return (can_create_pseudo_p ()
1113 ? emit_move_insn (dest, src)
1114 : emit_move_insn_1 (dest, src));
1117 /* Split a 128-bit move operation into two 64-bit move operations,
1118 taking care to handle partial overlap of register to register
1119 copies. Special cases are needed when moving between GP regs and
1120 FP regs. SRC can be a register, constant or memory; DST a register
1121 or memory. If either operand is memory it must not have any side
1122 effects. */
1123 void
1124 aarch64_split_128bit_move (rtx dst, rtx src)
1126 rtx dst_lo, dst_hi;
1127 rtx src_lo, src_hi;
1129 machine_mode mode = GET_MODE (dst);
1131 gcc_assert (mode == TImode || mode == TFmode);
1132 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
1133 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
1135 if (REG_P (dst) && REG_P (src))
1137 int src_regno = REGNO (src);
1138 int dst_regno = REGNO (dst);
1140 /* Handle FP <-> GP regs. */
1141 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
1143 src_lo = gen_lowpart (word_mode, src);
1144 src_hi = gen_highpart (word_mode, src);
1146 if (mode == TImode)
1148 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
1149 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
1151 else
1153 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
1154 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
1156 return;
1158 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
1160 dst_lo = gen_lowpart (word_mode, dst);
1161 dst_hi = gen_highpart (word_mode, dst);
1163 if (mode == TImode)
1165 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
1166 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
1168 else
1170 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
1171 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
1173 return;
1177 dst_lo = gen_lowpart (word_mode, dst);
1178 dst_hi = gen_highpart (word_mode, dst);
1179 src_lo = gen_lowpart (word_mode, src);
1180 src_hi = gen_highpart_mode (word_mode, mode, src);
1182 /* At most one pairing may overlap. */
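/* E.g. if DST occupies x1/x2 and SRC occupies x0/x1, the high halves must
   be copied first (x2 <- x1 before x1 <- x0) so that the overlapping
   register is read before it is overwritten.  */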
1183 if (reg_overlap_mentioned_p (dst_lo, src_hi))
1185 aarch64_emit_move (dst_hi, src_hi);
1186 aarch64_emit_move (dst_lo, src_lo);
1188 else
1190 aarch64_emit_move (dst_lo, src_lo);
1191 aarch64_emit_move (dst_hi, src_hi);
1195 bool
1196 aarch64_split_128bit_move_p (rtx dst, rtx src)
1198 return (! REG_P (src)
1199 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
1202 /* Split a complex SIMD combine. */
1204 void
1205 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
1207 machine_mode src_mode = GET_MODE (src1);
1208 machine_mode dst_mode = GET_MODE (dst);
1210 gcc_assert (VECTOR_MODE_P (dst_mode));
1212 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
1214 rtx (*gen) (rtx, rtx, rtx);
1216 switch (src_mode)
1218 case V8QImode:
1219 gen = gen_aarch64_simd_combinev8qi;
1220 break;
1221 case V4HImode:
1222 gen = gen_aarch64_simd_combinev4hi;
1223 break;
1224 case V2SImode:
1225 gen = gen_aarch64_simd_combinev2si;
1226 break;
1227 case V2SFmode:
1228 gen = gen_aarch64_simd_combinev2sf;
1229 break;
1230 case DImode:
1231 gen = gen_aarch64_simd_combinedi;
1232 break;
1233 case DFmode:
1234 gen = gen_aarch64_simd_combinedf;
1235 break;
1236 default:
1237 gcc_unreachable ();
1240 emit_insn (gen (dst, src1, src2));
1241 return;
1245 /* Split a complex SIMD move. */
1247 void
1248 aarch64_split_simd_move (rtx dst, rtx src)
1250 machine_mode src_mode = GET_MODE (src);
1251 machine_mode dst_mode = GET_MODE (dst);
1253 gcc_assert (VECTOR_MODE_P (dst_mode));
1255 if (REG_P (dst) && REG_P (src))
1257 rtx (*gen) (rtx, rtx);
1259 gcc_assert (VECTOR_MODE_P (src_mode));
1261 switch (src_mode)
1263 case V16QImode:
1264 gen = gen_aarch64_split_simd_movv16qi;
1265 break;
1266 case V8HImode:
1267 gen = gen_aarch64_split_simd_movv8hi;
1268 break;
1269 case V4SImode:
1270 gen = gen_aarch64_split_simd_movv4si;
1271 break;
1272 case V2DImode:
1273 gen = gen_aarch64_split_simd_movv2di;
1274 break;
1275 case V4SFmode:
1276 gen = gen_aarch64_split_simd_movv4sf;
1277 break;
1278 case V2DFmode:
1279 gen = gen_aarch64_split_simd_movv2df;
1280 break;
1281 default:
1282 gcc_unreachable ();
1285 emit_insn (gen (dst, src));
1286 return;
1290 static rtx
1291 aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
1293 if (can_create_pseudo_p ())
1294 return force_reg (mode, value);
1295 else
1297 x = aarch64_emit_move (x, value);
1298 return x;
1303 static rtx
1304 aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
1306 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
1308 rtx high;
1309 /* Load the full offset into a register. This
1310 might be improvable in the future. */
1311 high = GEN_INT (offset);
1312 offset = 0;
1313 high = aarch64_force_temporary (mode, temp, high);
1314 reg = aarch64_force_temporary (mode, temp,
1315 gen_rtx_PLUS (mode, high, reg));
1317 return plus_constant (mode, reg, offset);
1320 static int
1321 aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
1322 machine_mode mode)
1324 unsigned HOST_WIDE_INT mask;
1325 int i;
1326 bool first;
1327 unsigned HOST_WIDE_INT val;
1328 bool subtargets;
1329 rtx subtarget;
1330 int one_match, zero_match, first_not_ffff_match;
1331 int num_insns = 0;
1333 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1335 if (generate)
1336 emit_insn (gen_rtx_SET (dest, imm));
1337 num_insns++;
1338 return num_insns;
1341 if (mode == SImode)
1343 /* We know we can't do this in 1 insn, and we must be able to do it
1344 in two; so don't mess around looking for sequences that don't buy
1345 us anything. */
1346 if (generate)
1348 emit_insn (gen_rtx_SET (dest, GEN_INT (INTVAL (imm) & 0xffff)));
1349 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1350 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1352 num_insns += 2;
1353 return num_insns;
1356 /* Remaining cases are all for DImode. */
1358 val = INTVAL (imm);
1359 subtargets = optimize && can_create_pseudo_p ();
1361 one_match = 0;
1362 zero_match = 0;
1363 mask = 0xffff;
1364 first_not_ffff_match = -1;
1366 for (i = 0; i < 64; i += 16, mask <<= 16)
1368 if ((val & mask) == mask)
1369 one_match++;
1370 else
1372 if (first_not_ffff_match < 0)
1373 first_not_ffff_match = i;
1374 if ((val & mask) == 0)
1375 zero_match++;
1379 if (one_match == 2)
1381 /* Set one of the quarters and then insert back into result. */
1382 mask = 0xffffll << first_not_ffff_match;
1383 if (generate)
1385 emit_insn (gen_rtx_SET (dest, GEN_INT (val | mask)));
1386 emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
1387 GEN_INT ((val >> first_not_ffff_match)
1388 & 0xffff)));
1390 num_insns += 2;
1391 return num_insns;
1394 if (zero_match == 2)
1395 goto simple_sequence;
1397 mask = 0x0ffff0000UL;
1398 for (i = 16; i < 64; i += 16, mask <<= 16)
1400 HOST_WIDE_INT comp = mask & ~(mask - 1);
1402 if (aarch64_uimm12_shift (val - (val & mask)))
1404 if (generate)
1406 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1407 emit_insn (gen_rtx_SET (subtarget, GEN_INT (val & mask)));
1408 emit_insn (gen_adddi3 (dest, subtarget,
1409 GEN_INT (val - (val & mask))));
1411 num_insns += 2;
1412 return num_insns;
1414 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1416 if (generate)
1418 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1419 emit_insn (gen_rtx_SET (subtarget,
1420 GEN_INT ((val + comp) & mask)));
1421 emit_insn (gen_adddi3 (dest, subtarget,
1422 GEN_INT (val - ((val + comp) & mask))));
1424 num_insns += 2;
1425 return num_insns;
1427 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1429 if (generate)
1431 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1432 emit_insn (gen_rtx_SET (subtarget,
1433 GEN_INT ((val - comp) | ~mask)));
1434 emit_insn (gen_adddi3 (dest, subtarget,
1435 GEN_INT (val - ((val - comp) | ~mask))));
1437 num_insns += 2;
1438 return num_insns;
1440 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1442 if (generate)
1444 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1445 emit_insn (gen_rtx_SET (subtarget, GEN_INT (val | ~mask)));
1446 emit_insn (gen_adddi3 (dest, subtarget,
1447 GEN_INT (val - (val | ~mask))));
1449 num_insns += 2;
1450 return num_insns;
1454 /* See if we can do it by arithmetically combining two
1455 immediates. */
1456 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1458 int j;
1459 mask = 0xffff;
1461 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1462 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1464 if (generate)
1466 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1467 emit_insn (gen_rtx_SET (subtarget,
1468 GEN_INT (aarch64_bitmasks[i])));
1469 emit_insn (gen_adddi3 (dest, subtarget,
1470 GEN_INT (val - aarch64_bitmasks[i])));
1472 num_insns += 2;
1473 return num_insns;
1476 for (j = 0; j < 64; j += 16, mask <<= 16)
1478 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1480 if (generate)
1482 emit_insn (gen_rtx_SET (dest,
1483 GEN_INT (aarch64_bitmasks[i])));
1484 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1485 GEN_INT ((val >> j) & 0xffff)));
1487 num_insns += 2;
1488 return num_insns;
1493 /* See if we can do it by logically combining two immediates. */
1494 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1496 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1498 int j;
1500 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1501 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1503 if (generate)
1505 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1506 emit_insn (gen_rtx_SET (subtarget,
1507 GEN_INT (aarch64_bitmasks[i])));
1508 emit_insn (gen_iordi3 (dest, subtarget,
1509 GEN_INT (aarch64_bitmasks[j])));
1511 num_insns += 2;
1512 return num_insns;
1515 else if ((val & aarch64_bitmasks[i]) == val)
1517 int j;
1519 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1520 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1522 if (generate)
1524 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1525 emit_insn (gen_rtx_SET (subtarget,
1526 GEN_INT (aarch64_bitmasks[j])));
1527 emit_insn (gen_anddi3 (dest, subtarget,
1528 GEN_INT (aarch64_bitmasks[i])));
1530 num_insns += 2;
1531 return num_insns;
1536 if (one_match > zero_match)
1538 /* Set either first three quarters or all but the third. */
1539 mask = 0xffffll << (16 - first_not_ffff_match);
1540 if (generate)
1541 emit_insn (gen_rtx_SET (dest,
1542 GEN_INT (val | mask | 0xffffffff00000000ull)));
1543 num_insns ++;
 1545 /* Now insert the other two quarters.  */
1546 for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
1547 i < 64; i += 16, mask <<= 16)
1549 if ((val & mask) != mask)
1551 if (generate)
1552 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1553 GEN_INT ((val >> i) & 0xffff)));
1554 num_insns ++;
1557 return num_insns;
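/* Fall back to building the value 16 bits at a time: a move-wide for the
   first non-zero 16-bit chunk (the SET below) and an insv_immdi (MOVK-style
   insertion) for each remaining non-zero chunk, i.e. at most four
   instructions for a DImode constant.  */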
1560 simple_sequence:
1561 first = true;
1562 mask = 0xffff;
1563 for (i = 0; i < 64; i += 16, mask <<= 16)
1565 if ((val & mask) != 0)
1567 if (first)
1569 if (generate)
1570 emit_insn (gen_rtx_SET (dest, GEN_INT (val & mask)));
1571 num_insns ++;
1572 first = false;
1574 else
1576 if (generate)
1577 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1578 GEN_INT ((val >> i) & 0xffff)));
1579 num_insns ++;
1584 return num_insns;
1588 void
1589 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1591 machine_mode mode = GET_MODE (dest);
1593 gcc_assert (mode == SImode || mode == DImode);
1595 /* Check on what type of symbol it is. */
1596 if (GET_CODE (imm) == SYMBOL_REF
1597 || GET_CODE (imm) == LABEL_REF
1598 || GET_CODE (imm) == CONST)
1600 rtx mem, base, offset;
1601 enum aarch64_symbol_type sty;
1603 /* If we have (const (plus symbol offset)), separate out the offset
1604 before we start classifying the symbol. */
1605 split_const (imm, &base, &offset);
1607 sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
1608 switch (sty)
1610 case SYMBOL_FORCE_TO_MEM:
1611 if (offset != const0_rtx
1612 && targetm.cannot_force_const_mem (mode, imm))
1614 gcc_assert (can_create_pseudo_p ());
1615 base = aarch64_force_temporary (mode, dest, base);
1616 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1617 aarch64_emit_move (dest, base);
1618 return;
1620 mem = force_const_mem (ptr_mode, imm);
1621 gcc_assert (mem);
1622 if (mode != ptr_mode)
1623 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1624 emit_insn (gen_rtx_SET (dest, mem));
1625 return;
1627 case SYMBOL_SMALL_TLSGD:
1628 case SYMBOL_SMALL_TLSDESC:
1629 case SYMBOL_SMALL_GOTTPREL:
1630 case SYMBOL_SMALL_GOT_28K:
1631 case SYMBOL_SMALL_GOT_4G:
1632 case SYMBOL_TINY_GOT:
1633 if (offset != const0_rtx)
 1635 gcc_assert (can_create_pseudo_p ());
1636 base = aarch64_force_temporary (mode, dest, base);
1637 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1638 aarch64_emit_move (dest, base);
1639 return;
1641 /* FALLTHRU */
1643 case SYMBOL_SMALL_ABSOLUTE:
1644 case SYMBOL_TINY_ABSOLUTE:
1645 case SYMBOL_TLSLE:
1646 aarch64_load_symref_appropriately (dest, imm, sty);
1647 return;
1649 default:
1650 gcc_unreachable ();
1654 if (!CONST_INT_P (imm))
1656 if (GET_CODE (imm) == HIGH)
1657 emit_insn (gen_rtx_SET (dest, imm));
1658 else
1660 rtx mem = force_const_mem (mode, imm);
1661 gcc_assert (mem);
1662 emit_insn (gen_rtx_SET (dest, mem));
1665 return;
1668 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
1671 static bool
1672 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1673 tree exp ATTRIBUTE_UNUSED)
1675 /* Currently, always true. */
1676 return true;
1679 /* Implement TARGET_PASS_BY_REFERENCE. */
1681 static bool
1682 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1683 machine_mode mode,
1684 const_tree type,
1685 bool named ATTRIBUTE_UNUSED)
1687 HOST_WIDE_INT size;
1688 machine_mode dummymode;
1689 int nregs;
1691 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1692 size = (mode == BLKmode && type)
1693 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1695 /* Aggregates are passed by reference based on their size. */
1696 if (type && AGGREGATE_TYPE_P (type))
1698 size = int_size_in_bytes (type);
1701 /* Variable sized arguments are always returned by reference. */
1702 if (size < 0)
1703 return true;
1705 /* Can this be a candidate to be passed in fp/simd register(s)? */
1706 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1707 &dummymode, &nregs,
1708 NULL))
1709 return false;
1711 /* Arguments which are variable sized or larger than 2 registers are
 1712 passed by reference unless they are a homogeneous floating-point
1713 aggregate. */
1714 return size > 2 * UNITS_PER_WORD;
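/* So, for example, a 24-byte plain struct is passed by reference, while a
   struct of three doubles (an HFA, caught by the candidate check above) or
   any aggregate of at most 16 bytes is passed by value.  */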
1717 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1718 static bool
1719 aarch64_return_in_msb (const_tree valtype)
1721 machine_mode dummy_mode;
1722 int dummy_int;
1724 /* Never happens in little-endian mode. */
1725 if (!BYTES_BIG_ENDIAN)
1726 return false;
1728 /* Only composite types smaller than or equal to 16 bytes can
1729 be potentially returned in registers. */
1730 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1731 || int_size_in_bytes (valtype) <= 0
1732 || int_size_in_bytes (valtype) > 16)
1733 return false;
1735 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1736 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1737 is always passed/returned in the least significant bits of fp/simd
1738 register(s). */
1739 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1740 &dummy_mode, &dummy_int, NULL))
1741 return false;
1743 return true;
1746 /* Implement TARGET_FUNCTION_VALUE.
1747 Define how to find the value returned by a function. */
1749 static rtx
1750 aarch64_function_value (const_tree type, const_tree func,
1751 bool outgoing ATTRIBUTE_UNUSED)
1753 machine_mode mode;
1754 int unsignedp;
1755 int count;
1756 machine_mode ag_mode;
1758 mode = TYPE_MODE (type);
1759 if (INTEGRAL_TYPE_P (type))
1760 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1762 if (aarch64_return_in_msb (type))
1764 HOST_WIDE_INT size = int_size_in_bytes (type);
1766 if (size % UNITS_PER_WORD != 0)
1768 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1769 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1773 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1774 &ag_mode, &count, NULL))
1776 if (!aarch64_composite_type_p (type, mode))
1778 gcc_assert (count == 1 && mode == ag_mode);
1779 return gen_rtx_REG (mode, V0_REGNUM);
1781 else
1783 int i;
1784 rtx par;
1786 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1787 for (i = 0; i < count; i++)
1789 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1790 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1791 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1792 XVECEXP (par, 0, i) = tmp;
1794 return par;
1797 else
1798 return gen_rtx_REG (mode, R0_REGNUM);
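/* For example, an HFA of four floats is returned in s0-s3 via the PARALLEL
   built above, a 16-byte non-HFA structure is returned in x0/x1, and a
   plain double comes back in d0 (V0_REGNUM).  */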
1801 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1802 Return true if REGNO is the number of a hard register in which the values
1803 of called function may come back. */
1805 static bool
1806 aarch64_function_value_regno_p (const unsigned int regno)
1808 /* Maximum of 16 bytes can be returned in the general registers. Examples
1809 of 16-byte return values are: 128-bit integers and 16-byte small
1810 structures (excluding homogeneous floating-point aggregates). */
1811 if (regno == R0_REGNUM || regno == R1_REGNUM)
1812 return true;
1814 /* Up to four fp/simd registers can return a function value, e.g. a
1815 homogeneous floating-point aggregate having four members. */
1816 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1817 return TARGET_FLOAT;
1819 return false;
1822 /* Implement TARGET_RETURN_IN_MEMORY.
1824 If the type T of the result of a function is such that
1825 void func (T arg)
1826 would require that arg be passed as a value in a register (or set of
1827 registers) according to the parameter passing rules, then the result
1828 is returned in the same registers as would be used for such an
1829 argument. */
1831 static bool
1832 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1834 HOST_WIDE_INT size;
1835 machine_mode ag_mode;
1836 int count;
1838 if (!AGGREGATE_TYPE_P (type)
1839 && TREE_CODE (type) != COMPLEX_TYPE
1840 && TREE_CODE (type) != VECTOR_TYPE)
1841 /* Simple scalar types always returned in registers. */
1842 return false;
1844 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1845 type,
1846 &ag_mode,
1847 &count,
1848 NULL))
1849 return false;
1851 /* Types larger than 2 registers returned in memory. */
1852 size = int_size_in_bytes (type);
1853 return (size < 0 || size > 2 * UNITS_PER_WORD);
1856 static bool
1857 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
1858 const_tree type, int *nregs)
1860 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1861 return aarch64_vfp_is_call_or_return_candidate (mode,
1862 type,
1863 &pcum->aapcs_vfp_rmode,
1864 nregs,
1865 NULL);
1868 /* Given MODE and TYPE of a function argument, return the alignment in
1869 bits. The idea is to suppress any stronger alignment requested by
1870 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1871 This is a helper function for local use only. */
1873 static unsigned int
1874 aarch64_function_arg_alignment (machine_mode mode, const_tree type)
1876 unsigned int alignment;
1878 if (type)
1880 if (!integer_zerop (TYPE_SIZE (type)))
1882 if (TYPE_MODE (type) == mode)
1883 alignment = TYPE_ALIGN (type);
1884 else
1885 alignment = GET_MODE_ALIGNMENT (mode);
1887 else
1888 alignment = 0;
1890 else
1891 alignment = GET_MODE_ALIGNMENT (mode);
1893 return alignment;
1896 /* Layout a function argument according to the AAPCS64 rules. The rule
1897 numbers refer to the rule numbers in the AAPCS64. */
1899 static void
1900 aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
1901 const_tree type,
1902 bool named ATTRIBUTE_UNUSED)
1904 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1905 int ncrn, nvrn, nregs;
1906 bool allocate_ncrn, allocate_nvrn;
1907 HOST_WIDE_INT size;
1909 /* We need to do this once per argument. */
1910 if (pcum->aapcs_arg_processed)
1911 return;
1913 pcum->aapcs_arg_processed = true;
1915 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1916 size
1917 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1918 UNITS_PER_WORD);
1920 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1921 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1922 mode,
1923 type,
1924 &nregs);
 1926 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1927 The following code thus handles passing by SIMD/FP registers first. */
1929 nvrn = pcum->aapcs_nvrn;
 1931 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates
 1932 (HFA) and homogeneous short-vector aggregates (HVA).  */
1933 if (allocate_nvrn)
1935 if (!TARGET_FLOAT)
1936 aarch64_err_no_fpadvsimd (mode, "argument");
1938 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1940 pcum->aapcs_nextnvrn = nvrn + nregs;
1941 if (!aarch64_composite_type_p (type, mode))
1943 gcc_assert (nregs == 1);
1944 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1946 else
1948 rtx par;
1949 int i;
1950 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1951 for (i = 0; i < nregs; i++)
1953 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1954 V0_REGNUM + nvrn + i);
1955 tmp = gen_rtx_EXPR_LIST
1956 (VOIDmode, tmp,
1957 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1958 XVECEXP (par, 0, i) = tmp;
1960 pcum->aapcs_reg = par;
1962 return;
1964 else
1966 /* C.3 NSRN is set to 8. */
1967 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1968 goto on_stack;
1972 ncrn = pcum->aapcs_ncrn;
1973 nregs = size / UNITS_PER_WORD;
 1975 /* C.6 - C.9, though the sign and zero extension semantics are
 1976 handled elsewhere.  This is the case where the argument fits
 1977 entirely in general registers.  */
1978 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1980 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1982 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1984 /* C.8 if the argument has an alignment of 16 then the NGRN is
1985 rounded up to the next even number. */
1986 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1988 ++ncrn;
1989 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1991 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1992 A reg is still generated for it, but the caller should be smart
1993 enough not to use it. */
1994 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1996 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1998 else
2000 rtx par;
2001 int i;
2003 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2004 for (i = 0; i < nregs; i++)
2006 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
2007 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2008 GEN_INT (i * UNITS_PER_WORD));
2009 XVECEXP (par, 0, i) = tmp;
2011 pcum->aapcs_reg = par;
2014 pcum->aapcs_nextncrn = ncrn + nregs;
2015 return;
2018 /* C.11 */
2019 pcum->aapcs_nextncrn = NUM_ARG_REGS;
2021 /* The argument is passed on stack; record the needed number of words for
2022 this argument and align the total size if necessary. */
2023 on_stack:
2024 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
2025 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
2026 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
2027 16 / UNITS_PER_WORD);
2028 return;
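/* For example, with an int already in x0 (NGRN == 1), a following __int128
   argument has 16-byte alignment, so C.8 above rounds NGRN up to 2 and the
   value is passed in x2/x3, leaving x1 unused.  */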
2031 /* Implement TARGET_FUNCTION_ARG. */
2033 static rtx
2034 aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
2035 const_tree type, bool named)
2037 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2038 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
2040 if (mode == VOIDmode)
2041 return NULL_RTX;
2043 aarch64_layout_arg (pcum_v, mode, type, named);
2044 return pcum->aapcs_reg;
2047 void
2048 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
2049 const_tree fntype ATTRIBUTE_UNUSED,
2050 rtx libname ATTRIBUTE_UNUSED,
2051 const_tree fndecl ATTRIBUTE_UNUSED,
2052 unsigned n_named ATTRIBUTE_UNUSED)
2054 pcum->aapcs_ncrn = 0;
2055 pcum->aapcs_nvrn = 0;
2056 pcum->aapcs_nextncrn = 0;
2057 pcum->aapcs_nextnvrn = 0;
2058 pcum->pcs_variant = ARM_PCS_AAPCS64;
2059 pcum->aapcs_reg = NULL_RTX;
2060 pcum->aapcs_arg_processed = false;
2061 pcum->aapcs_stack_words = 0;
2062 pcum->aapcs_stack_size = 0;
2064 if (!TARGET_FLOAT
2065 && fndecl && TREE_PUBLIC (fndecl)
2066 && fntype && fntype != error_mark_node)
2068 const_tree type = TREE_TYPE (fntype);
2069 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
2070 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
2071 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
2072 &mode, &nregs, NULL))
2073 aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
2075 return;
2078 static void
2079 aarch64_function_arg_advance (cumulative_args_t pcum_v,
2080 machine_mode mode,
2081 const_tree type,
2082 bool named)
2084 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2085 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
2087 aarch64_layout_arg (pcum_v, mode, type, named);
2088 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
2089 != (pcum->aapcs_stack_words != 0));
2090 pcum->aapcs_arg_processed = false;
2091 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
2092 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
2093 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
2094 pcum->aapcs_stack_words = 0;
2095 pcum->aapcs_reg = NULL_RTX;
2099 bool
2100 aarch64_function_arg_regno_p (unsigned regno)
2102 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
2103 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
2106 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2107 PARM_BOUNDARY bits of alignment, but will be given anything up
2108 to STACK_BOUNDARY bits if the type requires it. This makes sure
2109 that both before and after the layout of each argument, the Next
2110 Stacked Argument Address (NSAA) will have a minimum alignment of
2111 8 bytes. */
2113 static unsigned int
2114 aarch64_function_arg_boundary (machine_mode mode, const_tree type)
2116 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2118 if (alignment < PARM_BOUNDARY)
2119 alignment = PARM_BOUNDARY;
2120 if (alignment > STACK_BOUNDARY)
2121 alignment = STACK_BOUNDARY;
2122 return alignment;
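/* As an illustrative example (assuming the usual values for this port,
   PARM_BOUNDARY == 64 and STACK_BOUNDARY == 128):

     int                           natural alignment  32 -> raised to  64
     16-byte aligned struct        natural alignment 128 -> kept at   128
     __attribute__((aligned(32)))  alignment         256 -> capped at 128

   so every stacked parameter starts on at least an 8-byte boundary and
   never demands more than 16-byte alignment.  */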
2125 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
2127 Return true if an argument passed on the stack should be padded upwards,
2128 i.e. if the least-significant byte of the stack slot has useful data.
 2130    Small aggregate types are placed at the lowest memory address.
2132 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2134 bool
2135 aarch64_pad_arg_upward (machine_mode mode, const_tree type)
2137 /* On little-endian targets, the least significant byte of every stack
2138 argument is passed at the lowest byte address of the stack slot. */
2139 if (!BYTES_BIG_ENDIAN)
2140 return true;
2142 /* Otherwise, integral, floating-point and pointer types are padded downward:
2143 the least significant byte of a stack argument is passed at the highest
2144 byte address of the stack slot. */
2145 if (type
2146 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
2147 || POINTER_TYPE_P (type))
2148 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
2149 return false;
 2151   /* Everything else is padded upward, i.e. the data is in the first byte of the stack slot.  */
2152 return true;
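/* For example (illustrative only): on a big-endian target an 'int'
   passed on the stack is padded downward, so its four bytes occupy the
   high end of the 8-byte slot, whereas a three-byte
   struct { char a, b, c; } is padded upward and starts at the lowest
   byte address of its slot.  On little-endian targets everything
   starts at the lowest byte address.  */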
2155 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
 2157    It specifies the padding for the last element (which may also be
 2158    the only element) of a block move between registers and memory.
 2159    Assuming the block is in memory, padding upward means that the
 2160    last element is padded after its most significant byte, while with
 2161    downward padding the last element is padded at its least
 2162    significant byte side.
2164 Small aggregates and small complex types are always padded
2165 upwards.
2167 We don't need to worry about homogeneous floating-point or
2168 short-vector aggregates; their move is not affected by the
2169 padding direction determined here. Regardless of endianness,
2170 each element of such an aggregate is put in the least
2171 significant bits of a fp/simd register.
2173 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2174 register has useful data, and return the opposite if the most
2175 significant byte does. */
2177 bool
2178 aarch64_pad_reg_upward (machine_mode mode, const_tree type,
2179 bool first ATTRIBUTE_UNUSED)
2182 /* Small composite types are always padded upward. */
2183 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2185 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2186 : GET_MODE_SIZE (mode));
2187 if (size < 2 * UNITS_PER_WORD)
2188 return true;
2191 /* Otherwise, use the default padding. */
2192 return !BYTES_BIG_ENDIAN;
2195 static machine_mode
2196 aarch64_libgcc_cmp_return_mode (void)
2198 return SImode;
2201 static bool
2202 aarch64_frame_pointer_required (void)
2204 /* In aarch64_override_options_after_change
2205 flag_omit_leaf_frame_pointer turns off the frame pointer by
2206 default. Turn it back on now if we've not got a leaf
2207 function. */
2208 if (flag_omit_leaf_frame_pointer
2209 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2210 return true;
2212 return false;
2215 /* Mark the registers that need to be saved by the callee and calculate
2216 the size of the callee-saved registers area and frame record (both FP
2217 and LR may be omitted). */
2218 static void
2219 aarch64_layout_frame (void)
2221 HOST_WIDE_INT offset = 0;
2222 int regno;
2224 if (reload_completed && cfun->machine->frame.laid_out)
2225 return;
2227 #define SLOT_NOT_REQUIRED (-2)
2228 #define SLOT_REQUIRED (-1)
2230 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
2231 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
2233 /* First mark all the registers that really need to be saved... */
2234 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2235 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2237 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2238 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2240 /* ... that includes the eh data registers (if needed)... */
2241 if (crtl->calls_eh_return)
2242 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
2243 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2244 = SLOT_REQUIRED;
2246 /* ... and any callee saved register that dataflow says is live. */
2247 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2248 if (df_regs_ever_live_p (regno)
2249 && (regno == R30_REGNUM
2250 || !call_used_regs[regno]))
2251 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2253 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2254 if (df_regs_ever_live_p (regno)
2255 && !call_used_regs[regno])
2256 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2258 if (frame_pointer_needed)
2260 /* FP and LR are placed in the linkage record. */
2261 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
2262 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2263 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
2264 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
2265 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2266 offset += 2 * UNITS_PER_WORD;
2269 /* Now assign stack slots for them. */
2270 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2271 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2273 cfun->machine->frame.reg_offset[regno] = offset;
2274 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2275 cfun->machine->frame.wb_candidate1 = regno;
2276 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
2277 cfun->machine->frame.wb_candidate2 = regno;
2278 offset += UNITS_PER_WORD;
2281 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2282 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2284 cfun->machine->frame.reg_offset[regno] = offset;
2285 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2286 cfun->machine->frame.wb_candidate1 = regno;
2287 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
2288 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2289 cfun->machine->frame.wb_candidate2 = regno;
2290 offset += UNITS_PER_WORD;
2293 cfun->machine->frame.padding0 =
2294 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
2295 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2297 cfun->machine->frame.saved_regs_size = offset;
2299 cfun->machine->frame.hard_fp_offset
2300 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
2301 + get_frame_size ()
2302 + cfun->machine->frame.saved_regs_size,
2303 STACK_BOUNDARY / BITS_PER_UNIT);
2305 cfun->machine->frame.frame_size
2306 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
2307 + crtl->outgoing_args_size,
2308 STACK_BOUNDARY / BITS_PER_UNIT);
2310 cfun->machine->frame.laid_out = true;
2313 static bool
2314 aarch64_register_saved_on_entry (int regno)
2316 return cfun->machine->frame.reg_offset[regno] >= 0;
2319 static unsigned
2320 aarch64_next_callee_save (unsigned regno, unsigned limit)
2322 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2323 regno ++;
2324 return regno;
2327 static void
2328 aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
2329 HOST_WIDE_INT adjustment)
2331 rtx base_rtx = stack_pointer_rtx;
2332 rtx insn, reg, mem;
2334 reg = gen_rtx_REG (mode, regno);
2335 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2336 plus_constant (Pmode, base_rtx, -adjustment));
2337 mem = gen_rtx_MEM (mode, mem);
2339 insn = emit_move_insn (mem, reg);
2340 RTX_FRAME_RELATED_P (insn) = 1;
2343 static rtx
2344 aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
2345 HOST_WIDE_INT adjustment)
2347 switch (mode)
2349 case DImode:
2350 return gen_storewb_pairdi_di (base, base, reg, reg2,
2351 GEN_INT (-adjustment),
2352 GEN_INT (UNITS_PER_WORD - adjustment));
2353 case DFmode:
2354 return gen_storewb_pairdf_di (base, base, reg, reg2,
2355 GEN_INT (-adjustment),
2356 GEN_INT (UNITS_PER_WORD - adjustment));
2357 default:
2358 gcc_unreachable ();
2362 static void
2363 aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
2364 unsigned regno2, HOST_WIDE_INT adjustment)
2366 rtx_insn *insn;
2367 rtx reg1 = gen_rtx_REG (mode, regno1);
2368 rtx reg2 = gen_rtx_REG (mode, regno2);
2370 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2371 reg2, adjustment));
2372 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2373 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2374 RTX_FRAME_RELATED_P (insn) = 1;
2377 static rtx
2378 aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
2379 HOST_WIDE_INT adjustment)
2381 switch (mode)
2383 case DImode:
2384 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
2385 GEN_INT (UNITS_PER_WORD));
2386 case DFmode:
2387 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
2388 GEN_INT (UNITS_PER_WORD));
2389 default:
2390 gcc_unreachable ();
2394 static rtx
2395 aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
2396 rtx reg2)
2398 switch (mode)
2400 case DImode:
2401 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2403 case DFmode:
2404 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2406 default:
2407 gcc_unreachable ();
2411 static rtx
2412 aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
2413 rtx mem2)
2415 switch (mode)
2417 case DImode:
2418 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2420 case DFmode:
2421 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2423 default:
2424 gcc_unreachable ();
2429 static void
2430 aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
2431 unsigned start, unsigned limit, bool skip_wb)
2433 rtx_insn *insn;
2434 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
2435 ? gen_frame_mem : gen_rtx_MEM);
2436 unsigned regno;
2437 unsigned regno2;
2439 for (regno = aarch64_next_callee_save (start, limit);
2440 regno <= limit;
2441 regno = aarch64_next_callee_save (regno + 1, limit))
2443 rtx reg, mem;
2444 HOST_WIDE_INT offset;
2446 if (skip_wb
2447 && (regno == cfun->machine->frame.wb_candidate1
2448 || regno == cfun->machine->frame.wb_candidate2))
2449 continue;
2451 reg = gen_rtx_REG (mode, regno);
2452 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2453 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2454 offset));
2456 regno2 = aarch64_next_callee_save (regno + 1, limit);
2458 if (regno2 <= limit
2459 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2460 == cfun->machine->frame.reg_offset[regno2]))
2463 rtx reg2 = gen_rtx_REG (mode, regno2);
2464 rtx mem2;
2466 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2467 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2468 offset));
2469 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2470 reg2));
2472 /* The first part of a frame-related parallel insn is
2473 always assumed to be relevant to the frame
2474 calculations; subsequent parts, are only
2475 frame-related if explicitly marked. */
2476 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2477 regno = regno2;
2479 else
2480 insn = emit_move_insn (mem, reg);
2482 RTX_FRAME_RELATED_P (insn) = 1;
2486 static void
2487 aarch64_restore_callee_saves (machine_mode mode,
2488 HOST_WIDE_INT start_offset, unsigned start,
2489 unsigned limit, bool skip_wb, rtx *cfi_ops)
2491 rtx base_rtx = stack_pointer_rtx;
2492 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
2493 ? gen_frame_mem : gen_rtx_MEM);
2494 unsigned regno;
2495 unsigned regno2;
2496 HOST_WIDE_INT offset;
2498 for (regno = aarch64_next_callee_save (start, limit);
2499 regno <= limit;
2500 regno = aarch64_next_callee_save (regno + 1, limit))
2502 rtx reg, mem;
2504 if (skip_wb
2505 && (regno == cfun->machine->frame.wb_candidate1
2506 || regno == cfun->machine->frame.wb_candidate2))
2507 continue;
2509 reg = gen_rtx_REG (mode, regno);
2510 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2511 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2513 regno2 = aarch64_next_callee_save (regno + 1, limit);
2515 if (regno2 <= limit
2516 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2517 == cfun->machine->frame.reg_offset[regno2]))
2519 rtx reg2 = gen_rtx_REG (mode, regno2);
2520 rtx mem2;
2522 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2523 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2524 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
2526 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
2527 regno = regno2;
2529 else
2530 emit_move_insn (reg, mem);
2531 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
2535 /* AArch64 stack frames generated by this compiler look like:
2537 +-------------------------------+
2539 | incoming stack arguments |
2541 +-------------------------------+
2542 | | <-- incoming stack pointer (aligned)
2543 | callee-allocated save area |
2544 | for register varargs |
2546 +-------------------------------+
2547 | local variables | <-- frame_pointer_rtx
2549 +-------------------------------+
2550 | padding0 | \
2551 +-------------------------------+ |
2552 | callee-saved registers | | frame.saved_regs_size
2553 +-------------------------------+ |
2554 | LR' | |
2555 +-------------------------------+ |
2556 | FP' | / <- hard_frame_pointer_rtx (aligned)
2557 +-------------------------------+
2558 | dynamic allocation |
2559 +-------------------------------+
2560 | padding |
2561 +-------------------------------+
2562 | outgoing stack arguments | <-- arg_pointer
2564 +-------------------------------+
2565 | | <-- stack_pointer_rtx (aligned)
2567 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2568 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2569 unchanged. */
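/* As a worked example (purely illustrative), consider a function that
   needs a frame pointer, has 16 bytes of local variables, no varargs
   save area, no outgoing stack arguments, and must save x19, x20 and
   d8 in addition to the frame record:

     reg_offset[x29] = 0,  reg_offset[x30] = 8       (frame record)
     reg_offset[x19] = 16, reg_offset[x20] = 24, reg_offset[d8] = 32
     padding0 = 8, saved_regs_size = 48
     hard_fp_offset = ROUND_UP (0 + 16 + 48, 16) = 64
     frame_size     = ROUND_UP (64 + 0, 16)      = 64  */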
2571 /* Generate the prologue instructions for entry into a function.
2572 Establish the stack frame by decreasing the stack pointer with a
2573 properly calculated size and, if necessary, create a frame record
2574 filled with the values of LR and previous frame pointer. The
2575 current FP is also set up if it is in use. */
2577 void
2578 aarch64_expand_prologue (void)
2580 /* sub sp, sp, #<frame_size>
2581 stp {fp, lr}, [sp, #<frame_size> - 16]
2582 add fp, sp, #<frame_size> - hardfp_offset
2583 stp {cs_reg}, [fp, #-16] etc.
2585 sub sp, sp, <final_adjustment_if_any>
2587 HOST_WIDE_INT frame_size, offset;
2588 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
2589 HOST_WIDE_INT hard_fp_offset;
2590 rtx_insn *insn;
2592 aarch64_layout_frame ();
2594 offset = frame_size = cfun->machine->frame.frame_size;
2595 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2596 fp_offset = frame_size - hard_fp_offset;
2598 if (flag_stack_usage_info)
2599 current_function_static_stack_size = frame_size;
 2601   /* Store pair and load pair instructions have an offset range of only -512 to 504.  */
2602 if (offset >= 512)
2604 /* When the frame has a large size, an initial decrease is done on
2605 the stack pointer to jump over the callee-allocated save area for
2606 register varargs, the local variable area and/or the callee-saved
2607 register area. This will allow the pre-index write-back
2608 store pair instructions to be used for setting up the stack frame
2609 efficiently. */
2610 offset = hard_fp_offset;
2611 if (offset >= 512)
2612 offset = cfun->machine->frame.saved_regs_size;
2614 frame_size -= (offset + crtl->outgoing_args_size);
2615 fp_offset = 0;
2617 if (frame_size >= 0x1000000)
2619 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2620 emit_move_insn (op0, GEN_INT (-frame_size));
2621 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2623 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2624 gen_rtx_SET (stack_pointer_rtx,
2625 plus_constant (Pmode, stack_pointer_rtx,
2626 -frame_size)));
2627 RTX_FRAME_RELATED_P (insn) = 1;
2629 else if (frame_size > 0)
2631 int hi_ofs = frame_size & 0xfff000;
2632 int lo_ofs = frame_size & 0x000fff;
2634 if (hi_ofs)
2636 insn = emit_insn (gen_add2_insn
2637 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
2638 RTX_FRAME_RELATED_P (insn) = 1;
2640 if (lo_ofs)
2642 insn = emit_insn (gen_add2_insn
2643 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
2644 RTX_FRAME_RELATED_P (insn) = 1;
2648 else
2649 frame_size = -1;
2651 if (offset > 0)
2653 bool skip_wb = false;
2655 if (frame_pointer_needed)
2657 skip_wb = true;
2659 if (fp_offset)
2661 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2662 GEN_INT (-offset)));
2663 RTX_FRAME_RELATED_P (insn) = 1;
2665 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
2666 R30_REGNUM, false);
2668 else
2669 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
2671 /* Set up frame pointer to point to the location of the
2672 previous frame pointer on the stack. */
2673 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2674 stack_pointer_rtx,
2675 GEN_INT (fp_offset)));
2676 RTX_FRAME_RELATED_P (insn) = 1;
2677 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
2679 else
2681 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2682 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2684 if (fp_offset
2685 || reg1 == FIRST_PSEUDO_REGISTER
2686 || (reg2 == FIRST_PSEUDO_REGISTER
2687 && offset >= 256))
2689 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2690 GEN_INT (-offset)));
2691 RTX_FRAME_RELATED_P (insn) = 1;
2693 else
2695 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2697 skip_wb = true;
2699 if (reg2 == FIRST_PSEUDO_REGISTER)
2700 aarch64_pushwb_single_reg (mode1, reg1, offset);
2701 else
2702 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2706 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2707 skip_wb);
2708 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2709 skip_wb);
 2712   /* When offset >= 512,
2713 sub sp, sp, #<outgoing_args_size> */
2714 if (frame_size > -1)
2716 if (crtl->outgoing_args_size > 0)
2718 insn = emit_insn (gen_add2_insn
2719 (stack_pointer_rtx,
2720 GEN_INT (- crtl->outgoing_args_size)));
2721 RTX_FRAME_RELATED_P (insn) = 1;
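/* For the worked example above (offset < 512, frame pointer needed,
   fp_offset == 0) the emitted sequence is roughly:

     stp  x29, x30, [sp, #-64]!    // allocate frame, save frame record
     add  x29, sp, #0              // establish the frame pointer
     stp  x19, x20, [sp, #16]      // paired callee-saved GP registers
     str  d8, [sp, #32]            // callee-saved FP/SIMD register

   A trailing  sub sp, sp, #<outgoing_args_size>  is only emitted when
   the frame was too large to allocate in a single adjustment.  */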
2726 /* Return TRUE if we can use a simple_return insn.
 2728    This function checks whether the callee-saved stack is empty, which
 2729    means no restore actions are needed.  The pro_and_epilogue pass uses
 2730    this to check whether the shrink-wrapping optimization is feasible.  */
2732 bool
2733 aarch64_use_return_insn_p (void)
2735 if (!reload_completed)
2736 return false;
2738 if (crtl->profile)
2739 return false;
2741 aarch64_layout_frame ();
2743 return cfun->machine->frame.frame_size == 0;
2746 /* Generate the epilogue instructions for returning from a function. */
2747 void
2748 aarch64_expand_epilogue (bool for_sibcall)
2750 HOST_WIDE_INT frame_size, offset;
2751 HOST_WIDE_INT fp_offset;
2752 HOST_WIDE_INT hard_fp_offset;
2753 rtx_insn *insn;
 2754   /* We need to add a memory barrier to prevent reads from the deallocated stack.  */
2755 bool need_barrier_p = (get_frame_size () != 0
2756 || cfun->machine->frame.saved_varargs_size);
2758 aarch64_layout_frame ();
2760 offset = frame_size = cfun->machine->frame.frame_size;
2761 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2762 fp_offset = frame_size - hard_fp_offset;
 2764   /* Store pair and load pair instructions have an offset range of only -512 to 504.  */
2765 if (offset >= 512)
2767 offset = hard_fp_offset;
2768 if (offset >= 512)
2769 offset = cfun->machine->frame.saved_regs_size;
2771 frame_size -= (offset + crtl->outgoing_args_size);
2772 fp_offset = 0;
2773 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2775 insn = emit_insn (gen_add2_insn
2776 (stack_pointer_rtx,
2777 GEN_INT (crtl->outgoing_args_size)));
2778 RTX_FRAME_RELATED_P (insn) = 1;
2781 else
2782 frame_size = -1;
2784 /* If there were outgoing arguments or we've done dynamic stack
2785 allocation, then restore the stack pointer from the frame
2786 pointer. This is at most one insn and more efficient than using
2787 GCC's internal mechanism. */
2788 if (frame_pointer_needed
2789 && (crtl->outgoing_args_size || cfun->calls_alloca))
2791 if (cfun->calls_alloca)
2792 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2794 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2795 hard_frame_pointer_rtx,
2796 GEN_INT (0)));
2797 offset = offset - fp_offset;
2800 if (offset > 0)
2802 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2803 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2804 bool skip_wb = true;
2805 rtx cfi_ops = NULL;
2807 if (frame_pointer_needed)
2808 fp_offset = 0;
2809 else if (fp_offset
2810 || reg1 == FIRST_PSEUDO_REGISTER
2811 || (reg2 == FIRST_PSEUDO_REGISTER
2812 && offset >= 256))
2813 skip_wb = false;
2815 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2816 skip_wb, &cfi_ops);
2817 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2818 skip_wb, &cfi_ops);
2820 if (need_barrier_p)
2821 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2823 if (skip_wb)
2825 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2826 rtx rreg1 = gen_rtx_REG (mode1, reg1);
2828 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
2829 if (reg2 == FIRST_PSEUDO_REGISTER)
2831 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2832 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2833 mem = gen_rtx_MEM (mode1, mem);
2834 insn = emit_move_insn (rreg1, mem);
2836 else
2838 rtx rreg2 = gen_rtx_REG (mode1, reg2);
2840 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2841 insn = emit_insn (aarch64_gen_loadwb_pair
2842 (mode1, stack_pointer_rtx, rreg1,
2843 rreg2, offset));
2846 else
2848 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2849 GEN_INT (offset)));
2852 /* Reset the CFA to be SP + FRAME_SIZE. */
2853 rtx new_cfa = stack_pointer_rtx;
2854 if (frame_size > 0)
2855 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2856 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2857 REG_NOTES (insn) = cfi_ops;
2858 RTX_FRAME_RELATED_P (insn) = 1;
2861 if (frame_size > 0)
2863 if (need_barrier_p)
2864 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2866 if (frame_size >= 0x1000000)
2868 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2869 emit_move_insn (op0, GEN_INT (frame_size));
2870 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2872 else
2874 int hi_ofs = frame_size & 0xfff000;
2875 int lo_ofs = frame_size & 0x000fff;
2877 if (hi_ofs && lo_ofs)
2879 insn = emit_insn (gen_add2_insn
2880 (stack_pointer_rtx, GEN_INT (hi_ofs)));
2881 RTX_FRAME_RELATED_P (insn) = 1;
2882 frame_size = lo_ofs;
2884 insn = emit_insn (gen_add2_insn
2885 (stack_pointer_rtx, GEN_INT (frame_size)));
2888 /* Reset the CFA to be SP + 0. */
2889 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2890 RTX_FRAME_RELATED_P (insn) = 1;
2893 /* Stack adjustment for exception handler. */
2894 if (crtl->calls_eh_return)
2896 /* We need to unwind the stack by the offset computed by
2897 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2898 to be SP; letting the CFA move during this adjustment
2899 is just as correct as retaining the CFA from the body
2900 of the function. Therefore, do nothing special. */
2901 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2904 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2905 if (!for_sibcall)
2906 emit_jump_insn (ret_rtx);
2909 /* Return the place to copy the exception unwinding return address to.
 2910    This will probably be a stack slot, but could (in theory) be the
 2911    return register.  */
 2912 rtx
 2913 aarch64_final_eh_return_addr (void)
2915 HOST_WIDE_INT fp_offset;
2917 aarch64_layout_frame ();
2919 fp_offset = cfun->machine->frame.frame_size
2920 - cfun->machine->frame.hard_fp_offset;
2922 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2923 return gen_rtx_REG (DImode, LR_REGNUM);
2925 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2926 result in a store to save LR introduced by builtin_eh_return () being
2927 incorrectly deleted because the alias is not detected.
2928 So in the calculation of the address to copy the exception unwinding
2929 return address to, we note 2 cases.
2930 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2931 we return a SP-relative location since all the addresses are SP-relative
2932 in this case. This prevents the store from being optimized away.
2933 If the fp_offset is not 0, then the addresses will be FP-relative and
2934 therefore we return a FP-relative location. */
2936 if (frame_pointer_needed)
2938 if (fp_offset)
2939 return gen_frame_mem (DImode,
2940 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2941 else
2942 return gen_frame_mem (DImode,
2943 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2946 /* If FP is not needed, we calculate the location of LR, which would be
2947 at the top of the saved registers block. */
2949 return gen_frame_mem (DImode,
2950 plus_constant (Pmode,
2951 stack_pointer_rtx,
2952 fp_offset
2953 + cfun->machine->frame.saved_regs_size
2954 - 2 * UNITS_PER_WORD));
2957 /* Possibly output code to build up a constant in a register. For
2958 the benefit of the costs infrastructure, returns the number of
2959 instructions which would be emitted. GENERATE inhibits or
2960 enables code generation. */
2962 static int
2963 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2965 int insns = 0;
2967 if (aarch64_bitmask_imm (val, DImode))
2969 if (generate)
2970 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2971 insns = 1;
2973 else
2975 int i;
2976 int ncount = 0;
2977 int zcount = 0;
2978 HOST_WIDE_INT valp = val >> 16;
2979 HOST_WIDE_INT valm;
2980 HOST_WIDE_INT tval;
2982 for (i = 16; i < 64; i += 16)
2984 valm = (valp & 0xffff);
2986 if (valm != 0)
2987 ++ zcount;
2989 if (valm != 0xffff)
2990 ++ ncount;
2992 valp >>= 16;
2995 /* zcount contains the number of additional MOVK instructions
2996 required if the constant is built up with an initial MOVZ instruction,
2997 while ncount is the number of MOVK instructions required if starting
 2998        with a MOVN instruction.  Choose the sequence that yields the fewer
 2999        instructions, preferring MOVZ instructions when the two counts are
 3000        equal.  */
3001 if (ncount < zcount)
3003 if (generate)
3004 emit_move_insn (gen_rtx_REG (Pmode, regnum),
3005 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
3006 tval = 0xffff;
3007 insns++;
3009 else
3011 if (generate)
3012 emit_move_insn (gen_rtx_REG (Pmode, regnum),
3013 GEN_INT (val & 0xffff));
3014 tval = 0;
3015 insns++;
3018 val >>= 16;
3020 for (i = 16; i < 64; i += 16)
3022 if ((val & 0xffff) != tval)
3024 if (generate)
3025 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
3026 GEN_INT (i),
3027 GEN_INT (val & 0xffff)));
3028 insns++;
3030 val >>= 16;
3033 return insns;
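/* A worked example (illustrative): for VAL == 0x000000ab0000cdef the
   upper 16-bit chunks are 0x0000, 0x00ab and 0x0000, so zcount == 1
   and ncount == 3; the MOVZ sequence wins and two instructions are
   emitted (using x0 for REGNUM):

     movz x0, #0xcdef
     movk x0, #0xab, lsl #32

   Conversely, 0xffffffff1234ffff gives zcount == 3 and ncount == 1,
   so it is built from an all-ones value (movn x0, #0x0) followed by
   movk x0, #0x1234, lsl #16.  */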
3036 static void
3037 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
3039 HOST_WIDE_INT mdelta = delta;
3040 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
3041 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
3043 if (mdelta < 0)
3044 mdelta = -mdelta;
3046 if (mdelta >= 4096 * 4096)
3048 (void) aarch64_build_constant (scratchreg, delta, true);
3049 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
3051 else if (mdelta > 0)
3053 if (mdelta >= 4096)
3055 emit_insn (gen_rtx_SET (scratch_rtx, GEN_INT (mdelta / 4096)));
3056 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
3057 if (delta < 0)
3058 emit_insn (gen_rtx_SET (this_rtx,
3059 gen_rtx_MINUS (Pmode, this_rtx, shift)));
3060 else
3061 emit_insn (gen_rtx_SET (this_rtx,
3062 gen_rtx_PLUS (Pmode, this_rtx, shift)));
3064 if (mdelta % 4096 != 0)
3066 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
3067 emit_insn (gen_rtx_SET (this_rtx,
3068 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
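/* For instance (illustrative), DELTA == 10000 is split into a shifted
   quotient and a remainder, giving roughly:

     mov  x<scratch>, #2                         // 10000 / 4096
     add  x<this>, x<this>, x<scratch>, lsl #12
     add  x<this>, x<this>, #1808                // 10000 % 4096

   A delta whose absolute value is below 4096 is added directly, and a
   delta of 4096 * 4096 or more is materialized with
   aarch64_build_constant and then added.  */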
3073 /* Output code to add DELTA to the first argument, and then jump
3074 to FUNCTION. Used for C++ multiple inheritance. */
3075 static void
3076 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
3077 HOST_WIDE_INT delta,
3078 HOST_WIDE_INT vcall_offset,
3079 tree function)
3081 /* The this pointer is always in x0. Note that this differs from
 3082      Arm, where the this pointer may be bumped to r1 if r0 is required
3083 to return a pointer to an aggregate. On AArch64 a result value
3084 pointer will be in x8. */
3085 int this_regno = R0_REGNUM;
3086 rtx this_rtx, temp0, temp1, addr, funexp;
3087 rtx_insn *insn;
3089 reload_completed = 1;
3090 emit_note (NOTE_INSN_PROLOGUE_END);
3092 if (vcall_offset == 0)
3093 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
3094 else
3096 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
3098 this_rtx = gen_rtx_REG (Pmode, this_regno);
3099 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3100 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
3102 addr = this_rtx;
3103 if (delta != 0)
3105 if (delta >= -256 && delta < 256)
3106 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
3107 plus_constant (Pmode, this_rtx, delta));
3108 else
3109 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
3112 if (Pmode == ptr_mode)
3113 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
3114 else
3115 aarch64_emit_move (temp0,
3116 gen_rtx_ZERO_EXTEND (Pmode,
3117 gen_rtx_MEM (ptr_mode, addr)));
3119 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
3120 addr = plus_constant (Pmode, temp0, vcall_offset);
3121 else
3123 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
3124 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
3127 if (Pmode == ptr_mode)
 3128 	aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
3129 else
3130 aarch64_emit_move (temp1,
3131 gen_rtx_SIGN_EXTEND (Pmode,
3132 gen_rtx_MEM (ptr_mode, addr)));
3134 emit_insn (gen_add2_insn (this_rtx, temp1));
3137 /* Generate a tail call to the target function. */
3138 if (!TREE_USED (function))
3140 assemble_external (function);
3141 TREE_USED (function) = 1;
3143 funexp = XEXP (DECL_RTL (function), 0);
3144 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
3145 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
3146 SIBLING_CALL_P (insn) = 1;
3148 insn = get_insns ();
3149 shorten_branches (insn);
3150 final_start_function (insn, file, 1);
3151 final (insn, file, 1);
3152 final_end_function ();
3154 /* Stop pretending to be a post-reload pass. */
3155 reload_completed = 0;
3158 static bool
3159 aarch64_tls_referenced_p (rtx x)
3161 if (!TARGET_HAVE_TLS)
3162 return false;
3163 subrtx_iterator::array_type array;
3164 FOR_EACH_SUBRTX (iter, array, x, ALL)
3166 const_rtx x = *iter;
3167 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3168 return true;
3169 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3170 TLS offsets, not real symbol references. */
3171 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3172 iter.skip_subrtxes ();
3174 return false;
3178 static int
3179 aarch64_bitmasks_cmp (const void *i1, const void *i2)
3181 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
3182 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
3184 if (*imm1 < *imm2)
3185 return -1;
3186 if (*imm1 > *imm2)
3187 return +1;
3188 return 0;
3192 static void
3193 aarch64_build_bitmask_table (void)
3195 unsigned HOST_WIDE_INT mask, imm;
3196 unsigned int log_e, e, s, r;
3197 unsigned int nimms = 0;
3199 for (log_e = 1; log_e <= 6; log_e++)
3201 e = 1 << log_e;
3202 if (e == 64)
3203 mask = ~(HOST_WIDE_INT) 0;
3204 else
3205 mask = ((HOST_WIDE_INT) 1 << e) - 1;
3206 for (s = 1; s < e; s++)
3208 for (r = 0; r < e; r++)
3210 /* set s consecutive bits to 1 (s < 64) */
3211 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
3212 /* rotate right by r */
3213 if (r != 0)
3214 imm = ((imm >> r) | (imm << (e - r))) & mask;
3215 /* replicate the constant depending on SIMD size */
3216 switch (log_e) {
3217 case 1: imm |= (imm << 2);
3218 case 2: imm |= (imm << 4);
3219 case 3: imm |= (imm << 8);
3220 case 4: imm |= (imm << 16);
3221 case 5: imm |= (imm << 32);
3222 case 6:
3223 break;
3224 default:
3225 gcc_unreachable ();
3227 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
3228 aarch64_bitmasks[nimms++] = imm;
3233 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
3234 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
3235 aarch64_bitmasks_cmp);
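/* For example (illustrative): with element size e == 16, a run of
   s == 8 ones is 0x00ff; rotated right by r == 8 it becomes 0xff00,
   and replication across the 64-bit register yields the table entry
   0xff00ff00ff00ff00.  A value such as 0x1234 is not a rotated run of
   ones at any element size and therefore never appears in the table.  */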
3239 /* Return true if val can be encoded as a 12-bit unsigned immediate with
3240 a left shift of 0 or 12 bits. */
3241 bool
3242 aarch64_uimm12_shift (HOST_WIDE_INT val)
3244 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3245 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
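/* Examples (illustrative): 0xabc and 0xabc000 are accepted, since they
   fit entirely within bits [11:0] or bits [23:12] respectively, while
   0xabc001 and 0x1000000 are rejected because they straddle or exceed
   both 12-bit windows and need more than one add/sub instruction.  */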
3250 /* Return true if val is an immediate that can be loaded into a
3251 register by a MOVZ instruction. */
3252 static bool
3253 aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
3255 if (GET_MODE_SIZE (mode) > 4)
3257 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3258 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3259 return 1;
3261 else
3263 /* Ignore sign extension. */
3264 val &= (HOST_WIDE_INT) 0xffffffff;
3266 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3267 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3271 /* Return true if val is a valid bitmask immediate. */
3272 bool
3273 aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
3275 if (GET_MODE_SIZE (mode) < 8)
3277 /* Replicate bit pattern. */
3278 val &= (HOST_WIDE_INT) 0xffffffff;
3279 val |= val << 32;
3281 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
3282 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
3286 /* Return true if val is an immediate that can be loaded into a
3287 register in a single instruction. */
3288 bool
3289 aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
3291 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3292 return 1;
3293 return aarch64_bitmask_imm (val, mode);
3296 static bool
3297 aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
3299 rtx base, offset;
3301 if (GET_CODE (x) == HIGH)
3302 return true;
3304 split_const (x, &base, &offset);
3305 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
3307 if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
3308 != SYMBOL_FORCE_TO_MEM)
3309 return true;
3310 else
3311 /* Avoid generating a 64-bit relocation in ILP32; leave
3312 to aarch64_expand_mov_immediate to handle it properly. */
3313 return mode != ptr_mode;
3316 return aarch64_tls_referenced_p (x);
3319 /* Return true if register REGNO is a valid index register.
3320 STRICT_P is true if REG_OK_STRICT is in effect. */
3322 bool
3323 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3325 if (!HARD_REGISTER_NUM_P (regno))
3327 if (!strict_p)
3328 return true;
3330 if (!reg_renumber)
3331 return false;
3333 regno = reg_renumber[regno];
3335 return GP_REGNUM_P (regno);
3338 /* Return true if register REGNO is a valid base register for mode MODE.
3339 STRICT_P is true if REG_OK_STRICT is in effect. */
3341 bool
3342 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3344 if (!HARD_REGISTER_NUM_P (regno))
3346 if (!strict_p)
3347 return true;
3349 if (!reg_renumber)
3350 return false;
3352 regno = reg_renumber[regno];
3355 /* The fake registers will be eliminated to either the stack or
3356 hard frame pointer, both of which are usually valid base registers.
3357 Reload deals with the cases where the eliminated form isn't valid. */
3358 return (GP_REGNUM_P (regno)
3359 || regno == SP_REGNUM
3360 || regno == FRAME_POINTER_REGNUM
3361 || regno == ARG_POINTER_REGNUM);
3364 /* Return true if X is a valid base register for mode MODE.
3365 STRICT_P is true if REG_OK_STRICT is in effect. */
3367 static bool
3368 aarch64_base_register_rtx_p (rtx x, bool strict_p)
3370 if (!strict_p && GET_CODE (x) == SUBREG)
3371 x = SUBREG_REG (x);
3373 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3376 /* Return true if address offset is a valid index. If it is, fill in INFO
3377 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3379 static bool
3380 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
3381 machine_mode mode, bool strict_p)
3383 enum aarch64_address_type type;
3384 rtx index;
3385 int shift;
3387 /* (reg:P) */
3388 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3389 && GET_MODE (x) == Pmode)
3391 type = ADDRESS_REG_REG;
3392 index = x;
3393 shift = 0;
3395 /* (sign_extend:DI (reg:SI)) */
3396 else if ((GET_CODE (x) == SIGN_EXTEND
3397 || GET_CODE (x) == ZERO_EXTEND)
3398 && GET_MODE (x) == DImode
3399 && GET_MODE (XEXP (x, 0)) == SImode)
3401 type = (GET_CODE (x) == SIGN_EXTEND)
3402 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3403 index = XEXP (x, 0);
3404 shift = 0;
3406 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3407 else if (GET_CODE (x) == MULT
3408 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3409 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3410 && GET_MODE (XEXP (x, 0)) == DImode
3411 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3412 && CONST_INT_P (XEXP (x, 1)))
3414 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3415 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3416 index = XEXP (XEXP (x, 0), 0);
3417 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3419 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3420 else if (GET_CODE (x) == ASHIFT
3421 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3422 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3423 && GET_MODE (XEXP (x, 0)) == DImode
3424 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3425 && CONST_INT_P (XEXP (x, 1)))
3427 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3428 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3429 index = XEXP (XEXP (x, 0), 0);
3430 shift = INTVAL (XEXP (x, 1));
3432 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3433 else if ((GET_CODE (x) == SIGN_EXTRACT
3434 || GET_CODE (x) == ZERO_EXTRACT)
3435 && GET_MODE (x) == DImode
3436 && GET_CODE (XEXP (x, 0)) == MULT
3437 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3438 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3440 type = (GET_CODE (x) == SIGN_EXTRACT)
3441 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3442 index = XEXP (XEXP (x, 0), 0);
3443 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3444 if (INTVAL (XEXP (x, 1)) != 32 + shift
3445 || INTVAL (XEXP (x, 2)) != 0)
3446 shift = -1;
3448 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3449 (const_int 0xffffffff<<shift)) */
3450 else if (GET_CODE (x) == AND
3451 && GET_MODE (x) == DImode
3452 && GET_CODE (XEXP (x, 0)) == MULT
3453 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3454 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3455 && CONST_INT_P (XEXP (x, 1)))
3457 type = ADDRESS_REG_UXTW;
3458 index = XEXP (XEXP (x, 0), 0);
3459 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3460 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3461 shift = -1;
3463 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3464 else if ((GET_CODE (x) == SIGN_EXTRACT
3465 || GET_CODE (x) == ZERO_EXTRACT)
3466 && GET_MODE (x) == DImode
3467 && GET_CODE (XEXP (x, 0)) == ASHIFT
3468 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3469 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3471 type = (GET_CODE (x) == SIGN_EXTRACT)
3472 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3473 index = XEXP (XEXP (x, 0), 0);
3474 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3475 if (INTVAL (XEXP (x, 1)) != 32 + shift
3476 || INTVAL (XEXP (x, 2)) != 0)
3477 shift = -1;
3479 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3480 (const_int 0xffffffff<<shift)) */
3481 else if (GET_CODE (x) == AND
3482 && GET_MODE (x) == DImode
3483 && GET_CODE (XEXP (x, 0)) == ASHIFT
3484 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3485 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3486 && CONST_INT_P (XEXP (x, 1)))
3488 type = ADDRESS_REG_UXTW;
3489 index = XEXP (XEXP (x, 0), 0);
3490 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3491 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3492 shift = -1;
3494 /* (mult:P (reg:P) (const_int scale)) */
3495 else if (GET_CODE (x) == MULT
3496 && GET_MODE (x) == Pmode
3497 && GET_MODE (XEXP (x, 0)) == Pmode
3498 && CONST_INT_P (XEXP (x, 1)))
3500 type = ADDRESS_REG_REG;
3501 index = XEXP (x, 0);
3502 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3504 /* (ashift:P (reg:P) (const_int shift)) */
3505 else if (GET_CODE (x) == ASHIFT
3506 && GET_MODE (x) == Pmode
3507 && GET_MODE (XEXP (x, 0)) == Pmode
3508 && CONST_INT_P (XEXP (x, 1)))
3510 type = ADDRESS_REG_REG;
3511 index = XEXP (x, 0);
3512 shift = INTVAL (XEXP (x, 1));
3514 else
3515 return false;
3517 if (GET_CODE (index) == SUBREG)
3518 index = SUBREG_REG (index);
3520 if ((shift == 0 ||
3521 (shift > 0 && shift <= 3
3522 && (1 << shift) == GET_MODE_SIZE (mode)))
3523 && REG_P (index)
3524 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3526 info->type = type;
3527 info->offset = index;
3528 info->shift = shift;
3529 return true;
3532 return false;
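/* Some index forms this accepts, written as assembly (illustrative):

     [x0, x1]             index register, shift == 0
     [x0, x1, lsl #3]     scaled index, valid for an 8-byte access
     [x0, w1, sxtw #2]    sign-extended 32-bit index, 4-byte access
     [x0, w1, uxtw]       zero-extended 32-bit index, unscaled

   A non-zero shift is only accepted when 1 << shift equals the access
   size, so [x0, x1, lsl #3] would be rejected for an SImode load.  */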
3535 bool
3536 aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3538 return (offset >= -64 * GET_MODE_SIZE (mode)
3539 && offset < 64 * GET_MODE_SIZE (mode)
3540 && offset % GET_MODE_SIZE (mode) == 0);
3543 static inline bool
3544 offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
3545 HOST_WIDE_INT offset)
3547 return offset >= -256 && offset < 256;
3550 static inline bool
3551 offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3553 return (offset >= 0
3554 && offset < 4096 * GET_MODE_SIZE (mode)
3555 && offset % GET_MODE_SIZE (mode) == 0);
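/* Concretely (illustrative): for DImode the 7-bit signed scaled range
   used by ldp/stp is -512 .. 504 in steps of 8, the 9-bit signed
   unscaled range used by ldur/stur is -256 .. 255, and the 12-bit
   unsigned scaled range used by plain ldr/str is 0 .. 32760 in steps
   of 8 (0 .. 16380 in steps of 4 for SImode).  */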
3558 /* Return true if X is a valid address for machine mode MODE. If it is,
3559 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3560 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3562 static bool
3563 aarch64_classify_address (struct aarch64_address_info *info,
3564 rtx x, machine_mode mode,
3565 RTX_CODE outer_code, bool strict_p)
3567 enum rtx_code code = GET_CODE (x);
3568 rtx op0, op1;
3570 /* On BE, we use load/store pair for all large int mode load/stores. */
3571 bool load_store_pair_p = (outer_code == PARALLEL
3572 || (BYTES_BIG_ENDIAN
3573 && aarch64_vect_struct_mode_p (mode)));
3575 bool allow_reg_index_p =
3576 !load_store_pair_p
3577 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
3578 && !aarch64_vect_struct_mode_p (mode);
3580 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
3581 REG addressing. */
3582 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
3583 && (code != POST_INC && code != REG))
3584 return false;
3586 switch (code)
3588 case REG:
3589 case SUBREG:
3590 info->type = ADDRESS_REG_IMM;
3591 info->base = x;
3592 info->offset = const0_rtx;
3593 return aarch64_base_register_rtx_p (x, strict_p);
3595 case PLUS:
3596 op0 = XEXP (x, 0);
3597 op1 = XEXP (x, 1);
3599 if (! strict_p
3600 && REG_P (op0)
3601 && (op0 == virtual_stack_vars_rtx
3602 || op0 == frame_pointer_rtx
3603 || op0 == arg_pointer_rtx)
3604 && CONST_INT_P (op1))
3606 info->type = ADDRESS_REG_IMM;
3607 info->base = op0;
3608 info->offset = op1;
3610 return true;
3613 if (GET_MODE_SIZE (mode) != 0
3614 && CONST_INT_P (op1)
3615 && aarch64_base_register_rtx_p (op0, strict_p))
3617 HOST_WIDE_INT offset = INTVAL (op1);
3619 info->type = ADDRESS_REG_IMM;
3620 info->base = op0;
3621 info->offset = op1;
3623 /* TImode and TFmode values are allowed in both pairs of X
3624 registers and individual Q registers. The available
3625 address modes are:
3626 X,X: 7-bit signed scaled offset
3627 Q: 9-bit signed offset
 3628 	     We conservatively require an offset representable in either mode.  */
3630 if (mode == TImode || mode == TFmode)
3631 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3632 && offset_9bit_signed_unscaled_p (mode, offset));
 3634 	  /* A 7-bit offset check because OImode will emit an ldp/stp
 3635 	     instruction (only big endian will get here).
3636 For ldp/stp instructions, the offset is scaled for the size of a
3637 single element of the pair. */
3638 if (mode == OImode)
3639 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
 3641 	  /* Three 9/12-bit offset checks because CImode will emit three
3642 ldr/str instructions (only big endian will get here). */
3643 if (mode == CImode)
3644 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3645 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
3646 || offset_12bit_unsigned_scaled_p (V16QImode,
3647 offset + 32)));
 3649 	  /* Two 7-bit offset checks because XImode will emit two ldp/stp
3650 instructions (only big endian will get here). */
3651 if (mode == XImode)
3652 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
3653 && aarch64_offset_7bit_signed_scaled_p (TImode,
3654 offset + 32));
3656 if (load_store_pair_p)
3657 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3658 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3659 else
3660 return (offset_9bit_signed_unscaled_p (mode, offset)
3661 || offset_12bit_unsigned_scaled_p (mode, offset));
3664 if (allow_reg_index_p)
3666 /* Look for base + (scaled/extended) index register. */
3667 if (aarch64_base_register_rtx_p (op0, strict_p)
3668 && aarch64_classify_index (info, op1, mode, strict_p))
3670 info->base = op0;
3671 return true;
3673 if (aarch64_base_register_rtx_p (op1, strict_p)
3674 && aarch64_classify_index (info, op0, mode, strict_p))
3676 info->base = op1;
3677 return true;
3681 return false;
3683 case POST_INC:
3684 case POST_DEC:
3685 case PRE_INC:
3686 case PRE_DEC:
3687 info->type = ADDRESS_REG_WB;
3688 info->base = XEXP (x, 0);
3689 info->offset = NULL_RTX;
3690 return aarch64_base_register_rtx_p (info->base, strict_p);
3692 case POST_MODIFY:
3693 case PRE_MODIFY:
3694 info->type = ADDRESS_REG_WB;
3695 info->base = XEXP (x, 0);
3696 if (GET_CODE (XEXP (x, 1)) == PLUS
3697 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3698 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3699 && aarch64_base_register_rtx_p (info->base, strict_p))
3701 HOST_WIDE_INT offset;
3702 info->offset = XEXP (XEXP (x, 1), 1);
3703 offset = INTVAL (info->offset);
3705 /* TImode and TFmode values are allowed in both pairs of X
3706 registers and individual Q registers. The available
3707 address modes are:
3708 X,X: 7-bit signed scaled offset
3709 Q: 9-bit signed offset
 3710 	     We conservatively require an offset representable in either mode.  */
3712 if (mode == TImode || mode == TFmode)
3713 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3714 && offset_9bit_signed_unscaled_p (mode, offset));
3716 if (load_store_pair_p)
3717 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3718 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3719 else
3720 return offset_9bit_signed_unscaled_p (mode, offset);
3722 return false;
3724 case CONST:
3725 case SYMBOL_REF:
3726 case LABEL_REF:
3727 /* load literal: pc-relative constant pool entry. Only supported
3728 for SI mode or larger. */
3729 info->type = ADDRESS_SYMBOLIC;
3731 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
3733 rtx sym, addend;
3735 split_const (x, &sym, &addend);
3736 return (GET_CODE (sym) == LABEL_REF
3737 || (GET_CODE (sym) == SYMBOL_REF
3738 && CONSTANT_POOL_ADDRESS_P (sym)));
3740 return false;
3742 case LO_SUM:
3743 info->type = ADDRESS_LO_SUM;
3744 info->base = XEXP (x, 0);
3745 info->offset = XEXP (x, 1);
3746 if (allow_reg_index_p
3747 && aarch64_base_register_rtx_p (info->base, strict_p))
3749 rtx sym, offs;
3750 split_const (info->offset, &sym, &offs);
3751 if (GET_CODE (sym) == SYMBOL_REF
3752 && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
3753 == SYMBOL_SMALL_ABSOLUTE))
3755 /* The symbol and offset must be aligned to the access size. */
3756 unsigned int align;
3757 unsigned int ref_size;
3759 if (CONSTANT_POOL_ADDRESS_P (sym))
3760 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3761 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3763 tree exp = SYMBOL_REF_DECL (sym);
3764 align = TYPE_ALIGN (TREE_TYPE (exp));
3765 align = CONSTANT_ALIGNMENT (exp, align);
3767 else if (SYMBOL_REF_DECL (sym))
3768 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3769 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3770 && SYMBOL_REF_BLOCK (sym) != NULL)
3771 align = SYMBOL_REF_BLOCK (sym)->alignment;
3772 else
3773 align = BITS_PER_UNIT;
3775 ref_size = GET_MODE_SIZE (mode);
3776 if (ref_size == 0)
3777 ref_size = GET_MODE_SIZE (DImode);
3779 return ((INTVAL (offs) & (ref_size - 1)) == 0
3780 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3783 return false;
3785 default:
3786 return false;
3790 bool
3791 aarch64_symbolic_address_p (rtx x)
3793 rtx offset;
3795 split_const (x, &x, &offset);
3796 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3799 /* Classify the base of symbolic expression X, given that X appears in
3800 context CONTEXT. */
3802 enum aarch64_symbol_type
3803 aarch64_classify_symbolic_expression (rtx x,
3804 enum aarch64_symbol_context context)
3806 rtx offset;
3808 split_const (x, &x, &offset);
3809 return aarch64_classify_symbol (x, offset, context);
3813 /* Return TRUE if X is a legitimate address for accessing memory in
3814 mode MODE. */
3815 static bool
3816 aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
3818 struct aarch64_address_info addr;
3820 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3823 /* Return TRUE if X is a legitimate address for accessing memory in
3824 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3825 pair operation. */
3826 bool
3827 aarch64_legitimate_address_p (machine_mode mode, rtx x,
3828 RTX_CODE outer_code, bool strict_p)
3830 struct aarch64_address_info addr;
3832 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3835 /* Return TRUE if rtx X is immediate constant 0.0 */
3836 bool
3837 aarch64_float_const_zero_rtx_p (rtx x)
3839 REAL_VALUE_TYPE r;
3841 if (GET_MODE (x) == VOIDmode)
3842 return false;
3844 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3845 if (REAL_VALUE_MINUS_ZERO (r))
3846 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3847 return REAL_VALUES_EQUAL (r, dconst0);
3850 /* Return the fixed registers used for condition codes. */
3852 static bool
3853 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3855 *p1 = CC_REGNUM;
3856 *p2 = INVALID_REGNUM;
3857 return true;
3860 /* Emit call insn with PAT and do aarch64-specific handling. */
3862 void
3863 aarch64_emit_call_insn (rtx pat)
3865 rtx insn = emit_call_insn (pat);
3867 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3868 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3869 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3872 machine_mode
3873 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3875 /* All floating point compares return CCFP if it is an equality
3876 comparison, and CCFPE otherwise. */
3877 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3879 switch (code)
3881 case EQ:
3882 case NE:
3883 case UNORDERED:
3884 case ORDERED:
3885 case UNLT:
3886 case UNLE:
3887 case UNGT:
3888 case UNGE:
3889 case UNEQ:
3890 case LTGT:
3891 return CCFPmode;
3893 case LT:
3894 case LE:
3895 case GT:
3896 case GE:
3897 return CCFPEmode;
3899 default:
3900 gcc_unreachable ();
3904 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3905 && y == const0_rtx
3906 && (code == EQ || code == NE || code == LT || code == GE)
3907 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3908 || GET_CODE (x) == NEG))
3909 return CC_NZmode;
3911 /* A compare with a shifted operand. Because of canonicalization,
3912 the comparison will have to be swapped when we emit the assembly
3913 code. */
3914 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3915 && (REG_P (y) || GET_CODE (y) == SUBREG)
3916 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3917 || GET_CODE (x) == LSHIFTRT
3918 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3919 return CC_SWPmode;
3921 /* Similarly for a negated operand, but we can only do this for
3922 equalities. */
3923 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3924 && (REG_P (y) || GET_CODE (y) == SUBREG)
3925 && (code == EQ || code == NE)
3926 && GET_CODE (x) == NEG)
3927 return CC_Zmode;
3929 /* A compare of a mode narrower than SI mode against zero can be done
3930 by extending the value in the comparison. */
3931 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3932 && y == const0_rtx)
3933 /* Only use sign-extension if we really need it. */
3934 return ((code == GT || code == GE || code == LE || code == LT)
3935 ? CC_SESWPmode : CC_ZESWPmode);
3937 /* For everything else, return CCmode. */
3938 return CCmode;
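/* An illustrative case: for  if ((x & y) < 0)  the compare of an AND
   against zero with LT selects CC_NZmode, so the test can be emitted
   as ands/tst and, per aarch64_get_condition_code_1 below, GE then
   branches on PL and LT on MI.  */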
3941 static int
3942 aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
 3944 int
 3945 aarch64_get_condition_code (rtx x)
3947 machine_mode mode = GET_MODE (XEXP (x, 0));
3948 enum rtx_code comp_code = GET_CODE (x);
3950 if (GET_MODE_CLASS (mode) != MODE_CC)
3951 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3952 return aarch64_get_condition_code_1 (mode, comp_code);
3955 static int
3956 aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
3958 int ne = -1, eq = -1;
3959 switch (mode)
3961 case CCFPmode:
3962 case CCFPEmode:
3963 switch (comp_code)
3965 case GE: return AARCH64_GE;
3966 case GT: return AARCH64_GT;
3967 case LE: return AARCH64_LS;
3968 case LT: return AARCH64_MI;
3969 case NE: return AARCH64_NE;
3970 case EQ: return AARCH64_EQ;
3971 case ORDERED: return AARCH64_VC;
3972 case UNORDERED: return AARCH64_VS;
3973 case UNLT: return AARCH64_LT;
3974 case UNLE: return AARCH64_LE;
3975 case UNGT: return AARCH64_HI;
3976 case UNGE: return AARCH64_PL;
3977 default: return -1;
3979 break;
3981 case CC_DNEmode:
3982 ne = AARCH64_NE;
3983 eq = AARCH64_EQ;
3984 break;
3986 case CC_DEQmode:
3987 ne = AARCH64_EQ;
3988 eq = AARCH64_NE;
3989 break;
3991 case CC_DGEmode:
3992 ne = AARCH64_GE;
3993 eq = AARCH64_LT;
3994 break;
3996 case CC_DLTmode:
3997 ne = AARCH64_LT;
3998 eq = AARCH64_GE;
3999 break;
4001 case CC_DGTmode:
4002 ne = AARCH64_GT;
4003 eq = AARCH64_LE;
4004 break;
4006 case CC_DLEmode:
4007 ne = AARCH64_LE;
4008 eq = AARCH64_GT;
4009 break;
4011 case CC_DGEUmode:
4012 ne = AARCH64_CS;
4013 eq = AARCH64_CC;
4014 break;
4016 case CC_DLTUmode:
4017 ne = AARCH64_CC;
4018 eq = AARCH64_CS;
4019 break;
4021 case CC_DGTUmode:
4022 ne = AARCH64_HI;
4023 eq = AARCH64_LS;
4024 break;
4026 case CC_DLEUmode:
4027 ne = AARCH64_LS;
4028 eq = AARCH64_HI;
4029 break;
4031 case CCmode:
4032 switch (comp_code)
4034 case NE: return AARCH64_NE;
4035 case EQ: return AARCH64_EQ;
4036 case GE: return AARCH64_GE;
4037 case GT: return AARCH64_GT;
4038 case LE: return AARCH64_LE;
4039 case LT: return AARCH64_LT;
4040 case GEU: return AARCH64_CS;
4041 case GTU: return AARCH64_HI;
4042 case LEU: return AARCH64_LS;
4043 case LTU: return AARCH64_CC;
4044 default: return -1;
4046 break;
4048 case CC_SWPmode:
4049 case CC_ZESWPmode:
4050 case CC_SESWPmode:
4051 switch (comp_code)
4053 case NE: return AARCH64_NE;
4054 case EQ: return AARCH64_EQ;
4055 case GE: return AARCH64_LE;
4056 case GT: return AARCH64_LT;
4057 case LE: return AARCH64_GE;
4058 case LT: return AARCH64_GT;
4059 case GEU: return AARCH64_LS;
4060 case GTU: return AARCH64_CC;
4061 case LEU: return AARCH64_CS;
4062 case LTU: return AARCH64_HI;
4063 default: return -1;
4065 break;
4067 case CC_NZmode:
4068 switch (comp_code)
4070 case NE: return AARCH64_NE;
4071 case EQ: return AARCH64_EQ;
4072 case GE: return AARCH64_PL;
4073 case LT: return AARCH64_MI;
4074 default: return -1;
4076 break;
4078 case CC_Zmode:
4079 switch (comp_code)
4081 case NE: return AARCH64_NE;
4082 case EQ: return AARCH64_EQ;
4083 default: return -1;
4085 break;
4087 default:
4088 return -1;
4089 break;
4092 if (comp_code == NE)
4093 return ne;
4095 if (comp_code == EQ)
4096 return eq;
4098 return -1;
4101 bool
4102 aarch64_const_vec_all_same_in_range_p (rtx x,
4103 HOST_WIDE_INT minval,
4104 HOST_WIDE_INT maxval)
4106 HOST_WIDE_INT firstval;
4107 int count, i;
4109 if (GET_CODE (x) != CONST_VECTOR
4110 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
4111 return false;
4113 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
4114 if (firstval < minval || firstval > maxval)
4115 return false;
4117 count = CONST_VECTOR_NUNITS (x);
4118 for (i = 1; i < count; i++)
4119 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
4120 return false;
4122 return true;
4125 bool
4126 aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
4128 return aarch64_const_vec_all_same_in_range_p (x, val, val);
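/* Return the number of set bits in VALUE.  The loop below clears the
   lowest set bit on each iteration (value &= value - 1), e.g.
   0xc -> 0x8 -> 0x0, giving a count of 2.  */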
4131 static unsigned
4132 bit_count (unsigned HOST_WIDE_INT value)
4134 unsigned count = 0;
4136 while (value)
4138 count++;
4139 value &= value - 1;
4142 return count;
4145 /* N Z C V. */
4146 #define AARCH64_CC_V 1
4147 #define AARCH64_CC_C (1 << 1)
4148 #define AARCH64_CC_Z (1 << 2)
4149 #define AARCH64_CC_N (1 << 3)
4151 /* N Z C V flags for ccmp. The first code is for AND op and the other
4152 is for IOR op. Indexed by the AARCH64_* condition code values. */
4153 static const int aarch64_nzcv_codes[][2] =
4155 {AARCH64_CC_Z, 0}, /* EQ, Z == 1. */
4156 {0, AARCH64_CC_Z}, /* NE, Z == 0. */
4157 {AARCH64_CC_C, 0}, /* CS, C == 1. */
4158 {0, AARCH64_CC_C}, /* CC, C == 0. */
4159 {AARCH64_CC_N, 0}, /* MI, N == 1. */
4160 {0, AARCH64_CC_N}, /* PL, N == 0. */
4161 {AARCH64_CC_V, 0}, /* VS, V == 1. */
4162 {0, AARCH64_CC_V}, /* VC, V == 0. */
4163 {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0. */
4164 {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0). */
4165 {0, AARCH64_CC_V}, /* GE, N == V. */
4166 {AARCH64_CC_V, 0}, /* LT, N != V. */
4167 {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V. */
4168 {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V). */
4169 {0, 0}, /* AL, Any. */
4170 {0, 0}, /* NV, Any. */
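/* For example, an EQ test uses the AARCH64_EQ row: column 0 is
   AARCH64_CC_Z (4) and column 1 is 0.  The 'K' and 'k' operand modifiers
   handled in aarch64_print_operand below print column 0 and column 1
   respectively, producing the #nzcv immediate used by the ccmp patterns.  */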
4174 aarch64_ccmp_mode_to_code (enum machine_mode mode)
4176 switch (mode)
4178 case CC_DNEmode:
4179 return NE;
4181 case CC_DEQmode:
4182 return EQ;
4184 case CC_DLEmode:
4185 return LE;
4187 case CC_DGTmode:
4188 return GT;
4190 case CC_DLTmode:
4191 return LT;
4193 case CC_DGEmode:
4194 return GE;
4196 case CC_DLEUmode:
4197 return LEU;
4199 case CC_DGTUmode:
4200 return GTU;
4202 case CC_DLTUmode:
4203 return LTU;
4205 case CC_DGEUmode:
4206 return GEU;
4208 default:
4209 gcc_unreachable ();
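/* Print operand X in assembler syntax to file F.  CODE is a
   single-character operand modifier; the cases below document the
   modifiers that are supported.  */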
4214 void
4215 aarch64_print_operand (FILE *f, rtx x, char code)
4217 switch (code)
4219 /* An integer or symbol address without a preceding # sign. */
4220 case 'c':
4221 switch (GET_CODE (x))
4223 case CONST_INT:
4224 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
4225 break;
4227 case SYMBOL_REF:
4228 output_addr_const (f, x);
4229 break;
4231 case CONST:
4232 if (GET_CODE (XEXP (x, 0)) == PLUS
4233 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4235 output_addr_const (f, x);
4236 break;
4238 /* Fall through. */
4240 default:
4241 output_operand_lossage ("Unsupported operand for code '%c'", code);
4243 break;
4245 case 'e':
4246 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4248 int n;
4250 if (!CONST_INT_P (x)
4251 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4253 output_operand_lossage ("invalid operand for '%%%c'", code);
4254 return;
4257 switch (n)
4259 case 3:
4260 fputc ('b', f);
4261 break;
4262 case 4:
4263 fputc ('h', f);
4264 break;
4265 case 5:
4266 fputc ('w', f);
4267 break;
4268 default:
4269 output_operand_lossage ("invalid operand for '%%%c'", code);
4270 return;
4273 break;
4275 case 'p':
4277 int n;
4279 /* Print N such that 2^N == X. */
4280 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
4282 output_operand_lossage ("invalid operand for '%%%c'", code);
4283 return;
4286 asm_fprintf (f, "%d", n);
4288 break;
4290 case 'P':
4291 /* Print the number of non-zero bits in X (a const_int). */
4292 if (!CONST_INT_P (x))
4294 output_operand_lossage ("invalid operand for '%%%c'", code);
4295 return;
4298 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
4299 break;
4301 case 'H':
4302 /* Print the higher numbered register of a pair (TImode) of regs. */
4303 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
4305 output_operand_lossage ("invalid operand for '%%%c'", code);
4306 return;
4309 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
4310 break;
4312 case 'm':
4314 int cond_code;
4315 /* Print a condition (eq, ne, etc). */
4317 /* CONST_TRUE_RTX means always -- that's the default. */
4318 if (x == const_true_rtx)
4319 return;
4321 if (!COMPARISON_P (x))
4323 output_operand_lossage ("invalid operand for '%%%c'", code);
4324 return;
4327 cond_code = aarch64_get_condition_code (x);
4328 gcc_assert (cond_code >= 0);
4329 fputs (aarch64_condition_codes[cond_code], f);
4331 break;
4333 case 'M':
4335 int cond_code;
4336 /* Print the inverse of a condition (eq <-> ne, etc). */
4338 /* CONST_TRUE_RTX means never -- that's the default. */
4339 if (x == const_true_rtx)
4341 fputs ("nv", f);
4342 return;
4345 if (!COMPARISON_P (x))
4347 output_operand_lossage ("invalid operand for '%%%c'", code);
4348 return;
4350 cond_code = aarch64_get_condition_code (x);
4351 gcc_assert (cond_code >= 0);
4352 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
4353 (cond_code)], f);
4355 break;
4357 case 'b':
4358 case 'h':
4359 case 's':
4360 case 'd':
4361 case 'q':
4362 /* Print a scalar FP/SIMD register name. */
4363 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4365 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4366 return;
4368 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
4369 break;
4371 case 'S':
4372 case 'T':
4373 case 'U':
4374 case 'V':
4375 /* Print the first FP/SIMD register name in a list. */
4376 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4378 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4379 return;
4381 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
4382 break;
4384 case 'R':
4385 /* Print a scalar FP/SIMD register name + 1. */
4386 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4388 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4389 return;
4391 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
4392 break;
4394 case 'X':
4395 /* Print bottom 16 bits of integer constant in hex. */
4396 if (!CONST_INT_P (x))
4398 output_operand_lossage ("invalid operand for '%%%c'", code);
4399 return;
4401 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
4402 break;
4404 case 'w':
4405 case 'x':
4406 /* Print a general register name or the zero register (32-bit or
4407 64-bit). */
4408 if (x == const0_rtx
4409 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
4411 asm_fprintf (f, "%czr", code);
4412 break;
4415 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
4417 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
4418 break;
4421 if (REG_P (x) && REGNO (x) == SP_REGNUM)
4423 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
4424 break;
4427 /* Fall through. */
4429 case 0:
4430 /* Print a normal operand. If it's a general register, then we
4431 assume DImode. */
4432 if (x == NULL)
4434 output_operand_lossage ("missing operand");
4435 return;
4438 switch (GET_CODE (x))
4440 case REG:
4441 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
4442 break;
4444 case MEM:
4445 aarch64_memory_reference_mode = GET_MODE (x);
4446 output_address (XEXP (x, 0));
4447 break;
4449 case LABEL_REF:
4450 case SYMBOL_REF:
4451 output_addr_const (asm_out_file, x);
4452 break;
4454 case CONST_INT:
4455 asm_fprintf (f, "%wd", INTVAL (x));
4456 break;
4458 case CONST_VECTOR:
4459 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
4461 gcc_assert (
4462 aarch64_const_vec_all_same_in_range_p (x,
4463 HOST_WIDE_INT_MIN,
4464 HOST_WIDE_INT_MAX));
4465 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
4467 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
4469 fputc ('0', f);
4471 else
4472 gcc_unreachable ();
4473 break;
4475 case CONST_DOUBLE:
4476 /* CONST_DOUBLE can represent a double-width integer.
4477 In this case, the mode of x is VOIDmode. */
4478 if (GET_MODE (x) == VOIDmode)
4479 ; /* Do Nothing. */
4480 else if (aarch64_float_const_zero_rtx_p (x))
4482 fputc ('0', f);
4483 break;
4485 else if (aarch64_float_const_representable_p (x))
4487 #define buf_size 20
4488 char float_buf[buf_size] = {'\0'};
4489 REAL_VALUE_TYPE r;
4490 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4491 real_to_decimal_for_mode (float_buf, &r,
4492 buf_size, buf_size,
4493 1, GET_MODE (x));
4494 asm_fprintf (asm_out_file, "%s", float_buf);
4495 break;
4496 #undef buf_size
4498 output_operand_lossage ("invalid constant");
4499 return;
4500 default:
4501 output_operand_lossage ("invalid operand");
4502 return;
4504 break;
4506 case 'A':
4507 if (GET_CODE (x) == HIGH)
4508 x = XEXP (x, 0);
4510 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4512 case SYMBOL_SMALL_GOT_4G:
4513 asm_fprintf (asm_out_file, ":got:");
4514 break;
4516 case SYMBOL_SMALL_TLSGD:
4517 asm_fprintf (asm_out_file, ":tlsgd:");
4518 break;
4520 case SYMBOL_SMALL_TLSDESC:
4521 asm_fprintf (asm_out_file, ":tlsdesc:");
4522 break;
4524 case SYMBOL_SMALL_GOTTPREL:
4525 asm_fprintf (asm_out_file, ":gottprel:");
4526 break;
4528 case SYMBOL_TLSLE:
4529 asm_fprintf (asm_out_file, ":tprel:");
4530 break;
4532 case SYMBOL_TINY_GOT:
4533 gcc_unreachable ();
4534 break;
4536 default:
4537 break;
4539 output_addr_const (asm_out_file, x);
4540 break;
4542 case 'L':
4543 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4545 case SYMBOL_SMALL_GOT_4G:
4546 asm_fprintf (asm_out_file, ":lo12:");
4547 break;
4549 case SYMBOL_SMALL_TLSGD:
4550 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4551 break;
4553 case SYMBOL_SMALL_TLSDESC:
4554 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4555 break;
4557 case SYMBOL_SMALL_GOTTPREL:
4558 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4559 break;
4561 case SYMBOL_TLSLE:
4562 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4563 break;
4565 case SYMBOL_TINY_GOT:
4566 asm_fprintf (asm_out_file, ":got:");
4567 break;
4569 default:
4570 break;
4572 output_addr_const (asm_out_file, x);
4573 break;
4575 case 'G':
4577 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4579 case SYMBOL_TLSLE:
4580 asm_fprintf (asm_out_file, ":tprel_hi12:");
4581 break;
4582 default:
4583 break;
4585 output_addr_const (asm_out_file, x);
4586 break;
4588 case 'K':
4590 int cond_code;
4591 /* Print nzcv. */
4593 if (!COMPARISON_P (x))
4595 output_operand_lossage ("invalid operand for '%%%c'", code);
4596 return;
4599 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4600 gcc_assert (cond_code >= 0);
4601 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
4603 break;
4605 case 'k':
4607 int cond_code;
4608 /* Print nzcv. */
4610 if (!COMPARISON_P (x))
4612 output_operand_lossage ("invalid operand for '%%%c'", code);
4613 return;
4616 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4617 gcc_assert (cond_code >= 0);
4618 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
4620 break;
4622 default:
4623 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4624 return;
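/* Print memory address X in assembler syntax to file F.  The mode of the
   enclosing MEM was recorded in aarch64_memory_reference_mode by the MEM
   case above and is used to print the writeback offsets.  */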
4628 void
4629 aarch64_print_operand_address (FILE *f, rtx x)
4631 struct aarch64_address_info addr;
4633 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4634 MEM, true))
4635 switch (addr.type)
4637 case ADDRESS_REG_IMM:
4638 if (addr.offset == const0_rtx)
4639 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
4640 else
4641 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
4642 INTVAL (addr.offset));
4643 return;
4645 case ADDRESS_REG_REG:
4646 if (addr.shift == 0)
4647 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
4648 reg_names [REGNO (addr.offset)]);
4649 else
4650 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
4651 reg_names [REGNO (addr.offset)], addr.shift);
4652 return;
4654 case ADDRESS_REG_UXTW:
4655 if (addr.shift == 0)
4656 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
4657 REGNO (addr.offset) - R0_REGNUM);
4658 else
4659 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
4660 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4661 return;
4663 case ADDRESS_REG_SXTW:
4664 if (addr.shift == 0)
4665 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
4666 REGNO (addr.offset) - R0_REGNUM);
4667 else
4668 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
4669 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4670 return;
4672 case ADDRESS_REG_WB:
4673 switch (GET_CODE (x))
4675 case PRE_INC:
4676 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
4677 GET_MODE_SIZE (aarch64_memory_reference_mode));
4678 return;
4679 case POST_INC:
4680 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4681 GET_MODE_SIZE (aarch64_memory_reference_mode));
4682 return;
4683 case PRE_DEC:
4684 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4685 GET_MODE_SIZE (aarch64_memory_reference_mode));
4686 return;
4687 case POST_DEC:
4688 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4689 GET_MODE_SIZE (aarch64_memory_reference_mode));
4690 return;
4691 case PRE_MODIFY:
4692 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4693 INTVAL (addr.offset));
4694 return;
4695 case POST_MODIFY:
4696 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4697 INTVAL (addr.offset));
4698 return;
4699 default:
4700 break;
4702 break;
4704 case ADDRESS_LO_SUM:
4705 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4706 output_addr_const (f, addr.offset);
4707 asm_fprintf (f, "]");
4708 return;
4710 case ADDRESS_SYMBOLIC:
4711 break;
4714 output_addr_const (f, x);
4717 bool
4718 aarch64_label_mentioned_p (rtx x)
4720 const char *fmt;
4721 int i;
4723 if (GET_CODE (x) == LABEL_REF)
4724 return true;
4726 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4727 referencing instruction, but they are constant offsets, not
4728 symbols. */
4729 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4730 return false;
4732 fmt = GET_RTX_FORMAT (GET_CODE (x));
4733 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4735 if (fmt[i] == 'E')
4737 int j;
4739 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4740 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4741 return 1;
4743 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4744 return 1;
4747 return 0;
4750 /* Implement REGNO_REG_CLASS. */
4752 enum reg_class
4753 aarch64_regno_regclass (unsigned regno)
4755 if (GP_REGNUM_P (regno))
4756 return GENERAL_REGS;
4758 if (regno == SP_REGNUM)
4759 return STACK_REG;
4761 if (regno == FRAME_POINTER_REGNUM
4762 || regno == ARG_POINTER_REGNUM)
4763 return POINTER_REGS;
4765 if (FP_REGNUM_P (regno))
4766 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4768 return NO_REGS;
4771 static rtx
4772 aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
4774 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4775 where mask is selected by alignment and size of the offset.
4776 We try to pick as large a range for the offset as possible to
4777 maximize the chance of a CSE. However, for aligned addresses
4778 we limit the range to 4k so that structures with different sized
4779 elements are likely to use the same base. */
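/* For example, an SImode access at x + 0x12344 takes the aligned case
   below: base_offset becomes 0x12000, so we emit tmp = x + 0x12000 and
   rewrite the address as tmp + 0x344; a neighbouring access such as
   x + 0x12348 can then CSE the same tmp.  */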
4781 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4783 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4784 HOST_WIDE_INT base_offset;
4786 /* Does it look like we'll need a load/store-pair operation? */
4787 if (GET_MODE_SIZE (mode) > 16
4788 || mode == TImode)
4789 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4790 & ~((128 * GET_MODE_SIZE (mode)) - 1));
4791 /* For offsets that aren't a multiple of the access size, the limit is
4792 -256...255. */
4793 else if (offset & (GET_MODE_SIZE (mode) - 1))
4794 base_offset = (offset + 0x100) & ~0x1ff;
4795 else
4796 base_offset = offset & ~0xfff;
4798 if (base_offset == 0)
4799 return x;
4801 offset -= base_offset;
4802 rtx base_reg = gen_reg_rtx (Pmode);
4803 rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4804 NULL_RTX);
4805 emit_move_insn (base_reg, val);
4806 x = plus_constant (Pmode, base_reg, offset);
4809 return x;
4812 /* Try a machine-dependent way of reloading an illegitimate address
4813 operand. If we find one, push the reload and return the new rtx. */
4816 aarch64_legitimize_reload_address (rtx *x_p,
4817 machine_mode mode,
4818 int opnum, int type,
4819 int ind_levels ATTRIBUTE_UNUSED)
4821 rtx x = *x_p;
4823 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4824 if (aarch64_vect_struct_mode_p (mode)
4825 && GET_CODE (x) == PLUS
4826 && REG_P (XEXP (x, 0))
4827 && CONST_INT_P (XEXP (x, 1)))
4829 rtx orig_rtx = x;
4830 x = copy_rtx (x);
4831 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4832 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4833 opnum, (enum reload_type) type);
4834 return x;
4837 /* We must recognize output that we have already generated ourselves. */
4838 if (GET_CODE (x) == PLUS
4839 && GET_CODE (XEXP (x, 0)) == PLUS
4840 && REG_P (XEXP (XEXP (x, 0), 0))
4841 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4842 && CONST_INT_P (XEXP (x, 1)))
4844 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4845 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4846 opnum, (enum reload_type) type);
4847 return x;
4850 /* We wish to handle large displacements off a base register by splitting
4851 the addend across an add and the mem insn. This can cut the number of
4852 extra insns needed from 3 to 1. It is only useful for load/store of a
4853 single register with 12 bit offset field. */
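/* For example, an SImode load at base + 0x12344 is split into
   high = 0x12000 (a 12-bit immediate shifted by 12, reloaded into the
   base register) and low = 0x344, which stays in the load's offset
   field.  */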
4854 if (GET_CODE (x) == PLUS
4855 && REG_P (XEXP (x, 0))
4856 && CONST_INT_P (XEXP (x, 1))
4857 && HARD_REGISTER_P (XEXP (x, 0))
4858 && mode != TImode
4859 && mode != TFmode
4860 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4862 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4863 HOST_WIDE_INT low = val & 0xfff;
4864 HOST_WIDE_INT high = val - low;
4865 HOST_WIDE_INT offs;
4866 rtx cst;
4867 machine_mode xmode = GET_MODE (x);
4869 /* In ILP32, xmode can be either DImode or SImode. */
4870 gcc_assert (xmode == DImode || xmode == SImode);
4872 /* Punt on BLKmode offsets (mode size 0 here): we cannot ascertain
4873 BLKmode alignment, so let the normal reload machinery handle them. */
4874 if (GET_MODE_SIZE (mode) == 0)
4875 return NULL_RTX;
4877 offs = low % GET_MODE_SIZE (mode);
4879 /* Align misaligned offset by adjusting high part to compensate. */
4880 if (offs != 0)
4882 if (aarch64_uimm12_shift (high + offs))
4884 /* Align down. */
4885 low = low - offs;
4886 high = high + offs;
4888 else
4890 /* Align up. */
4891 offs = GET_MODE_SIZE (mode) - offs;
4892 low = low + offs;
4893 high = high + (low & 0x1000) - offs;
4894 low &= 0xfff;
4898 /* Check for overflow. */
4899 if (high + low != val)
4900 return NULL_RTX;
4902 cst = GEN_INT (high);
4903 if (!aarch64_uimm12_shift (high))
4904 cst = force_const_mem (xmode, cst);
4906 /* Reload high part into base reg, leaving the low part
4907 in the mem instruction.
4908 Note that replacing this gen_rtx_PLUS with plus_constant is
4909 wrong in this case because we rely on the
4910 (plus (plus reg c1) c2) structure being preserved so that
4911 XEXP (*p, 0) in push_reload below uses the correct term. */
4912 x = gen_rtx_PLUS (xmode,
4913 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4914 GEN_INT (low));
4916 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4917 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4918 opnum, (enum reload_type) type);
4919 return x;
4922 return NULL_RTX;
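/* Implement TARGET_SECONDARY_RELOAD; see the comments on the individual
   cases below.  */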
4926 static reg_class_t
4927 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4928 reg_class_t rclass,
4929 machine_mode mode,
4930 secondary_reload_info *sri)
4932 /* Without the TARGET_SIMD instructions we cannot move a Q register
4933 to a Q register directly. We need a scratch. */
4934 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4935 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4936 && reg_class_subset_p (rclass, FP_REGS))
4938 if (mode == TFmode)
4939 sri->icode = CODE_FOR_aarch64_reload_movtf;
4940 else if (mode == TImode)
4941 sri->icode = CODE_FOR_aarch64_reload_movti;
4942 return NO_REGS;
4945 /* A TFmode or TImode memory access should be handled via an FP_REG
4946 because AArch64 has richer addressing modes for LDR/STR instructions
4947 than for LDP/STP instructions. */
4948 if (TARGET_FLOAT && rclass == GENERAL_REGS
4949 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4950 return FP_REGS;
4952 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4953 return GENERAL_REGS;
4955 return NO_REGS;
4958 static bool
4959 aarch64_can_eliminate (const int from, const int to)
4961 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4962 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4964 if (frame_pointer_needed)
4966 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4967 return true;
4968 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4969 return false;
4970 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4971 && !cfun->calls_alloca)
4972 return true;
4973 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4974 return true;
4976 return false;
4978 else
4980 /* If we decided that we didn't need a leaf frame pointer but then used
4981 LR in the function, then we'll want a frame pointer after all, so
4982 prevent this elimination to ensure a frame pointer is used. */
4983 if (to == STACK_POINTER_REGNUM
4984 && flag_omit_leaf_frame_pointer
4985 && df_regs_ever_live_p (LR_REGNUM))
4986 return false;
4989 return true;
4992 HOST_WIDE_INT
4993 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4995 aarch64_layout_frame ();
4997 if (to == HARD_FRAME_POINTER_REGNUM)
4999 if (from == ARG_POINTER_REGNUM)
5000 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
5002 if (from == FRAME_POINTER_REGNUM)
5003 return (cfun->machine->frame.hard_fp_offset
5004 - cfun->machine->frame.saved_varargs_size);
5007 if (to == STACK_POINTER_REGNUM)
5009 if (from == FRAME_POINTER_REGNUM)
5010 return (cfun->machine->frame.frame_size
5011 - cfun->machine->frame.saved_varargs_size);
5014 return cfun->machine->frame.frame_size;
5017 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
5018 previous frame. */
5021 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
5023 if (count != 0)
5024 return const0_rtx;
5025 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
5029 static void
5030 aarch64_asm_trampoline_template (FILE *f)
5032 if (TARGET_ILP32)
5034 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
5035 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
5037 else
5039 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
5040 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
5042 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
5043 assemble_aligned_integer (4, const0_rtx);
5044 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
5045 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
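/* Roughly, the template emitted above lays out the trampoline as:
	ldr	x17, .+16	// target function address
	ldr	x18, .+20	// static chain value
	br	x17
	<4 bytes of padding>
	<pointer-sized target address>	// written by aarch64_trampoline_init
	<pointer-sized static chain>
   where x17/x18 assume the usual IP1/static-chain register numbering
   (LP64 shown; the ILP32 variant loads w registers from 32-bit slots).  */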
5048 static void
5049 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5051 rtx fnaddr, mem, a_tramp;
5052 const int tramp_code_sz = 16;
5054 /* Don't need to copy the trailing D-words; we fill those in below. */
5055 emit_block_move (m_tramp, assemble_trampoline_template (),
5056 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
5057 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
5058 fnaddr = XEXP (DECL_RTL (fndecl), 0);
5059 if (GET_MODE (fnaddr) != ptr_mode)
5060 fnaddr = convert_memory_address (ptr_mode, fnaddr);
5061 emit_move_insn (mem, fnaddr);
5063 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
5064 emit_move_insn (mem, chain_value);
5066 /* XXX We should really define a "clear_cache" pattern and use
5067 gen_clear_cache(). */
5068 a_tramp = XEXP (m_tramp, 0);
5069 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
5070 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
5071 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
5072 ptr_mode);
5075 static unsigned char
5076 aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
5078 switch (regclass)
5080 case CALLER_SAVE_REGS:
5081 case POINTER_REGS:
5082 case GENERAL_REGS:
5083 case ALL_REGS:
5084 case FP_REGS:
5085 case FP_LO_REGS:
5086 return
5087 aarch64_vector_mode_p (mode)
5088 ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
5089 : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5090 case STACK_REG:
5091 return 1;
5093 case NO_REGS:
5094 return 0;
5096 default:
5097 break;
5099 gcc_unreachable ();
5102 static reg_class_t
5103 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
5105 if (regclass == POINTER_REGS)
5106 return GENERAL_REGS;
5108 if (regclass == STACK_REG)
5110 if (REG_P(x)
5111 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
5112 return regclass;
5114 return NO_REGS;
5117 /* If it's an integer immediate that MOVI can't handle, then
5118 FP_REGS is not an option, so we return NO_REGS instead. */
5119 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
5120 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
5121 return NO_REGS;
5123 /* Register elimination can result in a request for
5124 SP+constant->FP_REGS. We cannot support such operations, which
5125 use SP as the source and an FP_REG as the destination, so reject
5126 them right now. */
5127 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
5129 rtx lhs = XEXP (x, 0);
5131 /* Look through a possible SUBREG introduced by ILP32. */
5132 if (GET_CODE (lhs) == SUBREG)
5133 lhs = SUBREG_REG (lhs);
5135 gcc_assert (REG_P (lhs));
5136 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
5137 POINTER_REGS));
5138 return NO_REGS;
5141 return regclass;
5144 void
5145 aarch64_asm_output_labelref (FILE* f, const char *name)
5147 asm_fprintf (f, "%U%s", name);
5150 static void
5151 aarch64_elf_asm_constructor (rtx symbol, int priority)
5153 if (priority == DEFAULT_INIT_PRIORITY)
5154 default_ctor_section_asm_out_constructor (symbol, priority);
5155 else
5157 section *s;
5158 char buf[18];
5159 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
5160 s = get_section (buf, SECTION_WRITE, NULL);
5161 switch_to_section (s);
5162 assemble_align (POINTER_SIZE);
5163 assemble_aligned_integer (POINTER_BYTES, symbol);
5167 static void
5168 aarch64_elf_asm_destructor (rtx symbol, int priority)
5170 if (priority == DEFAULT_INIT_PRIORITY)
5171 default_dtor_section_asm_out_destructor (symbol, priority);
5172 else
5174 section *s;
5175 char buf[18];
5176 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
5177 s = get_section (buf, SECTION_WRITE, NULL);
5178 switch_to_section (s);
5179 assemble_align (POINTER_SIZE);
5180 assemble_aligned_integer (POINTER_BYTES, symbol);
5184 const char*
5185 aarch64_output_casesi (rtx *operands)
5187 char buf[100];
5188 char label[100];
5189 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
5190 int index;
5191 static const char *const patterns[4][2] =
5194 "ldrb\t%w3, [%0,%w1,uxtw]",
5195 "add\t%3, %4, %w3, sxtb #2"
5198 "ldrh\t%w3, [%0,%w1,uxtw #1]",
5199 "add\t%3, %4, %w3, sxth #2"
5202 "ldr\t%w3, [%0,%w1,uxtw #2]",
5203 "add\t%3, %4, %w3, sxtw #2"
5205 /* We assume that DImode is only generated when not optimizing and
5206 that we don't really need 64-bit address offsets. That would
5207 imply an object file with 8GB of code in a single function! */
5209 "ldr\t%w3, [%0,%w1,uxtw #2]",
5210 "add\t%3, %4, %w3, sxtw #2"
5214 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
5216 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
5218 gcc_assert (index >= 0 && index <= 3);
5220 /* Need to implement table size reduction, by changing the code below. */
5221 output_asm_insn (patterns[index][0], operands);
5222 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
5223 snprintf (buf, sizeof (buf),
5224 "adr\t%%4, %s", targetm.strip_name_encoding (label));
5225 output_asm_insn (buf, operands);
5226 output_asm_insn (patterns[index][1], operands);
5227 output_asm_insn ("br\t%3", operands);
5228 assemble_label (asm_out_file, label);
5229 return "";
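/* For a HImode dispatch table the sequence emitted above is:
	ldrh	%w3, [%0, %w1, uxtw #1]
	adr	%4, .Lrtx<n>
	add	%3, %4, %w3, sxth #2
	br	%3
   i.e. the loaded 16-bit entry is sign-extended, shifted left by 2 and
   added to the .Lrtx<n> anchor label emitted just after the br.  */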
5233 /* Return size in bits of an arithmetic operand which is shifted/scaled and
5234 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5235 operator. */
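/* For example, a mask of 0x3fc with a shift of 2 is 0xff << 2, so we
   return 8 (a UXTB); masks that do not cover exactly 8, 16 or 32 bits
   return 0.  */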
5238 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
5240 if (shift >= 0 && shift <= 3)
5242 int size;
5243 for (size = 8; size <= 32; size *= 2)
5245 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
5246 if (mask == bits << shift)
5247 return size;
5250 return 0;
5253 static bool
5254 aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
5255 const_rtx x ATTRIBUTE_UNUSED)
5257 /* We can't use blocks for constants when we're using a per-function
5258 constant pool. */
5259 return false;
5262 static section *
5263 aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
5264 rtx x ATTRIBUTE_UNUSED,
5265 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
5267 /* Force all constant pool entries into the current function section. */
5268 return function_section (current_function_decl);
5272 /* Costs. */
5274 /* Helper function for rtx cost calculation. Strip a shift expression
5275 from X. Returns the inner operand if successful, or the original
5276 expression on failure. */
5277 static rtx
5278 aarch64_strip_shift (rtx x)
5280 rtx op = x;
5282 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5283 we can convert both to ROR during final output. */
5284 if ((GET_CODE (op) == ASHIFT
5285 || GET_CODE (op) == ASHIFTRT
5286 || GET_CODE (op) == LSHIFTRT
5287 || GET_CODE (op) == ROTATERT
5288 || GET_CODE (op) == ROTATE)
5289 && CONST_INT_P (XEXP (op, 1)))
5290 return XEXP (op, 0);
5292 if (GET_CODE (op) == MULT
5293 && CONST_INT_P (XEXP (op, 1))
5294 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5295 return XEXP (op, 0);
5297 return x;
5300 /* Helper function for rtx cost calculation. Strip an extend
5301 expression from X. Returns the inner operand if successful, or the
5302 original expression on failure. We deal with a number of possible
5303 canonicalization variations here. */
5304 static rtx
5305 aarch64_strip_extend (rtx x)
5307 rtx op = x;
5309 /* Zero and sign extraction of a widened value. */
5310 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5311 && XEXP (op, 2) == const0_rtx
5312 && GET_CODE (XEXP (op, 0)) == MULT
5313 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5314 XEXP (op, 1)))
5315 return XEXP (XEXP (op, 0), 0);
5317 /* It can also be represented (for zero-extend) as an AND with an
5318 immediate. */
5319 if (GET_CODE (op) == AND
5320 && GET_CODE (XEXP (op, 0)) == MULT
5321 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5322 && CONST_INT_P (XEXP (op, 1))
5323 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5324 INTVAL (XEXP (op, 1))) != 0)
5325 return XEXP (XEXP (op, 0), 0);
5327 /* Now handle extended register, as this may also have an optional
5328 left shift by 1..4. */
5329 if (GET_CODE (op) == ASHIFT
5330 && CONST_INT_P (XEXP (op, 1))
5331 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5332 op = XEXP (op, 0);
5334 if (GET_CODE (op) == ZERO_EXTEND
5335 || GET_CODE (op) == SIGN_EXTEND)
5336 op = XEXP (op, 0);
5338 if (op != x)
5339 return op;
5341 return x;
5344 /* Return true iff CODE is a shift supported in combination
5345 with arithmetic instructions. */
5347 static bool
5348 aarch64_shift_p (enum rtx_code code)
5350 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
5353 /* Helper function for rtx cost calculation. Calculate the cost of
5354 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
5355 Return the calculated cost of the expression, recursing manually in to
5356 operands where needed. */
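/* For instance, when optimizing for speed, (plus (mult x 4) y) is costed
   as an ADD with a shifted operand: the MULT by 4 is canonically a shift
   by 2, so we add extra_cost->alu.arith_shift plus the cost of X rather
   than any multiply cost.  */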
5358 static int
5359 aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
5361 rtx op0, op1;
5362 const struct cpu_cost_table *extra_cost
5363 = aarch64_tune_params.insn_extra_cost;
5364 int cost = 0;
5365 bool compound_p = (outer == PLUS || outer == MINUS);
5366 machine_mode mode = GET_MODE (x);
5368 gcc_checking_assert (code == MULT);
5370 op0 = XEXP (x, 0);
5371 op1 = XEXP (x, 1);
5373 if (VECTOR_MODE_P (mode))
5374 mode = GET_MODE_INNER (mode);
5376 /* Integer multiply/fma. */
5377 if (GET_MODE_CLASS (mode) == MODE_INT)
5379 /* The multiply will be canonicalized as a shift, cost it as such. */
5380 if (aarch64_shift_p (GET_CODE (x))
5381 || (CONST_INT_P (op1)
5382 && exact_log2 (INTVAL (op1)) > 0))
5384 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
5385 || GET_CODE (op0) == SIGN_EXTEND;
5386 if (speed)
5388 if (compound_p)
5390 if (REG_P (op1))
5391 /* ARITH + shift-by-register. */
5392 cost += extra_cost->alu.arith_shift_reg;
5393 else if (is_extend)
5394 /* ARITH + extended register. We don't have a cost field
5395 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
5396 cost += extra_cost->alu.extend_arith;
5397 else
5398 /* ARITH + shift-by-immediate. */
5399 cost += extra_cost->alu.arith_shift;
5401 else
5402 /* LSL (immediate). */
5403 cost += extra_cost->alu.shift;
5406 /* Strip extends as we will have costed them in the case above. */
5407 if (is_extend)
5408 op0 = aarch64_strip_extend (op0);
5410 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
5412 return cost;
5415 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
5416 compound and let the below cases handle it. After all, MNEG is a
5417 special-case alias of MSUB. */
5418 if (GET_CODE (op0) == NEG)
5420 op0 = XEXP (op0, 0);
5421 compound_p = true;
5424 /* Integer multiplies or FMAs have zero/sign extending variants. */
5425 if ((GET_CODE (op0) == ZERO_EXTEND
5426 && GET_CODE (op1) == ZERO_EXTEND)
5427 || (GET_CODE (op0) == SIGN_EXTEND
5428 && GET_CODE (op1) == SIGN_EXTEND))
5430 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
5431 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
5433 if (speed)
5435 if (compound_p)
5436 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
5437 cost += extra_cost->mult[0].extend_add;
5438 else
5439 /* MUL/SMULL/UMULL. */
5440 cost += extra_cost->mult[0].extend;
5443 return cost;
5446 /* This is either an integer multiply or a MADD. In both cases
5447 we want to recurse and cost the operands. */
5448 cost += rtx_cost (op0, mode, MULT, 0, speed);
5449 cost += rtx_cost (op1, mode, MULT, 1, speed);
5451 if (speed)
5453 if (compound_p)
5454 /* MADD/MSUB. */
5455 cost += extra_cost->mult[mode == DImode].add;
5456 else
5457 /* MUL. */
5458 cost += extra_cost->mult[mode == DImode].simple;
5461 return cost;
5463 else
5465 if (speed)
5467 /* Floating-point FMA/FMUL can also support negations of the
5468 operands. */
5469 if (GET_CODE (op0) == NEG)
5470 op0 = XEXP (op0, 0);
5471 if (GET_CODE (op1) == NEG)
5472 op1 = XEXP (op1, 0);
5474 if (compound_p)
5475 /* FMADD/FNMADD/FNMSUB/FMSUB. */
5476 cost += extra_cost->fp[mode == DFmode].fma;
5477 else
5478 /* FMUL/FNMUL. */
5479 cost += extra_cost->fp[mode == DFmode].mult;
5482 cost += rtx_cost (op0, mode, MULT, 0, speed);
5483 cost += rtx_cost (op1, mode, MULT, 1, speed);
5484 return cost;
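/* Implement TARGET_ADDRESS_COST: classify the address with
   aarch64_classify_address and cost it from the tuning target's addr_cost
   tables, adding a scaling cost for shifted index registers.  */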
5488 static int
5489 aarch64_address_cost (rtx x,
5490 machine_mode mode,
5491 addr_space_t as ATTRIBUTE_UNUSED,
5492 bool speed)
5494 enum rtx_code c = GET_CODE (x);
5495 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
5496 struct aarch64_address_info info;
5497 int cost = 0;
5498 info.shift = 0;
5500 if (!aarch64_classify_address (&info, x, mode, c, false))
5502 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5504 /* This is a CONST or SYMBOL ref which will be split
5505 in a different way depending on the code model in use.
5506 Cost it through the generic infrastructure. */
5507 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
5508 /* Divide through by the cost of one instruction to
5509 bring it to the same units as the address costs. */
5510 cost_symbol_ref /= COSTS_N_INSNS (1);
5511 /* The cost is then the cost of preparing the address,
5512 followed by an immediate (possibly 0) offset. */
5513 return cost_symbol_ref + addr_cost->imm_offset;
5515 else
5517 /* This is most likely a jump table from a case
5518 statement. */
5519 return addr_cost->register_offset;
5523 switch (info.type)
5525 case ADDRESS_LO_SUM:
5526 case ADDRESS_SYMBOLIC:
5527 case ADDRESS_REG_IMM:
5528 cost += addr_cost->imm_offset;
5529 break;
5531 case ADDRESS_REG_WB:
5532 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5533 cost += addr_cost->pre_modify;
5534 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5535 cost += addr_cost->post_modify;
5536 else
5537 gcc_unreachable ();
5539 break;
5541 case ADDRESS_REG_REG:
5542 cost += addr_cost->register_offset;
5543 break;
5545 case ADDRESS_REG_UXTW:
5546 case ADDRESS_REG_SXTW:
5547 cost += addr_cost->register_extend;
5548 break;
5550 default:
5551 gcc_unreachable ();
5555 if (info.shift > 0)
5557 /* For the sake of calculating the cost of the shifted register
5558 component, we can treat same sized modes in the same way. */
5559 switch (GET_MODE_BITSIZE (mode))
5561 case 16:
5562 cost += addr_cost->addr_scale_costs.hi;
5563 break;
5565 case 32:
5566 cost += addr_cost->addr_scale_costs.si;
5567 break;
5569 case 64:
5570 cost += addr_cost->addr_scale_costs.di;
5571 break;
5573 /* We can't tell, or this is a 128-bit vector. */
5574 default:
5575 cost += addr_cost->addr_scale_costs.ti;
5576 break;
5580 return cost;
5583 /* Return the cost of a branch. If SPEED_P is true then the compiler is
5584 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
5585 to be taken. */
5588 aarch64_branch_cost (bool speed_p, bool predictable_p)
5590 /* When optimizing for speed, an unpredictable branch is charged at the higher unpredictable cost. */
5591 const struct cpu_branch_cost *branch_costs =
5592 aarch64_tune_params.branch_costs;
5594 if (!speed_p || predictable_p)
5595 return branch_costs->predictable;
5596 else
5597 return branch_costs->unpredictable;
5600 /* Return true if the RTX X in mode MODE is a zero or sign extract
5601 usable in an ADD or SUB (extended register) instruction. */
5602 static bool
5603 aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
5605 /* Catch add with a sign extract.
5606 This is add_<optab><mode>_multp2. */
5607 if (GET_CODE (x) == SIGN_EXTRACT
5608 || GET_CODE (x) == ZERO_EXTRACT)
5610 rtx op0 = XEXP (x, 0);
5611 rtx op1 = XEXP (x, 1);
5612 rtx op2 = XEXP (x, 2);
5614 if (GET_CODE (op0) == MULT
5615 && CONST_INT_P (op1)
5616 && op2 == const0_rtx
5617 && CONST_INT_P (XEXP (op0, 1))
5618 && aarch64_is_extend_from_extract (mode,
5619 XEXP (op0, 1),
5620 op1))
5622 return true;
5625 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
5626 No shift. */
5627 else if (GET_CODE (x) == SIGN_EXTEND
5628 || GET_CODE (x) == ZERO_EXTEND)
5629 return REG_P (XEXP (x, 0));
5631 return false;
5634 static bool
5635 aarch64_frint_unspec_p (unsigned int u)
5637 switch (u)
5639 case UNSPEC_FRINTZ:
5640 case UNSPEC_FRINTP:
5641 case UNSPEC_FRINTM:
5642 case UNSPEC_FRINTA:
5643 case UNSPEC_FRINTN:
5644 case UNSPEC_FRINTX:
5645 case UNSPEC_FRINTI:
5646 return true;
5648 default:
5649 return false;
5653 /* Return true iff X is an rtx that will match an extr instruction
5654 i.e. as described in the *extr<mode>5_insn family of patterns.
5655 OP0 and OP1 will be set to the operands of the shifts involved
5656 on success and will be NULL_RTX otherwise. */
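/* For example, in DImode (ior (ashift x 48) (lshiftrt y 16)) passes the
   check below (48 + 16 == 64) and corresponds to an EXTR with an
   immediate of 16, extracting a 64-bit field from the x:y register
   pair.  */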
5658 static bool
5659 aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
5661 rtx op0, op1;
5662 machine_mode mode = GET_MODE (x);
5664 *res_op0 = NULL_RTX;
5665 *res_op1 = NULL_RTX;
5667 if (GET_CODE (x) != IOR)
5668 return false;
5670 op0 = XEXP (x, 0);
5671 op1 = XEXP (x, 1);
5673 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
5674 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
5676 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
5677 if (GET_CODE (op1) == ASHIFT)
5678 std::swap (op0, op1);
5680 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
5681 return false;
5683 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
5684 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
5686 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
5687 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
5689 *res_op0 = XEXP (op0, 0);
5690 *res_op1 = XEXP (op1, 0);
5691 return true;
5695 return false;
5698 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5699 storing it in *COST. Result is true if the total cost of the operation
5700 has now been calculated. */
5701 static bool
5702 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5704 rtx inner;
5705 rtx comparator;
5706 enum rtx_code cmpcode;
5708 if (COMPARISON_P (op0))
5710 inner = XEXP (op0, 0);
5711 comparator = XEXP (op0, 1);
5712 cmpcode = GET_CODE (op0);
5714 else
5716 inner = op0;
5717 comparator = const0_rtx;
5718 cmpcode = NE;
5721 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5723 /* Conditional branch. */
5724 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5725 return true;
5726 else
5728 if (cmpcode == NE || cmpcode == EQ)
5730 if (comparator == const0_rtx)
5732 /* TBZ/TBNZ/CBZ/CBNZ. */
5733 if (GET_CODE (inner) == ZERO_EXTRACT)
5734 /* TBZ/TBNZ. */
5735 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
5736 ZERO_EXTRACT, 0, speed);
5737 else
5738 /* CBZ/CBNZ. */
5739 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
5741 return true;
5744 else if (cmpcode == LT || cmpcode == GE)
5746 /* TBZ/TBNZ. */
5747 if (comparator == const0_rtx)
5748 return true;
5752 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5754 /* It's a conditional operation based on the status flags,
5755 so it must be some flavor of CSEL. */
5757 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5758 if (GET_CODE (op1) == NEG
5759 || GET_CODE (op1) == NOT
5760 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5761 op1 = XEXP (op1, 0);
5763 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
5764 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
5765 return true;
5768 /* We don't know what this is, cost all operands. */
5769 return false;
5772 /* Calculate the cost of calculating X, storing it in *COST. Result
5773 is true if the total cost of the operation has now been calculated. */
5774 static bool
5775 aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
5776 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5778 rtx op0, op1, op2;
5779 const struct cpu_cost_table *extra_cost
5780 = aarch64_tune_params.insn_extra_cost;
5781 int code = GET_CODE (x);
5783 /* By default, assume that everything has equivalent cost to the
5784 cheapest instruction. Any additional costs are applied as a delta
5785 above this default. */
5786 *cost = COSTS_N_INSNS (1);
5788 switch (code)
5790 case SET:
5791 /* The cost depends entirely on the operands to SET. */
5792 *cost = 0;
5793 op0 = SET_DEST (x);
5794 op1 = SET_SRC (x);
5796 switch (GET_CODE (op0))
5798 case MEM:
5799 if (speed)
5801 rtx address = XEXP (op0, 0);
5802 if (VECTOR_MODE_P (mode))
5803 *cost += extra_cost->ldst.storev;
5804 else if (GET_MODE_CLASS (mode) == MODE_INT)
5805 *cost += extra_cost->ldst.store;
5806 else if (mode == SFmode)
5807 *cost += extra_cost->ldst.storef;
5808 else if (mode == DFmode)
5809 *cost += extra_cost->ldst.stored;
5811 *cost +=
5812 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5813 0, speed));
5816 *cost += rtx_cost (op1, mode, SET, 1, speed);
5817 return true;
5819 case SUBREG:
5820 if (! REG_P (SUBREG_REG (op0)))
5821 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
5823 /* Fall through. */
5824 case REG:
5825 /* The cost is one per vector-register copied. */
5826 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
5828 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5829 / GET_MODE_SIZE (V4SImode);
5830 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5832 /* const0_rtx is in general free, but we will use an
5833 instruction to set a register to 0. */
5834 else if (REG_P (op1) || op1 == const0_rtx)
5836 /* The cost is 1 per register copied. */
5837 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5838 / UNITS_PER_WORD;
5839 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5841 else
5842 /* Cost is just the cost of the RHS of the set. */
5843 *cost += rtx_cost (op1, mode, SET, 1, speed);
5844 return true;
5846 case ZERO_EXTRACT:
5847 case SIGN_EXTRACT:
5848 /* Bit-field insertion. Strip any redundant widening of
5849 the RHS to meet the width of the target. */
5850 if (GET_CODE (op1) == SUBREG)
5851 op1 = SUBREG_REG (op1);
5852 if ((GET_CODE (op1) == ZERO_EXTEND
5853 || GET_CODE (op1) == SIGN_EXTEND)
5854 && CONST_INT_P (XEXP (op0, 1))
5855 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5856 >= INTVAL (XEXP (op0, 1))))
5857 op1 = XEXP (op1, 0);
5859 if (CONST_INT_P (op1))
5861 /* MOV immediate is assumed to always be cheap. */
5862 *cost = COSTS_N_INSNS (1);
5864 else
5866 /* BFM. */
5867 if (speed)
5868 *cost += extra_cost->alu.bfi;
5869 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
5872 return true;
5874 default:
5875 /* We can't make sense of this, assume default cost. */
5876 *cost = COSTS_N_INSNS (1);
5877 return false;
5879 return false;
5881 case CONST_INT:
5882 /* If an instruction can incorporate a constant within the
5883 instruction, the instruction's expression avoids calling
5884 rtx_cost() on the constant. If rtx_cost() is called on a
5885 constant, then it is usually because the constant must be
5886 moved into a register by one or more instructions.
5888 The exception is constant 0, which can be expressed
5889 as XZR/WZR and is therefore free. The caveat is that if we
5890 have (set (reg) (const0_rtx)) we must still cost the move;
5891 however, we catch that when we cost the SET, so we don't need
5892 to consider it here. */
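/* For instance, a constant such as 0x123456789 is typically built with
   one MOVZ and two MOVKs, so aarch64_internal_mov_immediate reports 3
   and the constant is costed as three instructions.  */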
5893 if (x == const0_rtx)
5894 *cost = 0;
5895 else
5897 /* To an approximation, the cost of building any other constant
5898 is proportional to the number of instructions required to
5899 build that constant. This is true whether we are compiling
5900 for SPEED or otherwise. */
5901 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
5902 (NULL_RTX, x, false, mode));
5904 return true;
5906 case CONST_DOUBLE:
5907 if (speed)
5909 /* mov[df,sf]_aarch64. */
5910 if (aarch64_float_const_representable_p (x))
5911 /* FMOV (scalar immediate). */
5912 *cost += extra_cost->fp[mode == DFmode].fpconst;
5913 else if (!aarch64_float_const_zero_rtx_p (x))
5915 /* This will be a load from memory. */
5916 if (mode == DFmode)
5917 *cost += extra_cost->ldst.loadd;
5918 else
5919 *cost += extra_cost->ldst.loadf;
5921 else
5922 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5923 or MOV v0.s[0], wzr - neither of which are modeled by the
5924 cost tables. Just use the default cost. */
5929 return true;
5931 case MEM:
5932 if (speed)
5934 /* For loads we want the base cost of a load, plus an
5935 approximation for the additional cost of the addressing
5936 mode. */
5937 rtx address = XEXP (x, 0);
5938 if (VECTOR_MODE_P (mode))
5939 *cost += extra_cost->ldst.loadv;
5940 else if (GET_MODE_CLASS (mode) == MODE_INT)
5941 *cost += extra_cost->ldst.load;
5942 else if (mode == SFmode)
5943 *cost += extra_cost->ldst.loadf;
5944 else if (mode == DFmode)
5945 *cost += extra_cost->ldst.loadd;
5947 *cost +=
5948 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5949 0, speed));
5952 return true;
5954 case NEG:
5955 op0 = XEXP (x, 0);
5957 if (VECTOR_MODE_P (mode))
5959 if (speed)
5961 /* FNEG. */
5962 *cost += extra_cost->vect.alu;
5964 return false;
5967 if (GET_MODE_CLASS (mode) == MODE_INT)
5969 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5970 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5972 /* CSETM. */
5973 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
5974 return true;
5977 /* Cost this as SUB wzr, X. */
5978 op0 = CONST0_RTX (mode);
5979 op1 = XEXP (x, 0);
5980 goto cost_minus;
5983 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5985 /* Support (neg(fma...)) as a single instruction only if
5986 sign of zeros is unimportant. This matches the decision
5987 making in aarch64.md. */
5988 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5990 /* FNMADD. */
5991 *cost = rtx_cost (op0, mode, NEG, 0, speed);
5992 return true;
5994 if (speed)
5995 /* FNEG. */
5996 *cost += extra_cost->fp[mode == DFmode].neg;
5997 return false;
6000 return false;
6002 case CLRSB:
6003 case CLZ:
6004 if (speed)
6006 if (VECTOR_MODE_P (mode))
6007 *cost += extra_cost->vect.alu;
6008 else
6009 *cost += extra_cost->alu.clz;
6012 return false;
6014 case COMPARE:
6015 op0 = XEXP (x, 0);
6016 op1 = XEXP (x, 1);
6018 if (op1 == const0_rtx
6019 && GET_CODE (op0) == AND)
6021 x = op0;
6022 mode = GET_MODE (op0);
6023 goto cost_logic;
6026 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
6028 /* TODO: A write to the CC flags possibly costs extra; this
6029 needs encoding in the cost tables. */
6031 /* CC_ZESWPmode supports zero extend for free. */
6032 if (mode == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
6033 op0 = XEXP (op0, 0);
6035 mode = GET_MODE (op0);
6036 /* ANDS. */
6037 if (GET_CODE (op0) == AND)
6039 x = op0;
6040 goto cost_logic;
6043 if (GET_CODE (op0) == PLUS)
6045 /* ADDS (and CMN alias). */
6046 x = op0;
6047 goto cost_plus;
6050 if (GET_CODE (op0) == MINUS)
6052 /* SUBS. */
6053 x = op0;
6054 goto cost_minus;
6057 if (GET_CODE (op1) == NEG)
6059 /* CMN. */
6060 if (speed)
6061 *cost += extra_cost->alu.arith;
6063 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
6064 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
6065 return true;
6068 /* CMP.
6070 Compare can freely swap the order of operands, and
6071 canonicalization puts the more complex operation first.
6072 But the integer MINUS logic expects the shift/extend
6073 operation in op1. */
6074 if (! (REG_P (op0)
6075 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
6077 op0 = XEXP (x, 1);
6078 op1 = XEXP (x, 0);
6080 goto cost_minus;
6083 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
6085 /* FCMP. */
6086 if (speed)
6087 *cost += extra_cost->fp[mode == DFmode].compare;
6089 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
6091 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
6092 /* FCMP supports constant 0.0 for no extra cost. */
6093 return true;
6095 return false;
6098 if (VECTOR_MODE_P (mode))
6100 /* Vector compare. */
6101 if (speed)
6102 *cost += extra_cost->vect.alu;
6104 if (aarch64_float_const_zero_rtx_p (op1))
6106 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
6107 cost. */
6108 return true;
6110 return false;
6112 return false;
6114 case MINUS:
6116 op0 = XEXP (x, 0);
6117 op1 = XEXP (x, 1);
6119 cost_minus:
6120 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
6122 /* Detect valid immediates. */
6123 if ((GET_MODE_CLASS (mode) == MODE_INT
6124 || (GET_MODE_CLASS (mode) == MODE_CC
6125 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
6126 && CONST_INT_P (op1)
6127 && aarch64_uimm12_shift (INTVAL (op1)))
6129 if (speed)
6130 /* SUB(S) (immediate). */
6131 *cost += extra_cost->alu.arith;
6132 return true;
6135 /* Look for SUB (extended register). */
6136 if (aarch64_rtx_arith_op_extract_p (op1, mode))
6138 if (speed)
6139 *cost += extra_cost->alu.extend_arith;
6141 op1 = aarch64_strip_extend (op1);
6142 *cost += rtx_cost (op1, VOIDmode,
6143 (enum rtx_code) GET_CODE (op1), 0, speed);
6144 return true;
6147 rtx new_op1 = aarch64_strip_extend (op1);
6149 /* Cost this as an FMA-alike operation. */
6150 if ((GET_CODE (new_op1) == MULT
6151 || aarch64_shift_p (GET_CODE (new_op1)))
6152 && code != COMPARE)
6154 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
6155 (enum rtx_code) code,
6156 speed);
6157 return true;
6160 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
6162 if (speed)
6164 if (VECTOR_MODE_P (mode))
6166 /* Vector SUB. */
6167 *cost += extra_cost->vect.alu;
6169 else if (GET_MODE_CLASS (mode) == MODE_INT)
6171 /* SUB(S). */
6172 *cost += extra_cost->alu.arith;
6174 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6176 /* FSUB. */
6177 *cost += extra_cost->fp[mode == DFmode].addsub;
6180 return true;
6183 case PLUS:
6185 rtx new_op0;
6187 op0 = XEXP (x, 0);
6188 op1 = XEXP (x, 1);
6190 cost_plus:
6191 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
6192 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
6194 /* CSINC. */
6195 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
6196 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
6197 return true;
6200 if (GET_MODE_CLASS (mode) == MODE_INT
6201 && CONST_INT_P (op1)
6202 && aarch64_uimm12_shift (INTVAL (op1)))
6204 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
6206 if (speed)
6207 /* ADD (immediate). */
6208 *cost += extra_cost->alu.arith;
6209 return true;
6212 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
6214 /* Look for ADD (extended register). */
6215 if (aarch64_rtx_arith_op_extract_p (op0, mode))
6217 if (speed)
6218 *cost += extra_cost->alu.extend_arith;
6220 op0 = aarch64_strip_extend (op0);
6221 *cost += rtx_cost (op0, VOIDmode,
6222 (enum rtx_code) GET_CODE (op0), 0, speed);
6223 return true;
6226 /* Strip any extend, leave shifts behind as we will
6227 cost them through mult_cost. */
6228 new_op0 = aarch64_strip_extend (op0);
6230 if (GET_CODE (new_op0) == MULT
6231 || aarch64_shift_p (GET_CODE (new_op0)))
6233 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
6234 speed);
6235 return true;
6238 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
6240 if (speed)
6242 if (VECTOR_MODE_P (mode))
6244 /* Vector ADD. */
6245 *cost += extra_cost->vect.alu;
6247 else if (GET_MODE_CLASS (mode) == MODE_INT)
6249 /* ADD. */
6250 *cost += extra_cost->alu.arith;
6252 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6254 /* FADD. */
6255 *cost += extra_cost->fp[mode == DFmode].addsub;
6258 return true;
6261 case BSWAP:
6262 *cost = COSTS_N_INSNS (1);
6264 if (speed)
6266 if (VECTOR_MODE_P (mode))
6267 *cost += extra_cost->vect.alu;
6268 else
6269 *cost += extra_cost->alu.rev;
6271 return false;
6273 case IOR:
6274 if (aarch_rev16_p (x))
6276 *cost = COSTS_N_INSNS (1);
6278 if (speed)
6280 if (VECTOR_MODE_P (mode))
6281 *cost += extra_cost->vect.alu;
6282 else
6283 *cost += extra_cost->alu.rev;
6285 return true;
6288 if (aarch64_extr_rtx_p (x, &op0, &op1))
6290 *cost += rtx_cost (op0, mode, IOR, 0, speed);
6291 *cost += rtx_cost (op1, mode, IOR, 1, speed);
6292 if (speed)
6293 *cost += extra_cost->alu.shift;
6295 return true;
6297 /* Fall through. */
6298 case XOR:
6299 case AND:
6300 cost_logic:
6301 op0 = XEXP (x, 0);
6302 op1 = XEXP (x, 1);
6304 if (VECTOR_MODE_P (mode))
6306 if (speed)
6307 *cost += extra_cost->vect.alu;
6308 return true;
6311 if (code == AND
6312 && GET_CODE (op0) == MULT
6313 && CONST_INT_P (XEXP (op0, 1))
6314 && CONST_INT_P (op1)
6315 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
6316 INTVAL (op1)) != 0)
6318 /* This is a UBFM/SBFM. */
6319 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
6320 if (speed)
6321 *cost += extra_cost->alu.bfx;
6322 return true;
6325 if (GET_MODE_CLASS (mode) == MODE_INT)
6327 /* We possibly get the immediate for free; this is not
6328 modelled. */
6329 if (CONST_INT_P (op1)
6330 && aarch64_bitmask_imm (INTVAL (op1), mode))
6332 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6334 if (speed)
6335 *cost += extra_cost->alu.logical;
6337 return true;
6339 else
6341 rtx new_op0 = op0;
6343 /* Handle ORN, EON, or BIC. */
6344 if (GET_CODE (op0) == NOT)
6345 op0 = XEXP (op0, 0);
6347 new_op0 = aarch64_strip_shift (op0);
6349 /* If we had a shift on op0 then this is a logical-shift-
6350 by-register/immediate operation. Otherwise, this is just
6351 a logical operation. */
6352 if (speed)
6354 if (new_op0 != op0)
6356 /* Shift by immediate. */
6357 if (CONST_INT_P (XEXP (op0, 1)))
6358 *cost += extra_cost->alu.log_shift;
6359 else
6360 *cost += extra_cost->alu.log_shift_reg;
6362 else
6363 *cost += extra_cost->alu.logical;
6366 /* In both cases we want to cost both operands. */
6367 *cost += rtx_cost (new_op0, mode, (enum rtx_code) code, 0, speed);
6368 *cost += rtx_cost (op1, mode, (enum rtx_code) code, 1, speed);
6370 return true;
6373 return false;
6375 case NOT:
6376 x = XEXP (x, 0);
6377 op0 = aarch64_strip_shift (x);
6379 if (VECTOR_MODE_P (mode))
6381 /* Vector NOT. */
6382 *cost += extra_cost->vect.alu;
6383 return false;
6386 /* MVN-shifted-reg. */
6387 if (op0 != x)
6389 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6391 if (speed)
6392 *cost += extra_cost->alu.log_shift;
6394 return true;
6396 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
6397 Handle the second form here taking care that 'a' in the above can
6398 be a shift. */
6399 else if (GET_CODE (op0) == XOR)
6401 rtx newop0 = XEXP (op0, 0);
6402 rtx newop1 = XEXP (op0, 1);
6403 rtx op0_stripped = aarch64_strip_shift (newop0);
6405 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
6406 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6408 if (speed)
6410 if (op0_stripped != newop0)
6411 *cost += extra_cost->alu.log_shift;
6412 else
6413 *cost += extra_cost->alu.logical;
6416 return true;
6418 /* MVN. */
6419 if (speed)
6420 *cost += extra_cost->alu.logical;
6422 return false;
6424 case ZERO_EXTEND:
6426 op0 = XEXP (x, 0);
6427 /* If a value is written in SI mode, then zero extended to DI
6428 mode, the operation will in general be free as a write to
6429 a 'w' register implicitly zeroes the upper bits of an 'x'
6430 register. However, if this is
6432 (set (reg) (zero_extend (reg)))
6434 we must cost the explicit register move. */
6435 if (mode == DImode
6436 && GET_MODE (op0) == SImode
6437 && outer == SET)
6439 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
6441 if (!op_cost && speed)
6442 /* MOV. */
6443 *cost += extra_cost->alu.extend;
6444 else
6445 /* Free, the cost is that of the SI mode operation. */
6446 *cost = op_cost;
6448 return true;
6450 else if (MEM_P (op0))
6452 /* All loads can zero extend to any size for free. */
6453 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
6454 return true;
6457 if (speed)
6459 if (VECTOR_MODE_P (mode))
6461 /* UMOV. */
6462 *cost += extra_cost->vect.alu;
6464 else
6466 /* UXTB/UXTH. */
6467 *cost += extra_cost->alu.extend;
6470 return false;
6472 case SIGN_EXTEND:
6473 if (MEM_P (XEXP (x, 0)))
6475 /* LDRSH. */
6476 if (speed)
6478 rtx address = XEXP (XEXP (x, 0), 0);
6479 *cost += extra_cost->ldst.load_sign_extend;
6481 *cost +=
6482 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6483 0, speed));
6485 return true;
6488 if (speed)
6490 if (VECTOR_MODE_P (mode))
6491 *cost += extra_cost->vect.alu;
6492 else
6493 *cost += extra_cost->alu.extend;
6495 return false;
6497 case ASHIFT:
6498 op0 = XEXP (x, 0);
6499 op1 = XEXP (x, 1);
6501 if (CONST_INT_P (op1))
6503 if (speed)
6505 if (VECTOR_MODE_P (mode))
6507 /* Vector shift (immediate). */
6508 *cost += extra_cost->vect.alu;
6510 else
6512 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
6513 aliases. */
6514 *cost += extra_cost->alu.shift;
6518 /* We can incorporate zero/sign extend for free. */
6519 if (GET_CODE (op0) == ZERO_EXTEND
6520 || GET_CODE (op0) == SIGN_EXTEND)
6521 op0 = XEXP (op0, 0);
6523 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
6524 return true;
6526 else
6528 if (speed)
6530 if (VECTOR_MODE_P (mode))
6532 /* Vector shift (register). */
6533 *cost += extra_cost->vect.alu;
6535 else
6537 /* LSLV. */
6538 *cost += extra_cost->alu.shift_reg;
6541 return false; /* All arguments need to be in registers. */
6544 case ROTATE:
6545 case ROTATERT:
6546 case LSHIFTRT:
6547 case ASHIFTRT:
6548 op0 = XEXP (x, 0);
6549 op1 = XEXP (x, 1);
6551 if (CONST_INT_P (op1))
6553 /* ASR (immediate) and friends. */
6554 if (speed)
6556 if (VECTOR_MODE_P (mode))
6557 *cost += extra_cost->vect.alu;
6558 else
6559 *cost += extra_cost->alu.shift;
6562 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6563 return true;
6565 else
6568 /* ASR (register) and friends. */
6569 if (speed)
6571 if (VECTOR_MODE_P (mode))
6572 *cost += extra_cost->vect.alu;
6573 else
6574 *cost += extra_cost->alu.shift_reg;
6576 return false; /* All arguments need to be in registers. */
6579 case SYMBOL_REF:
6581 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
6582 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
6584 /* LDR. */
6585 if (speed)
6586 *cost += extra_cost->ldst.load;
6588 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
6589 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
6591 /* ADRP, followed by ADD. */
6592 *cost += COSTS_N_INSNS (1);
6593 if (speed)
6594 *cost += 2 * extra_cost->alu.arith;
6596 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
6597 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
6599 /* ADR. */
6600 if (speed)
6601 *cost += extra_cost->alu.arith;
6604 if (flag_pic)
6606 /* One extra load instruction, after accessing the GOT. */
6607 *cost += COSTS_N_INSNS (1);
6608 if (speed)
6609 *cost += extra_cost->ldst.load;
6611 return true;
6613 case HIGH:
6614 case LO_SUM:
6615 /* ADRP/ADD (immediate). */
6616 if (speed)
6617 *cost += extra_cost->alu.arith;
6618 return true;
6620 case ZERO_EXTRACT:
6621 case SIGN_EXTRACT:
6622 /* UBFX/SBFX. */
6623 if (speed)
6625 if (VECTOR_MODE_P (mode))
6626 *cost += extra_cost->vect.alu;
6627 else
6628 *cost += extra_cost->alu.bfx;
6631 /* We can trust that the immediates used will be correct (there
6632 are no by-register forms), so we need only cost op0. */
6633 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
6634 return true;
6636 case MULT:
6637 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
6638 /* aarch64_rtx_mult_cost always handles recursion to its
6639 operands. */
6640 return true;
6642 case MOD:
6643 case UMOD:
6644 if (speed)
6646 if (VECTOR_MODE_P (mode))
6647 *cost += extra_cost->vect.alu;
6648 else if (GET_MODE_CLASS (mode) == MODE_INT)
6649 *cost += (extra_cost->mult[mode == DImode].add
6650 + extra_cost->mult[mode == DImode].idiv);
6651 else if (mode == DFmode)
6652 *cost += (extra_cost->fp[1].mult
6653 + extra_cost->fp[1].div);
6654 else if (mode == SFmode)
6655 *cost += (extra_cost->fp[0].mult
6656 + extra_cost->fp[0].div);
6658 return false; /* All arguments need to be in registers. */
6660 case DIV:
6661 case UDIV:
6662 case SQRT:
6663 if (speed)
6665 if (VECTOR_MODE_P (mode))
6666 *cost += extra_cost->vect.alu;
6667 else if (GET_MODE_CLASS (mode) == MODE_INT)
6668 /* There is no integer SQRT, so only DIV and UDIV can get
6669 here. */
6670 *cost += extra_cost->mult[mode == DImode].idiv;
6671 else
6672 *cost += extra_cost->fp[mode == DFmode].div;
6674 return false; /* All arguments need to be in registers. */
6676 case IF_THEN_ELSE:
6677 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
6678 XEXP (x, 2), cost, speed);
6680 case EQ:
6681 case NE:
6682 case GT:
6683 case GTU:
6684 case LT:
6685 case LTU:
6686 case GE:
6687 case GEU:
6688 case LE:
6689 case LEU:
6691 return false; /* All arguments must be in registers. */
6693 case FMA:
6694 op0 = XEXP (x, 0);
6695 op1 = XEXP (x, 1);
6696 op2 = XEXP (x, 2);
6698 if (speed)
6700 if (VECTOR_MODE_P (mode))
6701 *cost += extra_cost->vect.alu;
6702 else
6703 *cost += extra_cost->fp[mode == DFmode].fma;
6706 /* FMSUB, FNMADD, and FNMSUB are free. */
6707 if (GET_CODE (op0) == NEG)
6708 op0 = XEXP (op0, 0);
6710 if (GET_CODE (op2) == NEG)
6711 op2 = XEXP (op2, 0);
6713 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
6714 and the by-element operand as operand 0. */
6715 if (GET_CODE (op1) == NEG)
6716 op1 = XEXP (op1, 0);
6718 /* Catch vector-by-element operations. The by-element operand can
6719 either be (vec_duplicate (vec_select (x))) or just
6720 (vec_select (x)), depending on whether we are multiplying by
6721 a vector or a scalar.
6723 Canonicalization is not very good in these cases: FMA4 will put the
6724 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
6725 if (GET_CODE (op0) == VEC_DUPLICATE)
6726 op0 = XEXP (op0, 0);
6727 else if (GET_CODE (op1) == VEC_DUPLICATE)
6728 op1 = XEXP (op1, 0);
6730 if (GET_CODE (op0) == VEC_SELECT)
6731 op0 = XEXP (op0, 0);
6732 else if (GET_CODE (op1) == VEC_SELECT)
6733 op1 = XEXP (op1, 0);
6735 /* If the remaining parameters are not registers,
6736 get the cost to put them into registers. */
6737 *cost += rtx_cost (op0, mode, FMA, 0, speed);
6738 *cost += rtx_cost (op1, mode, FMA, 1, speed);
6739 *cost += rtx_cost (op2, mode, FMA, 2, speed);
6740 return true;
6742 case FLOAT:
6743 case UNSIGNED_FLOAT:
6744 if (speed)
6745 *cost += extra_cost->fp[mode == DFmode].fromint;
6746 return false;
6748 case FLOAT_EXTEND:
6749 if (speed)
6751 if (VECTOR_MODE_P (mode))
6753 /* Vector conversion. */
6754 *cost += extra_cost->vect.alu;
6756 else
6757 *cost += extra_cost->fp[mode == DFmode].widen;
6759 return false;
6761 case FLOAT_TRUNCATE:
6762 if (speed)
6764 if (VECTOR_MODE_P (mode))
6766 /* Vector conversion. */
6767 *cost += extra_cost->vect.alu;
6769 else
6770 *cost += extra_cost->fp[mode == DFmode].narrow;
6772 return false;
6774 case FIX:
6775 case UNSIGNED_FIX:
6776 x = XEXP (x, 0);
6777 /* Strip the rounding part. They will all be implemented
6778 by the fcvt* family of instructions anyway. */
6779 if (GET_CODE (x) == UNSPEC)
6781 unsigned int uns_code = XINT (x, 1);
6783 if (uns_code == UNSPEC_FRINTA
6784 || uns_code == UNSPEC_FRINTM
6785 || uns_code == UNSPEC_FRINTN
6786 || uns_code == UNSPEC_FRINTP
6787 || uns_code == UNSPEC_FRINTZ)
6788 x = XVECEXP (x, 0, 0);
6791 if (speed)
6793 if (VECTOR_MODE_P (mode))
6794 *cost += extra_cost->vect.alu;
6795 else
6796 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
6798 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
6799 return true;
6801 case ABS:
6802 if (VECTOR_MODE_P (mode))
6804 /* ABS (vector). */
6805 if (speed)
6806 *cost += extra_cost->vect.alu;
6808 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6810 op0 = XEXP (x, 0);
6812 /* FABD, which is analogous to FADD. */
6813 if (GET_CODE (op0) == MINUS)
6815 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
6816 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
6817 if (speed)
6818 *cost += extra_cost->fp[mode == DFmode].addsub;
6820 return true;
6822 /* Simple FABS is analogous to FNEG. */
6823 if (speed)
6824 *cost += extra_cost->fp[mode == DFmode].neg;
6826 else
6828 /* Integer ABS will either be split into
6829 two arithmetic instructions, or will be an ABS
6830 (scalar), which we don't model. */
6831 *cost = COSTS_N_INSNS (2);
6832 if (speed)
6833 *cost += 2 * extra_cost->alu.arith;
6835 return false;
6837 case SMAX:
6838 case SMIN:
6839 if (speed)
6841 if (VECTOR_MODE_P (mode))
6842 *cost += extra_cost->vect.alu;
6843 else
6845 /* FMAXNM/FMINNM/FMAX/FMIN.
6846 TODO: This may not be accurate for all implementations, but
6847 we do not model this in the cost tables. */
6848 *cost += extra_cost->fp[mode == DFmode].addsub;
6851 return false;
6853 case UNSPEC:
6854 /* The floating point round to integer frint* instructions. */
6855 if (aarch64_frint_unspec_p (XINT (x, 1)))
6857 if (speed)
6858 *cost += extra_cost->fp[mode == DFmode].roundint;
6860 return false;
6863 if (XINT (x, 1) == UNSPEC_RBIT)
6865 if (speed)
6866 *cost += extra_cost->alu.rev;
6868 return false;
6870 break;
6872 case TRUNCATE:
6874 /* Decompose <su>muldi3_highpart. */
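/* That is (illustrative reconstruction of the checks below), we match
   (truncate:DI (lshiftrt:TI (mult:TI (ANY_EXTEND:TI (reg:DI))
                                      (ANY_EXTEND:TI (reg:DI)))
                             (const_int 64)))
   with both extends of the same signedness.  */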
6875 if (/* (truncate:DI */
6876 mode == DImode
6877 /* (lshiftrt:TI */
6878 && GET_MODE (XEXP (x, 0)) == TImode
6879 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6880 /* (mult:TI */
6881 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6882 /* (ANY_EXTEND:TI (reg:DI))
6883 (ANY_EXTEND:TI (reg:DI))) */
6884 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6885 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
6886 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
6887 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
6888 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
6889 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
6890 /* (const_int 64) */
6891 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6892 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
6894 /* UMULH/SMULH. */
6895 if (speed)
6896 *cost += extra_cost->mult[mode == DImode].extend;
6897 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
6898 mode, MULT, 0, speed);
6899 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
6900 mode, MULT, 1, speed);
6901 return true;
6904 /* Fall through. */
6905 default:
6906 break;
6909 if (dump_file && (dump_flags & TDF_DETAILS))
6910 fprintf (dump_file,
6911 "\nFailed to cost RTX. Assuming default cost.\n");
6913 return true;
6916 /* Wrapper around aarch64_rtx_costs that dumps the partial or total cost
6917 calculated for X. This cost is stored in *COST. Returns true
6918 if the total cost of X was calculated. */
6919 static bool
6920 aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
6921 int param, int *cost, bool speed)
6923 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
6925 if (dump_file && (dump_flags & TDF_DETAILS))
6927 print_rtl_single (dump_file, x);
6928 fprintf (dump_file, "\n%s cost: %d (%s)\n",
6929 speed ? "Hot" : "Cold",
6930 *cost, result ? "final" : "partial");
6933 return result;
6936 static int
6937 aarch64_register_move_cost (machine_mode mode,
6938 reg_class_t from_i, reg_class_t to_i)
6940 enum reg_class from = (enum reg_class) from_i;
6941 enum reg_class to = (enum reg_class) to_i;
6942 const struct cpu_regmove_cost *regmove_cost
6943 = aarch64_tune_params.regmove_cost;
6945 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
6946 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6947 to = GENERAL_REGS;
6949 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6950 from = GENERAL_REGS;
6952 /* Moving between a GPR and the stack register costs the same as GP2GP. */
6953 if ((from == GENERAL_REGS && to == STACK_REG)
6954 || (to == GENERAL_REGS && from == STACK_REG))
6955 return regmove_cost->GP2GP;
6957 /* To/From the stack register, we move via the gprs. */
6958 if (to == STACK_REG || from == STACK_REG)
6959 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6960 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
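/* For example (illustrative, non-128-bit modes): an FP_REGS -> STACK_REG
   move decomposes into FP_REGS -> GENERAL_REGS plus GENERAL_REGS -> STACK_REG,
   i.e. FP2GP + GP2GP.  */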
6962 if (GET_MODE_SIZE (mode) == 16)
6964 /* 128-bit operations on general registers require 2 instructions. */
6965 if (from == GENERAL_REGS && to == GENERAL_REGS)
6966 return regmove_cost->GP2GP * 2;
6967 else if (from == GENERAL_REGS)
6968 return regmove_cost->GP2FP * 2;
6969 else if (to == GENERAL_REGS)
6970 return regmove_cost->FP2GP * 2;
6972 /* When AdvSIMD instructions are disabled it is not possible to move
6973 a 128-bit value directly between Q registers. This is handled in
6974 secondary reload. A general register is used as a scratch to move
6975 the upper DI value and the lower DI value is moved directly,
6976 hence the cost is the sum of three moves. */
6977 if (! TARGET_SIMD)
6978 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6980 return regmove_cost->FP2FP;
6983 if (from == GENERAL_REGS && to == GENERAL_REGS)
6984 return regmove_cost->GP2GP;
6985 else if (from == GENERAL_REGS)
6986 return regmove_cost->GP2FP;
6987 else if (to == GENERAL_REGS)
6988 return regmove_cost->FP2GP;
6990 return regmove_cost->FP2FP;
6993 static int
6994 aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
6995 reg_class_t rclass ATTRIBUTE_UNUSED,
6996 bool in ATTRIBUTE_UNUSED)
6998 return aarch64_tune_params.memmov_cost;
7001 /* Return the number of instructions that can be issued per cycle. */
7002 static int
7003 aarch64_sched_issue_rate (void)
7005 return aarch64_tune_params.issue_rate;
7008 static int
7009 aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
7011 int issue_rate = aarch64_sched_issue_rate ();
7013 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
7016 /* Vectorizer cost model target hooks. */
7018 /* Implement targetm.vectorize.builtin_vectorization_cost. */
7019 static int
7020 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
7021 tree vectype,
7022 int misalign ATTRIBUTE_UNUSED)
7024 unsigned elements;
7026 switch (type_of_cost)
7028 case scalar_stmt:
7029 return aarch64_tune_params.vec_costs->scalar_stmt_cost;
7031 case scalar_load:
7032 return aarch64_tune_params.vec_costs->scalar_load_cost;
7034 case scalar_store:
7035 return aarch64_tune_params.vec_costs->scalar_store_cost;
7037 case vector_stmt:
7038 return aarch64_tune_params.vec_costs->vec_stmt_cost;
7040 case vector_load:
7041 return aarch64_tune_params.vec_costs->vec_align_load_cost;
7043 case vector_store:
7044 return aarch64_tune_params.vec_costs->vec_store_cost;
7046 case vec_to_scalar:
7047 return aarch64_tune_params.vec_costs->vec_to_scalar_cost;
7049 case scalar_to_vec:
7050 return aarch64_tune_params.vec_costs->scalar_to_vec_cost;
7052 case unaligned_load:
7053 return aarch64_tune_params.vec_costs->vec_unalign_load_cost;
7055 case unaligned_store:
7056 return aarch64_tune_params.vec_costs->vec_unalign_store_cost;
7058 case cond_branch_taken:
7059 return aarch64_tune_params.vec_costs->cond_taken_branch_cost;
7061 case cond_branch_not_taken:
7062 return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost;
7064 case vec_perm:
7065 case vec_promote_demote:
7066 return aarch64_tune_params.vec_costs->vec_stmt_cost;
7068 case vec_construct:
7069 elements = TYPE_VECTOR_SUBPARTS (vectype);
7070 return elements / 2 + 1;
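/* E.g. (illustrative) constructing a four-element vector is costed as
   4 / 2 + 1 == 3 by the vec_construct formula above.  */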
7072 default:
7073 gcc_unreachable ();
7077 /* Implement targetm.vectorize.add_stmt_cost. */
7078 static unsigned
7079 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
7080 struct _stmt_vec_info *stmt_info, int misalign,
7081 enum vect_cost_model_location where)
7083 unsigned *cost = (unsigned *) data;
7084 unsigned retval = 0;
7086 if (flag_vect_cost_model)
7088 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
7089 int stmt_cost =
7090 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
7092 /* Statements in an inner loop relative to the loop being
7093 vectorized are weighted more heavily. The value here is
7094 a function (linear for now) of the loop nest level. */
7095 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
7097 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7098 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
7099 unsigned nest_level = loop_depth (loop);
7101 count *= nest_level;
7104 retval = (unsigned) (count * stmt_cost);
7105 cost[where] += retval;
7108 return retval;
7111 static void initialize_aarch64_code_model (struct gcc_options *);
7113 /* Enum describing the various ways that the
7114 aarch64_parse_{arch,tune,cpu,extension} functions can fail.
7115 This way their callers can choose what kind of error to give. */
7117 enum aarch64_parse_opt_result
7119 AARCH64_PARSE_OK, /* Parsing was successful. */
7120 AARCH64_PARSE_MISSING_ARG, /* Missing argument. */
7121 AARCH64_PARSE_INVALID_FEATURE, /* Invalid feature modifier. */
7122 AARCH64_PARSE_INVALID_ARG /* Invalid arch, tune, cpu arg. */
7125 /* Parse the architecture extension string STR and update ISA_FLAGS
7126 with the architecture features turned on or off. Return a
7127 aarch64_parse_opt_result describing the result. */
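/* For example (illustrative): parsing "+crc+nofp" first turns on the CRC
   feature flags and then turns off FP together with every feature the
   extension table records as depending on it.  */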
7129 static enum aarch64_parse_opt_result
7130 aarch64_parse_extension (char *str, unsigned long *isa_flags)
7132 /* The extension string is parsed left to right. */
7133 const struct aarch64_option_extension *opt = NULL;
7135 /* Flag to say whether we are adding or removing an extension. */
7136 int adding_ext = -1;
7138 while (str != NULL && *str != 0)
7140 char *ext;
7141 size_t len;
7143 str++;
7144 ext = strchr (str, '+');
7146 if (ext != NULL)
7147 len = ext - str;
7148 else
7149 len = strlen (str);
7151 if (len >= 2 && strncmp (str, "no", 2) == 0)
7153 adding_ext = 0;
7154 len -= 2;
7155 str += 2;
7157 else if (len > 0)
7158 adding_ext = 1;
7160 if (len == 0)
7161 return AARCH64_PARSE_MISSING_ARG;
7164 /* Scan over the extensions table trying to find an exact match. */
7165 for (opt = all_extensions; opt->name != NULL; opt++)
7167 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
7169 /* Add or remove the extension. */
7170 if (adding_ext)
7171 *isa_flags |= opt->flags_on;
7172 else
7173 *isa_flags &= ~(opt->flags_off);
7174 break;
7178 if (opt->name == NULL)
7180 /* Extension not found in list. */
7181 return AARCH64_PARSE_INVALID_FEATURE;
7184 str = ext;
7187 return AARCH64_PARSE_OK;
7190 /* Parse the TO_PARSE string and put the architecture struct that it
7191 selects into RES and the architectural features into ISA_FLAGS.
7192 Return an aarch64_parse_opt_result describing the parse result.
7193 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
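/* E.g. (illustrative) TO_PARSE == "armv8-a+crc" selects the armv8-a entry
   in all_architectures and hands the "+crc" suffix to
   aarch64_parse_extension.  */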
7195 static enum aarch64_parse_opt_result
7196 aarch64_parse_arch (const char *to_parse, const struct processor **res,
7197 unsigned long *isa_flags)
7199 char *ext;
7200 const struct processor *arch;
7201 char *str = (char *) alloca (strlen (to_parse) + 1);
7202 size_t len;
7204 strcpy (str, to_parse);
7206 ext = strchr (str, '+');
7208 if (ext != NULL)
7209 len = ext - str;
7210 else
7211 len = strlen (str);
7213 if (len == 0)
7214 return AARCH64_PARSE_MISSING_ARG;
7217 /* Loop through the list of supported ARCHes to find a match. */
7218 for (arch = all_architectures; arch->name != NULL; arch++)
7220 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
7222 unsigned long isa_temp = arch->flags;
7224 if (ext != NULL)
7226 /* TO_PARSE string contains at least one extension. */
7227 enum aarch64_parse_opt_result ext_res
7228 = aarch64_parse_extension (ext, &isa_temp);
7230 if (ext_res != AARCH64_PARSE_OK)
7231 return ext_res;
7233 /* Extension parsing was successful. Confirm the result
7234 arch and ISA flags. */
7235 *res = arch;
7236 *isa_flags = isa_temp;
7237 return AARCH64_PARSE_OK;
7241 /* ARCH name not found in list. */
7242 return AARCH64_PARSE_INVALID_ARG;
7245 /* Parse the TO_PARSE string and put the result tuning in RES and the
7246 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
7247 describing the parse result. If there is an error parsing, RES and
7248 ISA_FLAGS are left unchanged. */
7250 static enum aarch64_parse_opt_result
7251 aarch64_parse_cpu (const char *to_parse, const struct processor **res,
7252 unsigned long *isa_flags)
7254 char *ext;
7255 const struct processor *cpu;
7256 char *str = (char *) alloca (strlen (to_parse) + 1);
7257 size_t len;
7259 strcpy (str, to_parse);
7261 ext = strchr (str, '+');
7263 if (ext != NULL)
7264 len = ext - str;
7265 else
7266 len = strlen (str);
7268 if (len == 0)
7269 return AARCH64_PARSE_MISSING_ARG;
7272 /* Loop through the list of supported CPUs to find a match. */
7273 for (cpu = all_cores; cpu->name != NULL; cpu++)
7275 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
7277 unsigned long isa_temp = cpu->flags;
7280 if (ext != NULL)
7282 /* TO_PARSE string contains at least one extension. */
7283 enum aarch64_parse_opt_result ext_res
7284 = aarch64_parse_extension (ext, &isa_temp);
7286 if (ext_res != AARCH64_PARSE_OK)
7287 return ext_res;
7289 /* Extension parsing was successful. Confirm the result
7290 cpu and ISA flags. */
7291 *res = cpu;
7292 *isa_flags = isa_temp;
7293 return AARCH64_PARSE_OK;
7297 /* CPU name not found in list. */
7298 return AARCH64_PARSE_INVALID_ARG;
7301 /* Parse the TO_PARSE string and put the cpu it selects into RES.
7302 Return an aarch64_parse_opt_result describing the parse result.
7303 If the parsing fails the RES does not change. */
7305 static enum aarch64_parse_opt_result
7306 aarch64_parse_tune (const char *to_parse, const struct processor **res)
7308 const struct processor *cpu;
7309 char *str = (char *) alloca (strlen (to_parse) + 1);
7311 strcpy (str, to_parse);
7313 /* Loop through the list of supported CPUs to find a match. */
7314 for (cpu = all_cores; cpu->name != NULL; cpu++)
7316 if (strcmp (cpu->name, str) == 0)
7318 *res = cpu;
7319 return AARCH64_PARSE_OK;
7323 /* CPU name not found in list. */
7324 return AARCH64_PARSE_INVALID_ARG;
7327 /* Parse TOKEN, which has length LENGTH, to see if it is an option
7328 described in FLAG. If it is, return the index bit for that fusion type.
7329 If not, report an error (printing OPTION_NAME) and return zero. */
7331 static unsigned int
7332 aarch64_parse_one_option_token (const char *token,
7333 size_t length,
7334 const struct aarch64_flag_desc *flag,
7335 const char *option_name)
7337 for (; flag->name != NULL; flag++)
7339 if (length == strlen (flag->name)
7340 && !strncmp (flag->name, token, length))
7341 return flag->flag;
7344 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
7345 return 0;
7348 /* Parse OPTION which is a comma-separated list of flags to enable.
7349 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
7350 default state we inherit from the CPU tuning structures. OPTION_NAME
7351 gives the top-level option we are parsing in the -moverride string,
7352 for use in error messages. */
7354 static unsigned int
7355 aarch64_parse_boolean_options (const char *option,
7356 const struct aarch64_flag_desc *flags,
7357 unsigned int initial_state,
7358 const char *option_name)
7360 const char separator = '.';
7361 const char* specs = option;
7362 const char* ntoken = option;
7363 unsigned int found_flags = initial_state;
7365 while ((ntoken = strchr (specs, separator)))
7367 size_t token_length = ntoken - specs;
7368 unsigned token_ops = aarch64_parse_one_option_token (specs,
7369 token_length,
7370 flags,
7371 option_name);
7372 /* If we find "none" (or, for simplicity's sake, an error) anywhere
7373 in the token stream, reset the supported operations. So:
7375 adrp+add.cmp+branch.none.adrp+add
7377 would have the result of turning on only adrp+add fusion. */
7378 if (!token_ops)
7379 found_flags = 0;
7381 found_flags |= token_ops;
7382 specs = ++ntoken;
7385 /* The string ended with a trailing separator; report an error. */
7386 if (!(*specs))
7388 error ("%s string ill-formed\n", option_name);
7389 return 0;
7392 /* We still have one more token to parse. */
7393 size_t token_length = strlen (specs);
7394 unsigned token_ops = aarch64_parse_one_option_token (specs,
7395 token_length,
7396 flags,
7397 option_name);
7398 if (!token_ops)
7399 found_flags = 0;
7401 found_flags |= token_ops;
7402 return found_flags;
7405 /* Support for overriding instruction fusion. */
7407 static void
7408 aarch64_parse_fuse_string (const char *fuse_string,
7409 struct tune_params *tune)
7411 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
7412 aarch64_fusible_pairs,
7413 tune->fusible_ops,
7414 "fuse=");
7417 /* Support for overriding other tuning flags. */
7419 static void
7420 aarch64_parse_tune_string (const char *tune_string,
7421 struct tune_params *tune)
7423 tune->extra_tuning_flags
7424 = aarch64_parse_boolean_options (tune_string,
7425 aarch64_tuning_flags,
7426 tune->extra_tuning_flags,
7427 "tune=");
7430 /* Parse TOKEN, which has length LENGTH, to see if it is a tuning option
7431 we understand. If it is, extract the option string and hand it off to
7432 the appropriate function. */
7434 void
7435 aarch64_parse_one_override_token (const char* token,
7436 size_t length,
7437 struct tune_params *tune)
7439 const struct aarch64_tuning_override_function *fn
7440 = aarch64_tuning_override_functions;
7442 const char *option_part = strchr (token, '=');
7443 if (!option_part)
7445 error ("tuning string missing in option (%s)", token);
7446 return;
7449 /* Get the length of the option name. */
7450 length = option_part - token;
7451 /* Skip the '=' to get to the option string. */
7452 option_part++;
7454 for (; fn->name != NULL; fn++)
7456 if (!strncmp (fn->name, token, length))
7458 fn->parse_override (option_part, tune);
7459 return;
7463 error ("unknown tuning option (%s)",token);
7464 return;
7467 /* Parse STRING looking for options in the format:
7468 string :: option:string
7469 option :: name=substring
7470 name :: {a-z}
7471 substring :: defined by option. */
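/* An illustrative -moverride string such as
   "fuse=adrp+add.cmp+branch:tune=..." is split on ':' into option tokens,
   each of which is then split on '=' and dispatched to the matching entry
   in aarch64_tuning_override_functions.  */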
7473 static void
7474 aarch64_parse_override_string (const char* input_string,
7475 struct tune_params* tune)
7477 const char separator = ':';
7478 size_t string_length = strlen (input_string) + 1;
7479 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
7480 char *string = string_root;
7481 strncpy (string, input_string, string_length);
7482 string[string_length - 1] = '\0';
7484 char* ntoken = string;
7486 while ((ntoken = strchr (string, separator)))
7488 size_t token_length = ntoken - string;
7489 /* Make this substring look like a string. */
7490 *ntoken = '\0';
7491 aarch64_parse_one_override_token (string, token_length, tune);
7492 string = ++ntoken;
7495 /* One last option to parse. */
7496 aarch64_parse_one_override_token (string, strlen (string), tune);
7497 free (string_root);
7501 static void
7502 aarch64_override_options_after_change_1 (struct gcc_options *opts)
7504 if (opts->x_flag_omit_frame_pointer)
7505 opts->x_flag_omit_leaf_frame_pointer = false;
7506 else if (opts->x_flag_omit_leaf_frame_pointer)
7507 opts->x_flag_omit_frame_pointer = true;
7509 /* If not optimizing for size, set the default
7510 alignment to what the target wants. */
7511 if (!opts->x_optimize_size)
7513 if (opts->x_align_loops <= 0)
7514 opts->x_align_loops = aarch64_tune_params.loop_align;
7515 if (opts->x_align_jumps <= 0)
7516 opts->x_align_jumps = aarch64_tune_params.jump_align;
7517 if (opts->x_align_functions <= 0)
7518 opts->x_align_functions = aarch64_tune_params.function_align;
7522 /* 'Unpack' the internal tuning structs and update the options
7523 in OPTS. The caller must have set up selected_tune and selected_arch
7524 as all the other target-specific codegen decisions are
7525 derived from them. */
7527 void
7528 aarch64_override_options_internal (struct gcc_options *opts)
7530 aarch64_tune_flags = selected_tune->flags;
7531 aarch64_tune = selected_tune->sched_core;
7532 /* Make a copy of the tuning parameters attached to the core, which
7533 we may later overwrite. */
7534 aarch64_tune_params = *(selected_tune->tune);
7535 aarch64_architecture_version = selected_arch->architecture_version;
7537 if (opts->x_aarch64_override_tune_string)
7538 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
7539 &aarch64_tune_params);
7541 /* This target defaults to strict volatile bitfields. */
7542 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
7543 opts->x_flag_strict_volatile_bitfields = 1;
7545 /* -mgeneral-regs-only sets a mask in target_flags, make sure that
7546 aarch64_isa_flags does not contain the FP/SIMD/Crypto feature flags
7547 in case some code tries reading aarch64_isa_flags directly to check if
7548 FP is available. Reuse the aarch64_parse_extension machinery since it
7549 knows how to disable any other flags that fp implies. */
7550 if (TARGET_GENERAL_REGS_ONLY_P (opts->x_target_flags))
7552 /* aarch64_parse_extension takes char* rather than const char* because
7553 it is usually called from within other parsing functions. */
7554 char tmp_str[] = "+nofp";
7555 aarch64_parse_extension (tmp_str, &opts->x_aarch64_isa_flags);
7558 initialize_aarch64_code_model (opts);
7560 aarch64_override_options_after_change_1 (opts);
7563 /* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
7564 specified in STR and throw errors if appropriate. Put the results, if
7565 they are valid, in RES and ISA_FLAGS. Return whether the option is
7566 valid. */
7568 static bool
7569 aarch64_validate_mcpu (const char *str, const struct processor **res,
7570 unsigned long *isa_flags)
7572 enum aarch64_parse_opt_result parse_res
7573 = aarch64_parse_cpu (str, res, isa_flags);
7575 if (parse_res == AARCH64_PARSE_OK)
7576 return true;
7578 switch (parse_res)
7580 case AARCH64_PARSE_MISSING_ARG:
7581 error ("missing cpu name in -mcpu=%qs", str);
7582 break;
7583 case AARCH64_PARSE_INVALID_ARG:
7584 error ("unknown value %qs for -mcpu", str);
7585 break;
7586 case AARCH64_PARSE_INVALID_FEATURE:
7587 error ("invalid feature modifier in -mcpu=%qs", str);
7588 break;
7589 default:
7590 gcc_unreachable ();
7593 return false;
7596 /* Validate a command-line -march option. Parse the arch and extensions
7597 (if any) specified in STR and throw errors if appropriate. Put the
7598 results, if they are valid, in RES and ISA_FLAGS. Return whether the
7599 option is valid. */
7601 static bool
7602 aarch64_validate_march (const char *str, const struct processor **res,
7603 unsigned long *isa_flags)
7605 enum aarch64_parse_opt_result parse_res
7606 = aarch64_parse_arch (str, res, isa_flags);
7608 if (parse_res == AARCH64_PARSE_OK)
7609 return true;
7611 switch (parse_res)
7613 case AARCH64_PARSE_MISSING_ARG:
7614 error ("missing arch name in -march=%qs", str);
7615 break;
7616 case AARCH64_PARSE_INVALID_ARG:
7617 error ("unknown value %qs for -march", str);
7618 break;
7619 case AARCH64_PARSE_INVALID_FEATURE:
7620 error ("invalid feature modifier in -march=%qs", str);
7621 break;
7622 default:
7623 gcc_unreachable ();
7626 return false;
7629 /* Validate a command-line -mtune option. Parse the cpu
7630 specified in STR and throw errors if appropriate. Put the
7631 result, if it is valid, in RES. Return whether the option is
7632 valid. */
7634 static bool
7635 aarch64_validate_mtune (const char *str, const struct processor **res)
7637 enum aarch64_parse_opt_result parse_res
7638 = aarch64_parse_tune (str, res);
7640 if (parse_res == AARCH64_PARSE_OK)
7641 return true;
7643 switch (parse_res)
7645 case AARCH64_PARSE_MISSING_ARG:
7646 error ("missing cpu name in -mtune=%qs", str);
7647 break;
7648 case AARCH64_PARSE_INVALID_ARG:
7649 error ("unknown value %qs for -mtune", str);
7650 break;
7651 default:
7652 gcc_unreachable ();
7654 return false;
7657 /* Return the CPU corresponding to the enum CPU.
7658 If it doesn't specify a cpu, return the default. */
7660 static const struct processor *
7661 aarch64_get_tune_cpu (enum aarch64_processor cpu)
7663 if (cpu != aarch64_none)
7664 return &all_cores[cpu];
7666 /* The & 0x3f is to extract the bottom 6 bits that encode the
7667 default cpu as selected by the --with-cpu GCC configure option
7668 in config.gcc.
7669 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
7670 flags mechanism should be reworked to make it more sane. */
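/* The packing assumed here and in aarch64_override_options below is that
   the low 6 bits of TARGET_CPU_DEFAULT select the default core while the
   remaining bits (extracted there with >> 6) hold its default ISA flags.  */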
7671 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
7674 /* Return the architecture corresponding to the enum ARCH.
7675 If it doesn't specify a valid architecture, return the default. */
7677 static const struct processor *
7678 aarch64_get_arch (enum aarch64_arch arch)
7680 if (arch != aarch64_no_arch)
7681 return &all_architectures[arch];
7683 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
7685 return &all_architectures[cpu->arch];
7688 /* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
7689 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
7690 tuning structs. In particular it must set selected_tune and
7691 aarch64_isa_flags that define the available ISA features and tuning
7692 decisions. It must also set selected_arch as this will be used to
7693 output the .arch asm tags for each function. */
7695 static void
7696 aarch64_override_options (void)
7698 unsigned long cpu_isa = 0;
7699 unsigned long arch_isa = 0;
7700 aarch64_isa_flags = 0;
7702 bool valid_cpu = true;
7703 bool valid_tune = true;
7704 bool valid_arch = true;
7706 selected_cpu = NULL;
7707 selected_arch = NULL;
7708 selected_tune = NULL;
7710 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
7711 If either of -march or -mtune is given, they override their
7712 respective component of -mcpu. */
7713 if (aarch64_cpu_string)
7714 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
7715 &cpu_isa);
7717 if (aarch64_arch_string)
7718 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
7719 &arch_isa);
7721 if (aarch64_tune_string)
7722 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
7724 /* If the user did not specify a processor, choose the default
7725 one for them. This will be the CPU set during configuration using
7726 --with-cpu, otherwise it is "generic". */
7727 if (!selected_cpu)
7729 if (selected_arch)
7731 selected_cpu = &all_cores[selected_arch->ident];
7732 aarch64_isa_flags = arch_isa;
7733 explicit_arch = selected_arch->arch;
7735 else
7737 /* Get default configure-time CPU. */
7738 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
7739 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
7742 if (selected_tune)
7743 explicit_tune_core = selected_tune->ident;
7745 /* If both -mcpu and -march are specified check that they are architecturally
7746 compatible, warn if they're not and prefer the -march ISA flags. */
7747 else if (selected_arch)
7749 if (selected_arch->arch != selected_cpu->arch)
7751 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
7752 all_architectures[selected_cpu->arch].name,
7753 selected_arch->name);
7755 aarch64_isa_flags = arch_isa;
7756 explicit_arch = selected_arch->arch;
7757 explicit_tune_core = selected_tune ? selected_tune->ident
7758 : selected_cpu->ident;
7760 else
7762 /* -mcpu but no -march. */
7763 aarch64_isa_flags = cpu_isa;
7764 explicit_tune_core = selected_tune ? selected_tune->ident
7765 : selected_cpu->ident;
7766 gcc_assert (selected_cpu);
7767 selected_arch = &all_architectures[selected_cpu->arch];
7768 explicit_arch = selected_arch->arch;
7771 /* Set the arch as well, as we will need it when outputting
7772 the .arch directive in assembly. */
7773 if (!selected_arch)
7775 gcc_assert (selected_cpu);
7776 selected_arch = &all_architectures[selected_cpu->arch];
7779 if (!selected_tune)
7780 selected_tune = selected_cpu;
7782 #ifndef HAVE_AS_MABI_OPTION
7783 /* The compiler may have been configured with 2.23.* binutils, which does
7784 not have support for ILP32. */
7785 if (TARGET_ILP32)
7786 error ("Assembler does not support -mabi=ilp32");
7787 #endif
7789 /* Make sure we properly set up the explicit options. */
7790 if ((aarch64_cpu_string && valid_cpu)
7791 || (aarch64_tune_string && valid_tune))
7792 gcc_assert (explicit_tune_core != aarch64_none);
7794 if ((aarch64_cpu_string && valid_cpu)
7795 || (aarch64_arch_string && valid_arch))
7796 gcc_assert (explicit_arch != aarch64_no_arch);
7798 aarch64_build_bitmask_table ();
7800 aarch64_override_options_internal (&global_options);
7802 /* Save these options as the default ones in case we push and pop them later
7803 while processing functions with potential target attributes. */
7804 target_option_default_node = target_option_current_node
7805 = build_target_option_node (&global_options);
7807 aarch64_register_fma_steering ();
7811 /* Implement targetm.override_options_after_change. */
7813 static void
7814 aarch64_override_options_after_change (void)
7816 aarch64_override_options_after_change_1 (&global_options);
7819 static struct machine_function *
7820 aarch64_init_machine_status (void)
7822 struct machine_function *machine;
7823 machine = ggc_cleared_alloc<machine_function> ();
7824 return machine;
7827 void
7828 aarch64_init_expanders (void)
7830 init_machine_status = aarch64_init_machine_status;
7833 /* Select the code model to use, taking the PIC-related options in OPTS into account. */
7834 static void
7835 initialize_aarch64_code_model (struct gcc_options *opts)
7837 if (opts->x_flag_pic)
7839 switch (opts->x_aarch64_cmodel_var)
7841 case AARCH64_CMODEL_TINY:
7842 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
7843 break;
7844 case AARCH64_CMODEL_SMALL:
7845 #ifdef HAVE_AS_SMALL_PIC_RELOCS
7846 aarch64_cmodel = (flag_pic == 2
7847 ? AARCH64_CMODEL_SMALL_PIC
7848 : AARCH64_CMODEL_SMALL_SPIC);
7849 #else
7850 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
7851 #endif
7852 break;
7853 case AARCH64_CMODEL_LARGE:
7854 sorry ("code model %qs with -f%s", "large",
7855 opts->x_flag_pic > 1 ? "PIC" : "pic");
break;
7856 default:
7857 gcc_unreachable ();
7860 else
7861 aarch64_cmodel = opts->x_aarch64_cmodel_var;
7864 /* Print to F the architecture features specified by ISA_FLAGS. */
7866 static void
7867 aarch64_print_extension (FILE *f, unsigned long isa_flags)
7869 const struct aarch64_option_extension *opt = NULL;
7871 for (opt = all_extensions; opt->name != NULL; opt++)
7872 if ((isa_flags & opt->flags_on) == opt->flags_on)
7873 asm_fprintf (f, "+%s", opt->name);
7875 asm_fprintf (f, "\n");
7878 /* Implement TARGET_OPTION_SAVE. */
7880 static void
7881 aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
7883 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
7886 /* Implement TARGET_OPTION_RESTORE. Restore the backend codegen decisions
7887 using the information saved in PTR. */
7889 static void
7890 aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
7892 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
7893 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
7894 opts->x_explicit_arch = ptr->x_explicit_arch;
7895 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
7896 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
7898 aarch64_override_options_internal (opts);
7901 /* Implement TARGET_OPTION_PRINT. */
7903 static void
7904 aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
7906 const struct processor *cpu
7907 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
7908 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
7909 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
7911 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
7912 fprintf (file, "%*sselected arch = %s", indent, "", arch->name);
7913 aarch64_print_extension (file, isa_flags);
7916 static GTY(()) tree aarch64_previous_fndecl;
7918 void
7919 aarch64_reset_previous_fndecl (void)
7921 aarch64_previous_fndecl = NULL;
7924 /* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
7925 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
7926 of the function, if such exists. This function may be called multiple
7927 times on a single function so use aarch64_previous_fndecl to avoid
7928 setting up identical state. */
7930 static void
7931 aarch64_set_current_function (tree fndecl)
7933 tree old_tree = (aarch64_previous_fndecl
7934 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
7935 : NULL_TREE);
7937 tree new_tree = (fndecl
7938 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
7939 : NULL_TREE);
7942 if (fndecl && fndecl != aarch64_previous_fndecl)
7944 aarch64_previous_fndecl = fndecl;
7945 if (old_tree == new_tree)
7948 else if (new_tree && new_tree != target_option_default_node)
7950 cl_target_option_restore (&global_options,
7951 TREE_TARGET_OPTION (new_tree));
7952 if (TREE_TARGET_GLOBALS (new_tree))
7953 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
7954 else
7955 TREE_TARGET_GLOBALS (new_tree)
7956 = save_target_globals_default_opts ();
7959 else if (old_tree && old_tree != target_option_default_node)
7961 new_tree = target_option_current_node;
7962 cl_target_option_restore (&global_options,
7963 TREE_TARGET_OPTION (new_tree));
7964 if (TREE_TARGET_GLOBALS (new_tree))
7965 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
7966 else if (new_tree == target_option_default_node)
7967 restore_target_globals (&default_target_globals);
7968 else
7969 TREE_TARGET_GLOBALS (new_tree)
7970 = save_target_globals_default_opts ();
7975 /* Enum describing the various ways we can handle attributes.
7976 In many cases we can reuse the generic option handling machinery. */
7978 enum aarch64_attr_opt_type
7980 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
7981 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
7982 aarch64_attr_enum, /* Attribute sets an enum variable. */
7983 aarch64_attr_custom /* Attribute requires a custom handling function. */
7986 /* All the information needed to handle a target attribute.
7987 NAME is the name of the attribute.
7988 ATTR_TYPE specifies the type of behaviour of the attribute as described
7989 in the definition of enum aarch64_attr_opt_type.
7990 ALLOW_NEG is true if the attribute supports a "no-" form.
7991 HANDLER is the function that takes the attribute string and whether
7992 it is a pragma or attribute and handles the option. It is needed only
7993 when the ATTR_TYPE is aarch64_attr_custom.
7994 OPT_NUM is the enum specifying the option that the attribute modifies.
7995 This is needed for attributes that mirror the behaviour of a command-line
7996 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
7997 aarch64_attr_enum. */
7999 struct aarch64_attribute_info
8001 const char *name;
8002 enum aarch64_attr_opt_type attr_type;
8003 bool allow_neg;
8004 bool (*handler) (const char *, const char *);
8005 enum opt_code opt_num;
8008 /* Handle the STR argument to the arch= target attribute.
8009 PRAGMA_OR_ATTR is used in potential error messages. */
8011 static bool
8012 aarch64_handle_attr_arch (const char *str, const char *pragma_or_attr)
8014 const struct processor *tmp_arch = NULL;
8015 enum aarch64_parse_opt_result parse_res
8016 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags);
8018 if (parse_res == AARCH64_PARSE_OK)
8020 gcc_assert (tmp_arch);
8021 selected_arch = tmp_arch;
8022 explicit_arch = selected_arch->arch;
8023 return true;
8026 switch (parse_res)
8028 case AARCH64_PARSE_MISSING_ARG:
8029 error ("missing architecture name in 'arch' target %s", pragma_or_attr);
8030 break;
8031 case AARCH64_PARSE_INVALID_ARG:
8032 error ("unknown value %qs for 'arch' target %s", str, pragma_or_attr);
8033 break;
8034 case AARCH64_PARSE_INVALID_FEATURE:
8035 error ("invalid feature modifier %qs for 'arch' target %s",
8036 str, pragma_or_attr);
8037 break;
8038 default:
8039 gcc_unreachable ();
8042 return false;
8045 /* Handle the argument STR to the cpu= target attribute.
8046 PRAGMA_OR_ATTR is used in potential error messages. */
8048 static bool
8049 aarch64_handle_attr_cpu (const char *str, const char *pragma_or_attr)
8051 const struct processor *tmp_cpu = NULL;
8052 enum aarch64_parse_opt_result parse_res
8053 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags);
8055 if (parse_res == AARCH64_PARSE_OK)
8057 gcc_assert (tmp_cpu);
8058 selected_tune = tmp_cpu;
8059 explicit_tune_core = selected_tune->ident;
8061 selected_arch = &all_architectures[tmp_cpu->arch];
8062 explicit_arch = selected_arch->arch;
8063 return true;
8066 switch (parse_res)
8068 case AARCH64_PARSE_MISSING_ARG:
8069 error ("missing cpu name in 'cpu' target %s", pragma_or_attr);
8070 break;
8071 case AARCH64_PARSE_INVALID_ARG:
8072 error ("unknown value %qs for 'cpu' target %s", str, pragma_or_attr);
8073 break;
8074 case AARCH64_PARSE_INVALID_FEATURE:
8075 error ("invalid feature modifier %qs for 'cpu' target %s",
8076 str, pragma_or_attr);
8077 break;
8078 default:
8079 gcc_unreachable ();
8082 return false;
8085 /* Handle the argument STR to the tune= target attribute.
8086 PRAGMA_OR_ATTR is used in potential error messages. */
8088 static bool
8089 aarch64_handle_attr_tune (const char *str, const char *pragma_or_attr)
8091 const struct processor *tmp_tune = NULL;
8092 enum aarch64_parse_opt_result parse_res
8093 = aarch64_parse_tune (str, &tmp_tune);
8095 if (parse_res == AARCH64_PARSE_OK)
8097 gcc_assert (tmp_tune);
8098 selected_tune = tmp_tune;
8099 explicit_tune_core = selected_tune->ident;
8100 return true;
8103 switch (parse_res)
8105 case AARCH64_PARSE_INVALID_ARG:
8106 error ("unknown value %qs for 'tune' target %s", str, pragma_or_attr);
8107 break;
8108 default:
8109 gcc_unreachable ();
8112 return false;
8115 /* Parse an architecture extensions target attribute string specified in STR.
8116 For example "+fp+nosimd". Show any errors if needed. Return TRUE
8117 if successful. Update aarch64_isa_flags to reflect the ISA features
8118 modified.
8119 PRAGMA_OR_ATTR is used in potential error messages. */
8121 static bool
8122 aarch64_handle_attr_isa_flags (char *str, const char *pragma_or_attr)
8124 enum aarch64_parse_opt_result parse_res;
8125 unsigned long isa_flags = aarch64_isa_flags;
8127 /* We allow "+nothing" in the beginning to clear out all architectural
8128 features if the user wants to handpick specific features. */
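/* E.g. (illustrative) "+nothing+fp" discards the current ISA flags and
   then enables only what "+fp" turns on.  */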
8129 if (strncmp ("+nothing", str, 8) == 0)
8131 isa_flags = 0;
8132 str += 8;
8135 parse_res = aarch64_parse_extension (str, &isa_flags);
8137 if (parse_res == AARCH64_PARSE_OK)
8139 aarch64_isa_flags = isa_flags;
8140 return true;
8143 switch (parse_res)
8145 case AARCH64_PARSE_MISSING_ARG:
8146 error ("missing feature modifier in target %s %qs",
8147 pragma_or_attr, str);
8148 break;
8150 case AARCH64_PARSE_INVALID_FEATURE:
8151 error ("invalid feature modifier in target %s %qs",
8152 pragma_or_attr, str);
8153 break;
8155 default:
8156 gcc_unreachable ();
8159 return false;
8162 /* The target attributes that we support. On top of these we also support just
8163 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
8164 handled explicitly in aarch64_process_one_target_attr. */
8166 static const struct aarch64_attribute_info aarch64_attributes[] =
8168 { "general-regs-only", aarch64_attr_mask, false, NULL,
8169 OPT_mgeneral_regs_only },
8170 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
8171 OPT_mfix_cortex_a53_835769 },
8172 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
8173 { "strict-align", aarch64_attr_mask, false, NULL, OPT_mstrict_align },
8174 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
8175 OPT_momit_leaf_frame_pointer },
8176 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
8177 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
8178 OPT_march_ },
8179 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
8180 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
8181 OPT_mtune_ },
8182 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
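/* Illustrative forms these entries accept, for example:
     __attribute__ ((target ("arch=armv8-a+crc")))
     __attribute__ ((target ("no-omit-leaf-frame-pointer")))
     __attribute__ ((target ("cpu=cortex-a57,tune=cortex-a53")))  */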
8185 /* Parse ARG_STR which contains the definition of one target attribute.
8186 Show appropriate errors if any or return true if the attribute is valid.
8187 PRAGMA_OR_ATTR holds the string to use in error messages about whether
8188 we're processing a target attribute or pragma. */
8190 static bool
8191 aarch64_process_one_target_attr (char *arg_str, const char* pragma_or_attr)
8193 bool invert = false;
8195 size_t len = strlen (arg_str);
8197 if (len == 0)
8199 error ("malformed target %s", pragma_or_attr);
8200 return false;
8203 char *str_to_check = (char *) alloca (len + 1);
8204 strcpy (str_to_check, arg_str);
8206 /* Skip leading whitespace. */
8207 while (*str_to_check == ' ' || *str_to_check == '\t')
8208 str_to_check++;
8210 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
8211 It is easier to detect and handle it explicitly here rather than going
8212 through the machinery for the rest of the target attributes in this
8213 function. */
8214 if (*str_to_check == '+')
8215 return aarch64_handle_attr_isa_flags (str_to_check, pragma_or_attr);
8217 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
8219 invert = true;
8220 str_to_check += 3;
8222 char *arg = strchr (str_to_check, '=');
8224 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
8225 and point ARG to "foo". */
8226 if (arg)
8228 *arg = '\0';
8229 arg++;
8231 const struct aarch64_attribute_info *p_attr;
8232 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
8234 /* If the names don't match up, or the user has given an argument
8235 to an attribute that doesn't accept one, or didn't give an argument
8236 to an attribute that expects one, fail to match. */
8237 if (strcmp (str_to_check, p_attr->name) != 0)
8238 continue;
8240 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
8241 || p_attr->attr_type == aarch64_attr_enum;
8243 if (attr_need_arg_p ^ (arg != NULL))
8245 error ("target %s %qs does not accept an argument",
8246 pragma_or_attr, str_to_check);
8247 return false;
8250 /* If the name matches but the attribute does not allow "no-" versions
8251 then we can't match. */
8252 if (invert && !p_attr->allow_neg)
8254 error ("target %s %qs does not allow a negated form",
8255 pragma_or_attr, str_to_check);
8256 return false;
8259 switch (p_attr->attr_type)
8261 /* Has a custom handler registered.
8262 For example, cpu=, arch=, tune=. */
8263 case aarch64_attr_custom:
8264 gcc_assert (p_attr->handler);
8265 if (!p_attr->handler (arg, pragma_or_attr))
8266 return false;
8267 break;
8269 /* Either set or unset a boolean option. */
8270 case aarch64_attr_bool:
8272 struct cl_decoded_option decoded;
8274 generate_option (p_attr->opt_num, NULL, !invert,
8275 CL_TARGET, &decoded);
8276 aarch64_handle_option (&global_options, &global_options_set,
8277 &decoded, input_location);
8278 break;
8280 /* Set or unset a bit in the target_flags. aarch64_handle_option
8281 should know what mask to apply given the option number. */
8282 case aarch64_attr_mask:
8284 struct cl_decoded_option decoded;
8285 /* We only need to specify the option number.
8286 aarch64_handle_option will know which mask to apply. */
8287 decoded.opt_index = p_attr->opt_num;
8288 decoded.value = !invert;
8289 aarch64_handle_option (&global_options, &global_options_set,
8290 &decoded, input_location);
8291 break;
8293 /* Use the option setting machinery to set an option to an enum. */
8294 case aarch64_attr_enum:
8296 gcc_assert (arg);
8297 bool valid;
8298 int value;
8299 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
8300 &value, CL_TARGET);
8301 if (valid)
8303 set_option (&global_options, NULL, p_attr->opt_num, value,
8304 NULL, DK_UNSPECIFIED, input_location,
8305 global_dc);
8307 else
8309 error ("target %s %s=%s is not valid",
8310 pragma_or_attr, str_to_check, arg);
8312 break;
8314 default:
8315 gcc_unreachable ();
8319 return true;
8322 /* Count how many times the character C appears in
8323 NULL-terminated string STR. */
8325 static unsigned int
8326 num_occurences_in_str (char c, char *str)
8328 unsigned int res = 0;
8329 while (*str != '\0')
8331 if (*str == c)
8332 res++;
8334 str++;
8337 return res;
8340 /* Parse the tree in ARGS that contains the target attribute information
8341 and update the global target options space. PRAGMA_OR_ATTR is a string
8342 to be used in error messages, specifying whether this is processing
8343 a target attribute or a target pragma. */
8345 bool
8346 aarch64_process_target_attr (tree args, const char* pragma_or_attr)
8348 if (TREE_CODE (args) == TREE_LIST)
8352 tree head = TREE_VALUE (args);
8353 if (head)
8355 if (!aarch64_process_target_attr (head, pragma_or_attr))
8356 return false;
8358 args = TREE_CHAIN (args);
8359 } while (args);
8361 return true;
8363 /* We expect to find a string to parse. */
8364 gcc_assert (TREE_CODE (args) == STRING_CST);
8366 size_t len = strlen (TREE_STRING_POINTER (args));
8367 char *str_to_check = (char *) alloca (len + 1);
8368 strcpy (str_to_check, TREE_STRING_POINTER (args));
8370 if (len == 0)
8372 error ("malformed target %s value", pragma_or_attr);
8373 return false;
8376 /* Used to catch empty entries between commas, e.g.
8377 attribute ((target ("attr1,,attr2"))). */
8378 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
8380 /* Handle multiple target attributes separated by ','. */
8381 char *token = strtok (str_to_check, ",");
8383 unsigned int num_attrs = 0;
8384 while (token)
8386 num_attrs++;
8387 if (!aarch64_process_one_target_attr (token, pragma_or_attr))
8389 error ("target %s %qs is invalid", pragma_or_attr, token);
8390 return false;
8393 token = strtok (NULL, ",");
8396 if (num_attrs != num_commas + 1)
8398 error ("malformed target %s list %qs",
8399 pragma_or_attr, TREE_STRING_POINTER (args));
8400 return false;
8403 return true;
8406 /* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
8407 process attribute ((target ("..."))). */
8409 static bool
8410 aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
8412 struct cl_target_option cur_target;
8413 bool ret;
8414 tree old_optimize;
8415 tree new_target, new_optimize;
8416 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
8418 /* If what we're processing is the current pragma string then the
8419 target option node is already stored in target_option_current_node
8420 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
8421 having to re-parse the string. This is especially useful to keep
8422 arm_neon.h compile times down since that header contains a lot
8423 of intrinsics enclosed in pragmas. */
8424 if (!existing_target && args == current_target_pragma)
8426 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
8427 return true;
8429 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
8431 old_optimize = build_optimization_node (&global_options);
8432 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
8434 /* If the function changed the optimization levels as well as setting
8435 target options, start with the optimizations specified. */
8436 if (func_optimize && func_optimize != old_optimize)
8437 cl_optimization_restore (&global_options,
8438 TREE_OPTIMIZATION (func_optimize));
8440 /* Save the current target options to restore at the end. */
8441 cl_target_option_save (&cur_target, &global_options);
8443 /* If fndecl already has some target attributes applied to it, unpack
8444 them so that we add this attribute on top of them, rather than
8445 overwriting them. */
8446 if (existing_target)
8448 struct cl_target_option *existing_options
8449 = TREE_TARGET_OPTION (existing_target);
8451 if (existing_options)
8452 cl_target_option_restore (&global_options, existing_options);
8454 else
8455 cl_target_option_restore (&global_options,
8456 TREE_TARGET_OPTION (target_option_current_node));
8459 ret = aarch64_process_target_attr (args, "attribute");
8461 /* Set up any additional state. */
8462 if (ret)
8464 aarch64_override_options_internal (&global_options);
8465 /* Initialize SIMD builtins if we haven't already.
8466 Set current_target_pragma to NULL for the duration so that
8467 the builtin initialization code doesn't try to tag the functions
8468 being built with the attributes specified by any current pragma, thus
8469 going into an infinite recursion. */
8470 if (TARGET_SIMD)
8472 tree saved_current_target_pragma = current_target_pragma;
8473 current_target_pragma = NULL;
8474 aarch64_init_simd_builtins ();
8475 current_target_pragma = saved_current_target_pragma;
8477 new_target = build_target_option_node (&global_options);
8479 else
8480 new_target = NULL;
8482 new_optimize = build_optimization_node (&global_options);
8484 if (fndecl && ret)
8486 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
8488 if (old_optimize != new_optimize)
8489 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
8492 cl_target_option_restore (&global_options, &cur_target);
8494 if (old_optimize != new_optimize)
8495 cl_optimization_restore (&global_options,
8496 TREE_OPTIMIZATION (old_optimize));
8497 return ret;
8500 /* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
8501 tri-bool options (yes, no, don't care) and the default value is
8502 DEF, determine whether to reject inlining. */
8504 static bool
8505 aarch64_tribools_ok_for_inlining_p (int caller, int callee,
8506 int dont_care, int def)
8508 /* If the callee doesn't care, always allow inlining. */
8509 if (callee == dont_care)
8510 return true;
8512 /* If the caller doesn't care, always allow inlining. */
8513 if (caller == dont_care)
8514 return true;
8516 /* Otherwise, allow inlining if either the callee and caller values
8517 agree, or if the callee is using the default value. */
8518 return (callee == caller || callee == def);
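   /* Spelled out, the decision above is (with D the "don't care" value):

        callee == D                  -> allow inlining
        caller == D                  -> allow inlining
        callee == caller             -> allow inlining
        callee == DEF, != caller     -> allow inlining
        anything else                -> reject inlining  */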
8521 /* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
8522 to inline CALLEE into CALLER based on target-specific info.
8523 Make sure that the caller and callee have compatible architectural
8524 features. Then go through the other possible target attributes
8525 and see if they can block inlining. Try not to reject always_inline
8526 callees unless they are incompatible architecturally. */
8528 static bool
8529 aarch64_can_inline_p (tree caller, tree callee)
8531 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
8532 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
8534 /* If callee has no option attributes, then it is ok to inline. */
8535 if (!callee_tree)
8536 return true;
8538 struct cl_target_option *caller_opts
8539 = TREE_TARGET_OPTION (caller_tree ? caller_tree
8540 : target_option_default_node);
8542 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
8545 /* Callee's ISA flags should be a subset of the caller's. */
8546 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
8547 != callee_opts->x_aarch64_isa_flags)
8548 return false;
 8550   /* Allow non-strict aligned functions to be inlined into strict
 8551      aligned ones.  */
8552 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
8553 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
8554 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
8555 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
8556 return false;
8558 bool always_inline = lookup_attribute ("always_inline",
8559 DECL_ATTRIBUTES (callee));
8561 /* If the architectural features match up and the callee is always_inline
8562 then the other attributes don't matter. */
8563 if (always_inline)
8564 return true;
8566 if (caller_opts->x_aarch64_cmodel_var
8567 != callee_opts->x_aarch64_cmodel_var)
8568 return false;
8570 if (caller_opts->x_aarch64_tls_dialect
8571 != callee_opts->x_aarch64_tls_dialect)
8572 return false;
8574 /* Honour explicit requests to workaround errata. */
8575 if (!aarch64_tribools_ok_for_inlining_p (
8576 caller_opts->x_aarch64_fix_a53_err835769,
8577 callee_opts->x_aarch64_fix_a53_err835769,
8578 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
8579 return false;
8581 /* If the user explicitly specified -momit-leaf-frame-pointer for the
 8582      caller and callee and they don't match up, reject inlining.  */
8583 if (!aarch64_tribools_ok_for_inlining_p (
8584 caller_opts->x_flag_omit_leaf_frame_pointer,
8585 callee_opts->x_flag_omit_leaf_frame_pointer,
8586 2, 1))
8587 return false;
8589 /* If the callee has specific tuning overrides, respect them. */
8590 if (callee_opts->x_aarch64_override_tune_string != NULL
8591 && caller_opts->x_aarch64_override_tune_string == NULL)
8592 return false;
8594 /* If the user specified tuning override strings for the
8595 caller and callee and they don't match up, reject inlining.
8596 We just do a string compare here, we don't analyze the meaning
8597 of the string, as it would be too costly for little gain. */
8598 if (callee_opts->x_aarch64_override_tune_string
8599 && caller_opts->x_aarch64_override_tune_string
8600 && (strcmp (callee_opts->x_aarch64_override_tune_string,
8601 caller_opts->x_aarch64_override_tune_string) != 0))
8602 return false;
8604 return true;
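   /* A hypothetical example of the ISA-subset check above:

        __attribute__ ((target ("+crypto")))
        int callee (void) { return 0; }

        int caller (void) { return callee (); }   built without +crypto

      The callee's ISA flags are not a subset of the caller's, so inlining
      is rejected, even if the callee is marked always_inline.  A callee
      built with no extra ISA features passes the subset test and is then
      checked against the remaining (cmodel, TLS, tuning, ...) options
      above.  */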
8607 /* Return true if SYMBOL_REF X binds locally. */
8609 static bool
8610 aarch64_symbol_binds_local_p (const_rtx x)
8612 return (SYMBOL_REF_DECL (x)
8613 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
8614 : SYMBOL_REF_LOCAL_P (x));
 8617 /* Return true if SYMBOL_REF X is thread local.  */
8618 static bool
8619 aarch64_tls_symbol_p (rtx x)
8621 if (! TARGET_HAVE_TLS)
8622 return false;
8624 if (GET_CODE (x) != SYMBOL_REF)
8625 return false;
8627 return SYMBOL_REF_TLS_MODEL (x) != 0;
8630 /* Classify a TLS symbol into one of the TLS kinds. */
8631 enum aarch64_symbol_type
8632 aarch64_classify_tls_symbol (rtx x)
8634 enum tls_model tls_kind = tls_symbolic_operand_type (x);
8636 switch (tls_kind)
8638 case TLS_MODEL_GLOBAL_DYNAMIC:
8639 case TLS_MODEL_LOCAL_DYNAMIC:
8640 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
8642 case TLS_MODEL_INITIAL_EXEC:
8643 return SYMBOL_SMALL_GOTTPREL;
8645 case TLS_MODEL_LOCAL_EXEC:
8646 return SYMBOL_TLSLE;
8648 case TLS_MODEL_EMULATED:
8649 case TLS_MODEL_NONE:
8650 return SYMBOL_FORCE_TO_MEM;
8652 default:
8653 gcc_unreachable ();
8657 /* Return the method that should be used to access SYMBOL_REF or
8658 LABEL_REF X in context CONTEXT. */
8660 enum aarch64_symbol_type
8661 aarch64_classify_symbol (rtx x, rtx offset,
8662 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
8664 if (GET_CODE (x) == LABEL_REF)
8666 switch (aarch64_cmodel)
8668 case AARCH64_CMODEL_LARGE:
8669 return SYMBOL_FORCE_TO_MEM;
8671 case AARCH64_CMODEL_TINY_PIC:
8672 case AARCH64_CMODEL_TINY:
8673 return SYMBOL_TINY_ABSOLUTE;
8675 case AARCH64_CMODEL_SMALL_SPIC:
8676 case AARCH64_CMODEL_SMALL_PIC:
8677 case AARCH64_CMODEL_SMALL:
8678 return SYMBOL_SMALL_ABSOLUTE;
8680 default:
8681 gcc_unreachable ();
8685 if (GET_CODE (x) == SYMBOL_REF)
8687 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
8688 return SYMBOL_FORCE_TO_MEM;
8690 if (aarch64_tls_symbol_p (x))
8691 return aarch64_classify_tls_symbol (x);
8693 switch (aarch64_cmodel)
8695 case AARCH64_CMODEL_TINY:
 8696       /* When we retrieve a symbol + offset address, we have to make sure
 8697          the offset does not cause overflow of the final address.  But
 8698          we have no way of knowing the address of the symbol at compile time,
 8699          so we can't accurately say if the distance between the PC and
 8700          symbol + offset is outside the addressable range of +/-1M in the
 8701          TINY code model.  So we rely on images not being greater than
 8702          1M, cap the offset at 1M, and anything beyond 1M will have to
 8703          be loaded using an alternative mechanism.  */
8704 if (SYMBOL_REF_WEAK (x)
8705 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
8706 return SYMBOL_FORCE_TO_MEM;
8707 return SYMBOL_TINY_ABSOLUTE;
8709 case AARCH64_CMODEL_SMALL:
8710 /* Same reasoning as the tiny code model, but the offset cap here is
8711 4G. */
8712 if (SYMBOL_REF_WEAK (x)
8713 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
8714 HOST_WIDE_INT_C (4294967264)))
8715 return SYMBOL_FORCE_TO_MEM;
8716 return SYMBOL_SMALL_ABSOLUTE;
8718 case AARCH64_CMODEL_TINY_PIC:
8719 if (!aarch64_symbol_binds_local_p (x))
8720 return SYMBOL_TINY_GOT;
8721 return SYMBOL_TINY_ABSOLUTE;
8723 case AARCH64_CMODEL_SMALL_SPIC:
8724 case AARCH64_CMODEL_SMALL_PIC:
8725 if (!aarch64_symbol_binds_local_p (x))
8726 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
8727 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
8728 return SYMBOL_SMALL_ABSOLUTE;
8730 default:
8731 gcc_unreachable ();
8735 /* By default push everything into the constant pool. */
8736 return SYMBOL_FORCE_TO_MEM;
8739 bool
8740 aarch64_constant_address_p (rtx x)
8742 return (CONSTANT_P (x) && memory_address_p (DImode, x));
8745 bool
8746 aarch64_legitimate_pic_operand_p (rtx x)
8748 if (GET_CODE (x) == SYMBOL_REF
8749 || (GET_CODE (x) == CONST
8750 && GET_CODE (XEXP (x, 0)) == PLUS
8751 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
8752 return false;
8754 return true;
8757 /* Return true if X holds either a quarter-precision or
8758 floating-point +0.0 constant. */
8759 static bool
8760 aarch64_valid_floating_const (machine_mode mode, rtx x)
8762 if (!CONST_DOUBLE_P (x))
8763 return false;
8765 if (aarch64_float_const_zero_rtx_p (x))
8766 return true;
8768 /* We only handle moving 0.0 to a TFmode register. */
8769 if (!(mode == SFmode || mode == DFmode))
8770 return false;
8772 return aarch64_float_const_representable_p (x);
8775 static bool
8776 aarch64_legitimate_constant_p (machine_mode mode, rtx x)
8778 /* Do not allow vector struct mode constants. We could support
8779 0 and -1 easily, but they need support in aarch64-simd.md. */
8780 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
8781 return false;
8783 /* This could probably go away because
8784 we now decompose CONST_INTs according to expand_mov_immediate. */
8785 if ((GET_CODE (x) == CONST_VECTOR
8786 && aarch64_simd_valid_immediate (x, mode, false, NULL))
8787 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
8788 return !targetm.cannot_force_const_mem (mode, x);
8790 if (GET_CODE (x) == HIGH
8791 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
8792 return true;
8794 return aarch64_constant_address_p (x);
8798 aarch64_load_tp (rtx target)
8800 if (!target
8801 || GET_MODE (target) != Pmode
8802 || !register_operand (target, Pmode))
8803 target = gen_reg_rtx (Pmode);
8805 /* Can return in any reg. */
8806 emit_insn (gen_aarch64_load_tp_hard (target));
8807 return target;
8810 /* On AAPCS systems, this is the "struct __va_list". */
8811 static GTY(()) tree va_list_type;
8813 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
8814 Return the type to use as __builtin_va_list.
8816 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
8818 struct __va_list
8820 void *__stack;
8821 void *__gr_top;
8822 void *__vr_top;
8823 int __gr_offs;
8824 int __vr_offs;
8825 }; */
8827 static tree
8828 aarch64_build_builtin_va_list (void)
8830 tree va_list_name;
8831 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
8833 /* Create the type. */
8834 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
8835 /* Give it the required name. */
8836 va_list_name = build_decl (BUILTINS_LOCATION,
8837 TYPE_DECL,
8838 get_identifier ("__va_list"),
8839 va_list_type);
8840 DECL_ARTIFICIAL (va_list_name) = 1;
8841 TYPE_NAME (va_list_type) = va_list_name;
8842 TYPE_STUB_DECL (va_list_type) = va_list_name;
8844 /* Create the fields. */
8845 f_stack = build_decl (BUILTINS_LOCATION,
8846 FIELD_DECL, get_identifier ("__stack"),
8847 ptr_type_node);
8848 f_grtop = build_decl (BUILTINS_LOCATION,
8849 FIELD_DECL, get_identifier ("__gr_top"),
8850 ptr_type_node);
8851 f_vrtop = build_decl (BUILTINS_LOCATION,
8852 FIELD_DECL, get_identifier ("__vr_top"),
8853 ptr_type_node);
8854 f_groff = build_decl (BUILTINS_LOCATION,
8855 FIELD_DECL, get_identifier ("__gr_offs"),
8856 integer_type_node);
8857 f_vroff = build_decl (BUILTINS_LOCATION,
8858 FIELD_DECL, get_identifier ("__vr_offs"),
8859 integer_type_node);
8861 DECL_ARTIFICIAL (f_stack) = 1;
8862 DECL_ARTIFICIAL (f_grtop) = 1;
8863 DECL_ARTIFICIAL (f_vrtop) = 1;
8864 DECL_ARTIFICIAL (f_groff) = 1;
8865 DECL_ARTIFICIAL (f_vroff) = 1;
8867 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
8868 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
8869 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
8870 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
8871 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
8873 TYPE_FIELDS (va_list_type) = f_stack;
8874 DECL_CHAIN (f_stack) = f_grtop;
8875 DECL_CHAIN (f_grtop) = f_vrtop;
8876 DECL_CHAIN (f_vrtop) = f_groff;
8877 DECL_CHAIN (f_groff) = f_vroff;
8879 /* Compute its layout. */
8880 layout_type (va_list_type);
8882 return va_list_type;
8885 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
8886 static void
8887 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
8889 const CUMULATIVE_ARGS *cum;
8890 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
8891 tree stack, grtop, vrtop, groff, vroff;
8892 tree t;
8893 int gr_save_area_size;
8894 int vr_save_area_size;
8895 int vr_offset;
8897 cum = &crtl->args.info;
8898 gr_save_area_size
8899 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
8900 vr_save_area_size
8901 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
8903 if (!TARGET_FLOAT)
8905 gcc_assert (cum->aapcs_nvrn == 0);
8906 vr_save_area_size = 0;
8909 f_stack = TYPE_FIELDS (va_list_type_node);
8910 f_grtop = DECL_CHAIN (f_stack);
8911 f_vrtop = DECL_CHAIN (f_grtop);
8912 f_groff = DECL_CHAIN (f_vrtop);
8913 f_vroff = DECL_CHAIN (f_groff);
8915 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
8916 NULL_TREE);
8917 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
8918 NULL_TREE);
8919 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
8920 NULL_TREE);
8921 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
8922 NULL_TREE);
8923 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
8924 NULL_TREE);
8926 /* Emit code to initialize STACK, which points to the next varargs stack
8927 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
8928 by named arguments. STACK is 8-byte aligned. */
8929 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
8930 if (cum->aapcs_stack_size > 0)
8931 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
8932 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
8933 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8935 /* Emit code to initialize GRTOP, the top of the GR save area.
8936 virtual_incoming_args_rtx should have been 16 byte aligned. */
8937 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
8938 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
8939 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8941 /* Emit code to initialize VRTOP, the top of the VR save area.
8942 This address is gr_save_area_bytes below GRTOP, rounded
8943 down to the next 16-byte boundary. */
8944 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
8945 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
8946 STACK_BOUNDARY / BITS_PER_UNIT);
8948 if (vr_offset)
8949 t = fold_build_pointer_plus_hwi (t, -vr_offset);
8950 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
8951 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8953 /* Emit code to initialize GROFF, the offset from GRTOP of the
8954 next GPR argument. */
8955 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
8956 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
8957 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
 8959   /* Likewise emit code to initialize VROFF, the offset from VRTOP
8960 of the next VR argument. */
8961 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
8962 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
8963 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
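   /* In effect (a rough sketch, not the exact trees built above), va_start
      initializes the fields as:

        ap.__stack   = virtual_incoming_args + aapcs_stack_size * UNITS_PER_WORD;
        ap.__gr_top  = virtual_incoming_args;
        ap.__vr_top  = virtual_incoming_args - ROUND_UP (gr_save_area_size, 16);
        ap.__gr_offs = -gr_save_area_size;
        ap.__vr_offs = -vr_save_area_size;

      where the save areas themselves are laid out by
      aarch64_setup_incoming_varargs below.  */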
8966 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
8968 static tree
8969 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8970 gimple_seq *post_p ATTRIBUTE_UNUSED)
8972 tree addr;
8973 bool indirect_p;
8974 bool is_ha; /* is HFA or HVA. */
8975 bool dw_align; /* double-word align. */
8976 machine_mode ag_mode = VOIDmode;
8977 int nregs;
8978 machine_mode mode;
8980 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
8981 tree stack, f_top, f_off, off, arg, roundup, on_stack;
8982 HOST_WIDE_INT size, rsize, adjust, align;
8983 tree t, u, cond1, cond2;
8985 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8986 if (indirect_p)
8987 type = build_pointer_type (type);
8989 mode = TYPE_MODE (type);
8991 f_stack = TYPE_FIELDS (va_list_type_node);
8992 f_grtop = DECL_CHAIN (f_stack);
8993 f_vrtop = DECL_CHAIN (f_grtop);
8994 f_groff = DECL_CHAIN (f_vrtop);
8995 f_vroff = DECL_CHAIN (f_groff);
8997 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
8998 f_stack, NULL_TREE);
8999 size = int_size_in_bytes (type);
9000 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
9002 dw_align = false;
9003 adjust = 0;
9004 if (aarch64_vfp_is_call_or_return_candidate (mode,
9005 type,
9006 &ag_mode,
9007 &nregs,
9008 &is_ha))
9010 /* TYPE passed in fp/simd registers. */
9011 if (!TARGET_FLOAT)
9012 aarch64_err_no_fpadvsimd (mode, "varargs");
9014 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
9015 unshare_expr (valist), f_vrtop, NULL_TREE);
9016 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
9017 unshare_expr (valist), f_vroff, NULL_TREE);
9019 rsize = nregs * UNITS_PER_VREG;
9021 if (is_ha)
9023 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
9024 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
9026 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
9027 && size < UNITS_PER_VREG)
9029 adjust = UNITS_PER_VREG - size;
9032 else
9034 /* TYPE passed in general registers. */
9035 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
9036 unshare_expr (valist), f_grtop, NULL_TREE);
9037 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
9038 unshare_expr (valist), f_groff, NULL_TREE);
9039 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
9040 nregs = rsize / UNITS_PER_WORD;
9042 if (align > 8)
9043 dw_align = true;
9045 if (BLOCK_REG_PADDING (mode, type, 1) == downward
9046 && size < UNITS_PER_WORD)
9048 adjust = UNITS_PER_WORD - size;
9052 /* Get a local temporary for the field value. */
9053 off = get_initialized_tmp_var (f_off, pre_p, NULL);
9055 /* Emit code to branch if off >= 0. */
9056 t = build2 (GE_EXPR, boolean_type_node, off,
9057 build_int_cst (TREE_TYPE (off), 0));
9058 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
9060 if (dw_align)
9062 /* Emit: offs = (offs + 15) & -16. */
9063 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
9064 build_int_cst (TREE_TYPE (off), 15));
9065 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
9066 build_int_cst (TREE_TYPE (off), -16));
9067 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
9069 else
9070 roundup = NULL;
9072 /* Update ap.__[g|v]r_offs */
9073 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
9074 build_int_cst (TREE_TYPE (off), rsize));
9075 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
9077 /* String up. */
9078 if (roundup)
9079 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
9081 /* [cond2] if (ap.__[g|v]r_offs > 0) */
9082 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
9083 build_int_cst (TREE_TYPE (f_off), 0));
9084 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
9086 /* String up: make sure the assignment happens before the use. */
9087 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
9088 COND_EXPR_ELSE (cond1) = t;
9090 /* Prepare the trees handling the argument that is passed on the stack;
 9091      the top-level node will be stored in ON_STACK.  */
9092 arg = get_initialized_tmp_var (stack, pre_p, NULL);
9093 if (align > 8)
9095 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
9096 t = fold_convert (intDI_type_node, arg);
9097 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
9098 build_int_cst (TREE_TYPE (t), 15));
9099 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9100 build_int_cst (TREE_TYPE (t), -16));
9101 t = fold_convert (TREE_TYPE (arg), t);
9102 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
9104 else
9105 roundup = NULL;
9106 /* Advance ap.__stack */
9107 t = fold_convert (intDI_type_node, arg);
9108 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
9109 build_int_cst (TREE_TYPE (t), size + 7));
9110 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9111 build_int_cst (TREE_TYPE (t), -8));
9112 t = fold_convert (TREE_TYPE (arg), t);
9113 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
9114 /* String up roundup and advance. */
9115 if (roundup)
9116 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
9117 /* String up with arg */
9118 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
9119 /* Big-endianness related address adjustment. */
9120 if (BLOCK_REG_PADDING (mode, type, 1) == downward
9121 && size < UNITS_PER_WORD)
9123 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
9124 size_int (UNITS_PER_WORD - size));
9125 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
9128 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
9129 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
9131 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
9132 t = off;
9133 if (adjust)
9134 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
9135 build_int_cst (TREE_TYPE (off), adjust));
9137 t = fold_convert (sizetype, t);
9138 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
9140 if (is_ha)
9142 /* type ha; // treat as "struct {ftype field[n];}"
9143 ... [computing offs]
9144 for (i = 0; i <nregs; ++i, offs += 16)
9145 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
9146 return ha; */
9147 int i;
9148 tree tmp_ha, field_t, field_ptr_t;
9150 /* Declare a local variable. */
9151 tmp_ha = create_tmp_var_raw (type, "ha");
9152 gimple_add_tmp_var (tmp_ha);
9154 /* Establish the base type. */
9155 switch (ag_mode)
9157 case SFmode:
9158 field_t = float_type_node;
9159 field_ptr_t = float_ptr_type_node;
9160 break;
9161 case DFmode:
9162 field_t = double_type_node;
9163 field_ptr_t = double_ptr_type_node;
9164 break;
9165 case TFmode:
9166 field_t = long_double_type_node;
9167 field_ptr_t = long_double_ptr_type_node;
9168 break;
9169 /* The half precision and quad precision are not fully supported yet. Enable
9170 the following code after the support is complete. Need to find the correct
9171 type node for __fp16 *. */
9172 #if 0
9173 case HFmode:
9174 field_t = float_type_node;
9175 field_ptr_t = float_ptr_type_node;
9176 break;
9177 #endif
9178 case V2SImode:
9179 case V4SImode:
9181 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
9182 field_t = build_vector_type_for_mode (innertype, ag_mode);
9183 field_ptr_t = build_pointer_type (field_t);
9185 break;
9186 default:
9187 gcc_assert (0);
 9190       /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
9191 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
9192 addr = t;
9193 t = fold_convert (field_ptr_t, addr);
9194 t = build2 (MODIFY_EXPR, field_t,
9195 build1 (INDIRECT_REF, field_t, tmp_ha),
9196 build1 (INDIRECT_REF, field_t, t));
9198 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
9199 for (i = 1; i < nregs; ++i)
9201 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
9202 u = fold_convert (field_ptr_t, addr);
9203 u = build2 (MODIFY_EXPR, field_t,
9204 build2 (MEM_REF, field_t, tmp_ha,
9205 build_int_cst (field_ptr_t,
9206 (i *
9207 int_size_in_bytes (field_t)))),
9208 build1 (INDIRECT_REF, field_t, u));
9209 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
9212 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
9213 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
9216 COND_EXPR_ELSE (cond2) = t;
9217 addr = fold_convert (build_pointer_type (type), cond1);
9218 addr = build_va_arg_indirect_ref (addr);
9220 if (indirect_p)
9221 addr = build_va_arg_indirect_ref (addr);
9223 return addr;
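   /* A rough pseudo-C sketch of the expression built above for va_arg (ap, T)
      when T is passed in general registers (the FP/SIMD case is analogous,
      using __vr_top/__vr_offs and 16-byte register slots):

        off = ap.__gr_offs;
        if (off >= 0)
          goto on_stack;                    register save area already used up
        ap.__gr_offs = off + rsize;         claim the registers
        if (ap.__gr_offs > 0)
          goto on_stack;                    T would straddle the area's end
        addr = ap.__gr_top + off;           plus ADJUST for big-endian padding
        ...
      on_stack:
        addr = ap.__stack;                  realigned to 16 if alignof (T) > 8
        ap.__stack = (addr + size + 7) & -8;

      Homogeneous aggregates are additionally copied element by element from
      the VR save area into a local temporary, as the is_ha block shows.  */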
9226 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
9228 static void
9229 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
9230 tree type, int *pretend_size ATTRIBUTE_UNUSED,
9231 int no_rtl)
9233 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9234 CUMULATIVE_ARGS local_cum;
9235 int gr_saved, vr_saved;
9237 /* The caller has advanced CUM up to, but not beyond, the last named
9238 argument. Advance a local copy of CUM past the last "real" named
9239 argument, to find out how many registers are left over. */
9240 local_cum = *cum;
9241 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
 9243   /* Find out how many registers we need to save.  */
9244 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
9245 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
9247 if (!TARGET_FLOAT)
9249 gcc_assert (local_cum.aapcs_nvrn == 0);
9250 vr_saved = 0;
9253 if (!no_rtl)
9255 if (gr_saved > 0)
9257 rtx ptr, mem;
9259 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
9260 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
9261 - gr_saved * UNITS_PER_WORD);
9262 mem = gen_frame_mem (BLKmode, ptr);
9263 set_mem_alias_set (mem, get_varargs_alias_set ());
9265 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
9266 mem, gr_saved);
9268 if (vr_saved > 0)
9270 /* We can't use move_block_from_reg, because it will use
9271 the wrong mode, storing D regs only. */
9272 machine_mode mode = TImode;
9273 int off, i;
9275 /* Set OFF to the offset from virtual_incoming_args_rtx of
9276 the first vector register. The VR save area lies below
9277 the GR one, and is aligned to 16 bytes. */
9278 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
9279 STACK_BOUNDARY / BITS_PER_UNIT);
9280 off -= vr_saved * UNITS_PER_VREG;
9282 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
9284 rtx ptr, mem;
9286 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
9287 mem = gen_frame_mem (mode, ptr);
9288 set_mem_alias_set (mem, get_varargs_alias_set ());
9289 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
9290 off += UNITS_PER_VREG;
9295 /* We don't save the size into *PRETEND_SIZE because we want to avoid
9296 any complication of having crtl->args.pretend_args_size changed. */
9297 cfun->machine->frame.saved_varargs_size
9298 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
9299 STACK_BOUNDARY / BITS_PER_UNIT)
9300 + vr_saved * UNITS_PER_VREG);
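   /* Illustrative layout of the resulting save areas, relative to
      virtual_incoming_args_rtx (VIA), growing downwards:

        VIA                                      == ap.__gr_top
          [ GR save area: gr_saved x 8 bytes ]
        VIA - ROUND_UP (gr_saved * 8, 16)        == ap.__vr_top
          [ VR save area: vr_saved x 16 bytes ]

      which matches the offsets installed by aarch64_expand_builtin_va_start
      above.  */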
9303 static void
9304 aarch64_conditional_register_usage (void)
9306 int i;
9307 if (!TARGET_FLOAT)
9309 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
9311 fixed_regs[i] = 1;
9312 call_used_regs[i] = 1;
9317 /* Walk down the type tree of TYPE counting consecutive base elements.
9318 If *MODEP is VOIDmode, then set it to the first valid floating point
9319 type. If a non-floating point type is found, or if a floating point
9320 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
9321 otherwise return the count in the sub-tree. */
9322 static int
9323 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
9325 machine_mode mode;
9326 HOST_WIDE_INT size;
9328 switch (TREE_CODE (type))
9330 case REAL_TYPE:
9331 mode = TYPE_MODE (type);
9332 if (mode != DFmode && mode != SFmode && mode != TFmode)
9333 return -1;
9335 if (*modep == VOIDmode)
9336 *modep = mode;
9338 if (*modep == mode)
9339 return 1;
9341 break;
9343 case COMPLEX_TYPE:
9344 mode = TYPE_MODE (TREE_TYPE (type));
9345 if (mode != DFmode && mode != SFmode && mode != TFmode)
9346 return -1;
9348 if (*modep == VOIDmode)
9349 *modep = mode;
9351 if (*modep == mode)
9352 return 2;
9354 break;
9356 case VECTOR_TYPE:
9357 /* Use V2SImode and V4SImode as representatives of all 64-bit
9358 and 128-bit vector types. */
9359 size = int_size_in_bytes (type);
9360 switch (size)
9362 case 8:
9363 mode = V2SImode;
9364 break;
9365 case 16:
9366 mode = V4SImode;
9367 break;
9368 default:
9369 return -1;
9372 if (*modep == VOIDmode)
9373 *modep = mode;
9375 /* Vector modes are considered to be opaque: two vectors are
9376 equivalent for the purposes of being homogeneous aggregates
9377 if they are the same size. */
9378 if (*modep == mode)
9379 return 1;
9381 break;
9383 case ARRAY_TYPE:
9385 int count;
9386 tree index = TYPE_DOMAIN (type);
9388 /* Can't handle incomplete types nor sizes that are not
9389 fixed. */
9390 if (!COMPLETE_TYPE_P (type)
9391 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9392 return -1;
9394 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
9395 if (count == -1
9396 || !index
9397 || !TYPE_MAX_VALUE (index)
9398 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
9399 || !TYPE_MIN_VALUE (index)
9400 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
9401 || count < 0)
9402 return -1;
9404 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
9405 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
9407 /* There must be no padding. */
9408 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9409 return -1;
9411 return count;
9414 case RECORD_TYPE:
9416 int count = 0;
9417 int sub_count;
9418 tree field;
9420 /* Can't handle incomplete types nor sizes that are not
9421 fixed. */
9422 if (!COMPLETE_TYPE_P (type)
9423 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9424 return -1;
9426 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9428 if (TREE_CODE (field) != FIELD_DECL)
9429 continue;
9431 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
9432 if (sub_count < 0)
9433 return -1;
9434 count += sub_count;
9437 /* There must be no padding. */
9438 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9439 return -1;
9441 return count;
9444 case UNION_TYPE:
9445 case QUAL_UNION_TYPE:
9447 /* These aren't very interesting except in a degenerate case. */
9448 int count = 0;
9449 int sub_count;
9450 tree field;
9452 /* Can't handle incomplete types nor sizes that are not
9453 fixed. */
9454 if (!COMPLETE_TYPE_P (type)
9455 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9456 return -1;
9458 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9460 if (TREE_CODE (field) != FIELD_DECL)
9461 continue;
9463 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
9464 if (sub_count < 0)
9465 return -1;
9466 count = count > sub_count ? count : sub_count;
9469 /* There must be no padding. */
9470 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9471 return -1;
9473 return count;
9476 default:
9477 break;
9480 return -1;
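 /* Some illustrative results of the walk above, with *modep starting out
    as VOIDmode (the vector types assume arm_neon.h):

      struct { double x, y, z; }       ->  3, *modep == DFmode    (an HFA)
      _Complex double                  ->  2, *modep == DFmode
      struct { float32x4_t v[2]; }     ->  2, *modep == V4SImode  (an HVA)
      struct { float a; double b; }    -> -1  (mixed element modes)
      struct { double d; long l; }     -> -1  (non-floating-point member)  */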
9483 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
9484 type as described in AAPCS64 \S 4.1.2.
9486 See the comment above aarch64_composite_type_p for the notes on MODE. */
9488 static bool
9489 aarch64_short_vector_p (const_tree type,
9490 machine_mode mode)
9492 HOST_WIDE_INT size = -1;
9494 if (type && TREE_CODE (type) == VECTOR_TYPE)
9495 size = int_size_in_bytes (type);
9496 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9497 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9498 size = GET_MODE_SIZE (mode);
9500 return (size == 8 || size == 16);
9503 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
9504 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
9505 array types. The C99 floating-point complex types are also considered
9506 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
9507 types, which are GCC extensions and out of the scope of AAPCS64, are
9508 treated as composite types here as well.
9510 Note that MODE itself is not sufficient in determining whether a type
9511 is such a composite type or not. This is because
9512 stor-layout.c:compute_record_mode may have already changed the MODE
9513 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
9514 structure with only one field may have its MODE set to the mode of the
9515 field. Also an integer mode whose size matches the size of the
9516 RECORD_TYPE type may be used to substitute the original mode
9517 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
9518 solely relied on. */
9520 static bool
9521 aarch64_composite_type_p (const_tree type,
9522 machine_mode mode)
9524 if (aarch64_short_vector_p (type, mode))
9525 return false;
9527 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
9528 return true;
9530 if (mode == BLKmode
9531 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
9532 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
9533 return true;
9535 return false;
9538 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
9539 shall be passed or returned in simd/fp register(s) (providing these
9540 parameter passing registers are available).
9542 Upon successful return, *COUNT returns the number of needed registers,
 9543    *BASE_MODE returns the mode of the individual register and, when IS_HA
9544 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
9545 floating-point aggregate or a homogeneous short-vector aggregate. */
9547 static bool
9548 aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
9549 const_tree type,
9550 machine_mode *base_mode,
9551 int *count,
9552 bool *is_ha)
9554 machine_mode new_mode = VOIDmode;
9555 bool composite_p = aarch64_composite_type_p (type, mode);
9557 if (is_ha != NULL) *is_ha = false;
9559 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
9560 || aarch64_short_vector_p (type, mode))
9562 *count = 1;
9563 new_mode = mode;
9565 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
9567 if (is_ha != NULL) *is_ha = true;
9568 *count = 2;
9569 new_mode = GET_MODE_INNER (mode);
9571 else if (type && composite_p)
9573 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
9575 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
9577 if (is_ha != NULL) *is_ha = true;
9578 *count = ag_count;
9580 else
9581 return false;
9583 else
9584 return false;
9586 *base_mode = new_mode;
9587 return true;
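 /* For example (illustrative only): a plain 'double' gives *count == 1,
    *base_mode == DFmode, *is_ha == false; '_Complex float' gives
    *count == 2, *base_mode == SFmode, *is_ha == true; and an HFA such as
    struct { double x, y; } gives *count == 2, *base_mode == DFmode,
    *is_ha == true.  Aggregates with more than HA_MAX_NUM_FLDS elements
    are rejected.  */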
9590 /* Implement TARGET_STRUCT_VALUE_RTX. */
9592 static rtx
9593 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
9594 int incoming ATTRIBUTE_UNUSED)
9596 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
9599 /* Implements target hook vector_mode_supported_p. */
9600 static bool
9601 aarch64_vector_mode_supported_p (machine_mode mode)
9603 if (TARGET_SIMD
9604 && (mode == V4SImode || mode == V8HImode
9605 || mode == V16QImode || mode == V2DImode
9606 || mode == V2SImode || mode == V4HImode
9607 || mode == V8QImode || mode == V2SFmode
9608 || mode == V4SFmode || mode == V2DFmode
9609 || mode == V1DFmode))
9610 return true;
9612 return false;
9615 /* Return appropriate SIMD container
9616 for MODE within a vector of WIDTH bits. */
9617 static machine_mode
9618 aarch64_simd_container_mode (machine_mode mode, unsigned width)
9620 gcc_assert (width == 64 || width == 128);
9621 if (TARGET_SIMD)
9623 if (width == 128)
9624 switch (mode)
9626 case DFmode:
9627 return V2DFmode;
9628 case SFmode:
9629 return V4SFmode;
9630 case SImode:
9631 return V4SImode;
9632 case HImode:
9633 return V8HImode;
9634 case QImode:
9635 return V16QImode;
9636 case DImode:
9637 return V2DImode;
9638 default:
9639 break;
9641 else
9642 switch (mode)
9644 case SFmode:
9645 return V2SFmode;
9646 case SImode:
9647 return V2SImode;
9648 case HImode:
9649 return V4HImode;
9650 case QImode:
9651 return V8QImode;
9652 default:
9653 break;
9656 return word_mode;
9659 /* Return 128-bit container as the preferred SIMD mode for MODE. */
9660 static machine_mode
9661 aarch64_preferred_simd_mode (machine_mode mode)
9663 return aarch64_simd_container_mode (mode, 128);
9666 /* Return the bitmask of possible vector sizes for the vectorizer
9667 to iterate over. */
9668 static unsigned int
9669 aarch64_autovectorize_vector_sizes (void)
9671 return (16 | 8);
9674 /* Implement TARGET_MANGLE_TYPE. */
9676 static const char *
9677 aarch64_mangle_type (const_tree type)
9679 /* The AArch64 ABI documents say that "__va_list" has to be
 9680      mangled as if it is in the "std" namespace.  */
9681 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
9682 return "St9__va_list";
9684 /* Half-precision float. */
9685 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
9686 return "Dh";
9688 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
9689 builtin types. */
9690 if (TYPE_NAME (type) != NULL)
9691 return aarch64_mangle_builtin_type (type);
9693 /* Use the default mangling. */
9694 return NULL;
9698 /* Return true if the rtx_insn contains a MEM RTX somewhere
9699 in it. */
9701 static bool
9702 has_memory_op (rtx_insn *mem_insn)
9704 subrtx_iterator::array_type array;
9705 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
9706 if (MEM_P (*iter))
9707 return true;
9709 return false;
9712 /* Find the first rtx_insn before insn that will generate an assembly
9713 instruction. */
9715 static rtx_insn *
9716 aarch64_prev_real_insn (rtx_insn *insn)
9718 if (!insn)
9719 return NULL;
9723 insn = prev_real_insn (insn);
9725 while (insn && recog_memoized (insn) < 0);
9727 return insn;
9730 static bool
9731 is_madd_op (enum attr_type t1)
9733 unsigned int i;
9734 /* A number of these may be AArch32 only. */
9735 enum attr_type mlatypes[] = {
9736 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
9737 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
9738 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
9741 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
9743 if (t1 == mlatypes[i])
9744 return true;
9747 return false;
9750 /* Check if there is a register dependency between a load and the insn
9751 for which we hold recog_data. */
9753 static bool
9754 dep_between_memop_and_curr (rtx memop)
9756 rtx load_reg;
9757 int opno;
9759 gcc_assert (GET_CODE (memop) == SET);
9761 if (!REG_P (SET_DEST (memop)))
9762 return false;
9764 load_reg = SET_DEST (memop);
9765 for (opno = 1; opno < recog_data.n_operands; opno++)
9767 rtx operand = recog_data.operand[opno];
9768 if (REG_P (operand)
9769 && reg_overlap_mentioned_p (load_reg, operand))
9770 return true;
9773 return false;
9777 /* When working around the Cortex-A53 erratum 835769,
9778 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
9779 instruction and has a preceding memory instruction such that a NOP
9780 should be inserted between them. */
9782 bool
9783 aarch64_madd_needs_nop (rtx_insn* insn)
9785 enum attr_type attr_type;
9786 rtx_insn *prev;
9787 rtx body;
9789 if (!TARGET_FIX_ERR_A53_835769)
9790 return false;
9792 if (recog_memoized (insn) < 0)
9793 return false;
9795 attr_type = get_attr_type (insn);
9796 if (!is_madd_op (attr_type))
9797 return false;
9799 prev = aarch64_prev_real_insn (insn);
9800 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
9801 Restore recog state to INSN to avoid state corruption. */
9802 extract_constrain_insn_cached (insn);
9804 if (!prev || !has_memory_op (prev))
9805 return false;
9807 body = single_set (prev);
9809 /* If the previous insn is a memory op and there is no dependency between
9810 it and the DImode madd, emit a NOP between them. If body is NULL then we
9811 have a complex memory operation, probably a load/store pair.
9812 Be conservative for now and emit a NOP. */
9813 if (GET_MODE (recog_data.operand[0]) == DImode
9814 && (!body || !dep_between_memop_and_curr (body)))
9815 return true;
9817 return false;
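 /* A hypothetical instruction sequence showing when the workaround fires
    (with -mfix-cortex-a53-835769 enabled):

        ldr   x3, [x2]            64-bit memory operation
        madd  x0, x1, x4, x0      DImode multiply-accumulate
                                  -> a NOP is emitted between the two

    If the loaded register feeds an operand of the multiply-accumulate
    (see dep_between_memop_and_curr), no NOP is needed.  */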
9822 /* Implement FINAL_PRESCAN_INSN. */
9824 void
9825 aarch64_final_prescan_insn (rtx_insn *insn)
9827 if (aarch64_madd_needs_nop (insn))
9828 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
9832 /* Return the equivalent letter for size. */
9833 static char
9834 sizetochar (int size)
9836 switch (size)
9838 case 64: return 'd';
9839 case 32: return 's';
9840 case 16: return 'h';
9841 case 8 : return 'b';
9842 default: gcc_unreachable ();
9846 /* Return true iff x is a uniform vector of floating-point
9847 constants, and the constant can be represented in
 9848    quarter-precision form.  Note that, as aarch64_float_const_representable_p
 9849    rejects both +0.0 and -0.0, we will also reject them here.  */
9850 static bool
9851 aarch64_vect_float_const_representable_p (rtx x)
9853 int i = 0;
9854 REAL_VALUE_TYPE r0, ri;
9855 rtx x0, xi;
9857 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
9858 return false;
9860 x0 = CONST_VECTOR_ELT (x, 0);
9861 if (!CONST_DOUBLE_P (x0))
9862 return false;
9864 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
9866 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
9868 xi = CONST_VECTOR_ELT (x, i);
9869 if (!CONST_DOUBLE_P (xi))
9870 return false;
9872 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
9873 if (!REAL_VALUES_EQUAL (r0, ri))
9874 return false;
9877 return aarch64_float_const_representable_p (x0);
9880 /* Return true for valid and false for invalid. */
9881 bool
9882 aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
9883 struct simd_immediate_info *info)
9885 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
9886 matches = 1; \
9887 for (i = 0; i < idx; i += (STRIDE)) \
9888 if (!(TEST)) \
9889 matches = 0; \
9890 if (matches) \
9892 immtype = (CLASS); \
9893 elsize = (ELSIZE); \
9894 eshift = (SHIFT); \
9895 emvn = (NEG); \
9896 break; \
9899 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
9900 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
9901 unsigned char bytes[16];
9902 int immtype = -1, matches;
9903 unsigned int invmask = inverse ? 0xff : 0;
9904 int eshift, emvn;
9906 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9908 if (! (aarch64_simd_imm_zero_p (op, mode)
9909 || aarch64_vect_float_const_representable_p (op)))
9910 return false;
9912 if (info)
9914 info->value = CONST_VECTOR_ELT (op, 0);
9915 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
9916 info->mvn = false;
9917 info->shift = 0;
9920 return true;
9923 /* Splat vector constant out into a byte vector. */
9924 for (i = 0; i < n_elts; i++)
9926 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
9927 it must be laid out in the vector register in reverse order. */
9928 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
9929 unsigned HOST_WIDE_INT elpart;
9930 unsigned int part, parts;
9932 if (CONST_INT_P (el))
9934 elpart = INTVAL (el);
9935 parts = 1;
9937 else if (GET_CODE (el) == CONST_DOUBLE)
9939 elpart = CONST_DOUBLE_LOW (el);
9940 parts = 2;
9942 else
9943 gcc_unreachable ();
9945 for (part = 0; part < parts; part++)
9947 unsigned int byte;
9948 for (byte = 0; byte < innersize; byte++)
9950 bytes[idx++] = (elpart & 0xff) ^ invmask;
9951 elpart >>= BITS_PER_UNIT;
9953 if (GET_CODE (el) == CONST_DOUBLE)
9954 elpart = CONST_DOUBLE_HIGH (el);
9958 /* Sanity check. */
9959 gcc_assert (idx == GET_MODE_SIZE (mode));
9963 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
9964 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
9966 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9967 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
9969 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
9970 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
9972 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
9973 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
9975 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
9977 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
9979 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
9980 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
9982 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9983 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
9985 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
9986 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
9988 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
9989 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
9991 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
9993 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
9995 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9996 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
9998 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9999 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
10001 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
10002 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
10004 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
10005 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
10007 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
10009 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
10010 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
10012 while (0);
10014 if (immtype == -1)
10015 return false;
10017 if (info)
10019 info->element_width = elsize;
10020 info->mvn = emvn != 0;
10021 info->shift = eshift;
10023 unsigned HOST_WIDE_INT imm = 0;
10025 if (immtype >= 12 && immtype <= 15)
10026 info->msl = true;
10028 /* Un-invert bytes of recognized vector, if necessary. */
10029 if (invmask != 0)
10030 for (i = 0; i < idx; i++)
10031 bytes[i] ^= invmask;
10033 if (immtype == 17)
10035 /* FIXME: Broken on 32-bit H_W_I hosts. */
10036 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
10038 for (i = 0; i < 8; i++)
10039 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
10040 << (i * BITS_PER_UNIT);
10043 info->value = GEN_INT (imm);
10045 else
10047 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
10048 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
10050 /* Construct 'abcdefgh' because the assembler cannot handle
10051 generic constants. */
10052 if (info->mvn)
10053 imm = ~imm;
10054 imm = (imm >> info->shift) & 0xff;
10055 info->value = GEN_INT (imm);
10059 return true;
10060 #undef CHECK
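 /* A few illustrative constants the checks above accept, together with the
    MOVI/MVNI encodings they roughly correspond to (hypothetical examples):

      V4SImode  splat of 0x000000ab  -> immtype 0,  movi v0.4s, #0xab
      V4SImode  splat of 0x00ab0000  -> immtype 2,  movi v0.4s, #0xab, lsl #16
      V8HImode  splat of 0xff45      -> immtype 10, mvni v0.8h, #0xba
      V16QImode splat of 0x3f        -> immtype 16, movi v0.16b, #0x3f

    The single 'abcdefgh' byte finally stored in info->value is the 8-bit
    immediate field of those instructions.  */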
 10063 /* Check if immediate shift constants are within range.  */
10064 bool
10065 aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
10067 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
10068 if (left)
10069 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
10070 else
10071 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
10074 /* Return true if X is a uniform vector where all elements
10075 are either the floating-point constant 0.0 or the
10076 integer constant 0. */
10077 bool
10078 aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
10080 return x == CONST0_RTX (mode);
10083 bool
10084 aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
10086 HOST_WIDE_INT imm = INTVAL (x);
10087 int i;
10089 for (i = 0; i < 8; i++)
10091 unsigned int byte = imm & 0xff;
10092 if (byte != 0xff && byte != 0)
10093 return false;
10094 imm >>= 8;
10097 return true;
10100 bool
10101 aarch64_mov_operand_p (rtx x,
10102 enum aarch64_symbol_context context,
10103 machine_mode mode)
10105 if (GET_CODE (x) == HIGH
10106 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
10107 return true;
10109 if (CONST_INT_P (x))
10110 return true;
10112 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
10113 return true;
10115 return aarch64_classify_symbolic_expression (x, context)
10116 == SYMBOL_TINY_ABSOLUTE;
10119 /* Return a const_int vector of VAL. */
10121 aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
10123 int nunits = GET_MODE_NUNITS (mode);
10124 rtvec v = rtvec_alloc (nunits);
10125 int i;
10127 for (i=0; i < nunits; i++)
10128 RTVEC_ELT (v, i) = GEN_INT (val);
10130 return gen_rtx_CONST_VECTOR (mode, v);
10133 /* Check OP is a legal scalar immediate for the MOVI instruction. */
10135 bool
10136 aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
10138 machine_mode vmode;
10140 gcc_assert (!VECTOR_MODE_P (mode));
10141 vmode = aarch64_preferred_simd_mode (mode);
10142 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
10143 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
10146 /* Construct and return a PARALLEL RTX vector with elements numbering the
10147 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
10148 the vector - from the perspective of the architecture. This does not
10149 line up with GCC's perspective on lane numbers, so we end up with
10150 different masks depending on our target endian-ness. The diagram
10151 below may help. We must draw the distinction when building masks
10152 which select one half of the vector. An instruction selecting
10153 architectural low-lanes for a big-endian target, must be described using
10154 a mask selecting GCC high-lanes.
10156 Big-Endian Little-Endian
10158 GCC 0 1 2 3 3 2 1 0
10159 | x | x | x | x | | x | x | x | x |
10160 Architecture 3 2 1 0 3 2 1 0
10162 Low Mask: { 2, 3 } { 0, 1 }
10163 High Mask: { 0, 1 } { 2, 3 }
10167 aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
10169 int nunits = GET_MODE_NUNITS (mode);
10170 rtvec v = rtvec_alloc (nunits / 2);
10171 int high_base = nunits / 2;
10172 int low_base = 0;
10173 int base;
10174 rtx t1;
10175 int i;
10177 if (BYTES_BIG_ENDIAN)
10178 base = high ? low_base : high_base;
10179 else
10180 base = high ? high_base : low_base;
10182 for (i = 0; i < nunits / 2; i++)
10183 RTVEC_ELT (v, i) = GEN_INT (base + i);
10185 t1 = gen_rtx_PARALLEL (mode, v);
10186 return t1;
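 /* For instance, aarch64_simd_vect_par_cnst_half (V4SImode, true) returns
    the PARALLEL { 2, 3 } on little-endian and { 0, 1 } on big-endian, in
    both cases selecting the architectural high half, as the diagram above
    describes.  */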
10189 /* Check OP for validity as a PARALLEL RTX vector with elements
10190 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
10191 from the perspective of the architecture. See the diagram above
10192 aarch64_simd_vect_par_cnst_half for more details. */
10194 bool
10195 aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
10196 bool high)
10198 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
10199 HOST_WIDE_INT count_op = XVECLEN (op, 0);
10200 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
10201 int i = 0;
10203 if (!VECTOR_MODE_P (mode))
10204 return false;
10206 if (count_op != count_ideal)
10207 return false;
10209 for (i = 0; i < count_ideal; i++)
10211 rtx elt_op = XVECEXP (op, 0, i);
10212 rtx elt_ideal = XVECEXP (ideal, 0, i);
10214 if (!CONST_INT_P (elt_op)
10215 || INTVAL (elt_ideal) != INTVAL (elt_op))
10216 return false;
10218 return true;
10221 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
10222 HIGH (exclusive). */
10223 void
10224 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
10225 const_tree exp)
10227 HOST_WIDE_INT lane;
10228 gcc_assert (CONST_INT_P (operand));
10229 lane = INTVAL (operand);
10231 if (lane < low || lane >= high)
10233 if (exp)
10234 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
10235 else
10236 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
10240 /* Return TRUE if OP is a valid vector addressing mode. */
10241 bool
10242 aarch64_simd_mem_operand_p (rtx op)
10244 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
10245 || REG_P (XEXP (op, 0)));
10248 /* Emit a register copy from operand to operand, taking care not to
10249 early-clobber source registers in the process.
10251 COUNT is the number of components into which the copy needs to be
10252 decomposed. */
10253 void
10254 aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
10255 unsigned int count)
10257 unsigned int i;
10258 int rdest = REGNO (operands[0]);
10259 int rsrc = REGNO (operands[1]);
10261 if (!reg_overlap_mentioned_p (operands[0], operands[1])
10262 || rdest < rsrc)
10263 for (i = 0; i < count; i++)
10264 emit_move_insn (gen_rtx_REG (mode, rdest + i),
10265 gen_rtx_REG (mode, rsrc + i));
10266 else
10267 for (i = 0; i < count; i++)
10268 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
10269 gen_rtx_REG (mode, rsrc + count - i - 1));
10272 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
10273 one of VSTRUCT modes: OI, CI or XI. */
10275 aarch64_simd_attr_length_move (rtx_insn *insn)
10277 machine_mode mode;
10279 extract_insn_cached (insn);
10281 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
10283 mode = GET_MODE (recog_data.operand[0]);
10284 switch (mode)
10286 case OImode:
10287 return 8;
10288 case CImode:
10289 return 12;
10290 case XImode:
10291 return 16;
10292 default:
10293 gcc_unreachable ();
10296 return 4;
10299 /* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
10300 one of VSTRUCT modes: OI, CI, EI, or XI. */
10302 aarch64_simd_attr_length_rglist (enum machine_mode mode)
10304 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
10307 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
10308 alignment of a vector to 128 bits. */
10309 static HOST_WIDE_INT
10310 aarch64_simd_vector_alignment (const_tree type)
10312 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
10313 return MIN (align, 128);
10316 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
10317 static bool
10318 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
10320 if (is_packed)
10321 return false;
10323 /* We guarantee alignment for vectors up to 128-bits. */
10324 if (tree_int_cst_compare (TYPE_SIZE (type),
10325 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
10326 return false;
10328 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
10329 return true;
10332 /* If VALS is a vector constant that can be loaded into a register
10333 using DUP, generate instructions to do so and return an RTX to
10334 assign to the register. Otherwise return NULL_RTX. */
10335 static rtx
10336 aarch64_simd_dup_constant (rtx vals)
10338 machine_mode mode = GET_MODE (vals);
10339 machine_mode inner_mode = GET_MODE_INNER (mode);
10340 int n_elts = GET_MODE_NUNITS (mode);
10341 bool all_same = true;
10342 rtx x;
10343 int i;
10345 if (GET_CODE (vals) != CONST_VECTOR)
10346 return NULL_RTX;
10348 for (i = 1; i < n_elts; ++i)
10350 x = CONST_VECTOR_ELT (vals, i);
10351 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
10352 all_same = false;
10355 if (!all_same)
10356 return NULL_RTX;
10358 /* We can load this constant by using DUP and a constant in a
10359 single ARM register. This will be cheaper than a vector
10360 load. */
10361 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
10362 return gen_rtx_VEC_DUPLICATE (mode, x);
10366 /* Generate code to load VALS, which is a PARALLEL containing only
10367 constants (for vec_init) or CONST_VECTOR, efficiently into a
10368 register. Returns an RTX to copy into the register, or NULL_RTX
10369 for a PARALLEL that can not be converted into a CONST_VECTOR. */
10370 static rtx
10371 aarch64_simd_make_constant (rtx vals)
10373 machine_mode mode = GET_MODE (vals);
10374 rtx const_dup;
10375 rtx const_vec = NULL_RTX;
10376 int n_elts = GET_MODE_NUNITS (mode);
10377 int n_const = 0;
10378 int i;
10380 if (GET_CODE (vals) == CONST_VECTOR)
10381 const_vec = vals;
10382 else if (GET_CODE (vals) == PARALLEL)
10384 /* A CONST_VECTOR must contain only CONST_INTs and
10385 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
10386 Only store valid constants in a CONST_VECTOR. */
10387 for (i = 0; i < n_elts; ++i)
10389 rtx x = XVECEXP (vals, 0, i);
10390 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
10391 n_const++;
10393 if (n_const == n_elts)
10394 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
10396 else
10397 gcc_unreachable ();
10399 if (const_vec != NULL_RTX
10400 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
10401 /* Load using MOVI/MVNI. */
10402 return const_vec;
10403 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
10404 /* Loaded using DUP. */
10405 return const_dup;
10406 else if (const_vec != NULL_RTX)
10407 /* Load from constant pool. We cannot take advantage of single-cycle
10408 LD1 because we need a PC-relative addressing mode. */
10409 return const_vec;
10410 else
10411 /* A PARALLEL containing something not valid inside CONST_VECTOR.
10412 We cannot construct an initializer. */
10413 return NULL_RTX;
10416 void
10417 aarch64_expand_vector_init (rtx target, rtx vals)
10419 machine_mode mode = GET_MODE (target);
10420 machine_mode inner_mode = GET_MODE_INNER (mode);
10421 int n_elts = GET_MODE_NUNITS (mode);
10422 int n_var = 0;
10423 rtx any_const = NULL_RTX;
10424 bool all_same = true;
10426 for (int i = 0; i < n_elts; ++i)
10428 rtx x = XVECEXP (vals, 0, i);
10429 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
10430 ++n_var;
10431 else
10432 any_const = x;
10434 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
10435 all_same = false;
10438 if (n_var == 0)
10440 rtx constant = aarch64_simd_make_constant (vals);
10441 if (constant != NULL_RTX)
10443 emit_move_insn (target, constant);
10444 return;
10448 /* Splat a single non-constant element if we can. */
10449 if (all_same)
10451 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
10452 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
10453 return;
10456 /* Half the fields (or fewer) are non-constant. Load the constant part, then
10457 overwrite the varying fields. Hope that this is more efficient than using the stack. */
10458 if (n_var <= n_elts/2)
10460 rtx copy = copy_rtx (vals);
10462 /* Load constant part of vector. We really don't care what goes into the
10463 parts we will overwrite, but we're more likely to be able to load the
10464 constant efficiently if it has fewer, larger, repeating parts
10465 (see aarch64_simd_valid_immediate). */
10466 for (int i = 0; i < n_elts; i++)
10468 rtx x = XVECEXP (vals, 0, i);
10469 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
10470 continue;
10471 rtx subst = any_const;
10472 for (int bit = n_elts / 2; bit > 0; bit /= 2)
10474 /* Look in the copied vector, as more elements are const. */
10475 rtx test = XVECEXP (copy, 0, i ^ bit);
10476 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
10478 subst = test;
10479 break;
10482 XVECEXP (copy, 0, i) = subst;
10484 aarch64_expand_vector_init (target, copy);
10486 /* Insert variables. */
10487 enum insn_code icode = optab_handler (vec_set_optab, mode);
10488 gcc_assert (icode != CODE_FOR_nothing);
10490 for (int i = 0; i < n_elts; i++)
10492 rtx x = XVECEXP (vals, 0, i);
10493 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
10494 continue;
10495 x = copy_to_mode_reg (inner_mode, x);
10496 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
10498 return;
10501 /* Construct the vector in memory one field at a time
10502 and load the whole vector. */
10503 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
10504 for (int i = 0; i < n_elts; i++)
10505 emit_move_insn (adjust_address_nv (mem, inner_mode,
10506 i * GET_MODE_SIZE (inner_mode)),
10507 XVECEXP (vals, 0, i));
10508 emit_move_insn (target, mem);
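/* Illustrative sketch (not from the original source): for a hypothetical
   V4SI initializer { x, 1, 2, 3 } only element 0 is variable, so the
   "load constant then overwrite" path above substitutes a nearby constant
   for it (copy[0] = copy[0 ^ 2] = 2), loads the constant vector
   { 2, 1, 2, 3 }, and then emits a single vec_set (typically an INS) of x
   into lane 0.  The substituted value depends on which neighbouring
   elements are constant; this only shows the shape of the expansion.  */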
10512 static unsigned HOST_WIDE_INT
10513 aarch64_shift_truncation_mask (machine_mode mode)
10515 return
10516 (aarch64_vector_mode_supported_p (mode)
10517 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
10520 #ifndef TLS_SECTION_ASM_FLAG
10521 #define TLS_SECTION_ASM_FLAG 'T'
10522 #endif
10524 void
10525 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
10526 tree decl ATTRIBUTE_UNUSED)
10528 char flagchars[10], *f = flagchars;
10530 /* If we have already declared this section, we can use an
10531 abbreviated form to switch back to it -- unless this section is
10532 part of a COMDAT group, in which case GAS requires the full
10533 declaration every time. */
10534 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
10535 && (flags & SECTION_DECLARED))
10537 fprintf (asm_out_file, "\t.section\t%s\n", name);
10538 return;
10541 if (!(flags & SECTION_DEBUG))
10542 *f++ = 'a';
10543 if (flags & SECTION_WRITE)
10544 *f++ = 'w';
10545 if (flags & SECTION_CODE)
10546 *f++ = 'x';
10547 if (flags & SECTION_SMALL)
10548 *f++ = 's';
10549 if (flags & SECTION_MERGE)
10550 *f++ = 'M';
10551 if (flags & SECTION_STRINGS)
10552 *f++ = 'S';
10553 if (flags & SECTION_TLS)
10554 *f++ = TLS_SECTION_ASM_FLAG;
10555 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
10556 *f++ = 'G';
10557 *f = '\0';
10559 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
10561 if (!(flags & SECTION_NOTYPE))
10563 const char *type;
10564 const char *format;
10566 if (flags & SECTION_BSS)
10567 type = "nobits";
10568 else
10569 type = "progbits";
10571 #ifdef TYPE_OPERAND_FMT
10572 format = "," TYPE_OPERAND_FMT;
10573 #else
10574 format = ",@%s";
10575 #endif
10577 fprintf (asm_out_file, format, type);
10579 if (flags & SECTION_ENTSIZE)
10580 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
10581 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
10583 if (TREE_CODE (decl) == IDENTIFIER_NODE)
10584 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
10585 else
10586 fprintf (asm_out_file, ",%s,comdat",
10587 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
10591 putc ('\n', asm_out_file);
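/* For illustration, the directives emitted above look roughly like this
   (exact names, flags and types depend on the section and target
   configuration):

     .section .text.unlikely,"ax",@progbits
     .section .rodata.str1.1,"aMS",@progbits,1
     .section .text._Z3foov,"axG",@progbits,_Z3foov,comdat

   i.e. a flag string drawn from 'a', 'w', 'x', 's', 'M', 'S', the TLS
   flag and 'G', then the progbits/nobits type, the entity size for
   mergeable sections, and the COMDAT group name where applicable.  */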
10594 /* Select a format to encode pointers in exception handling data. */
10595 int
10596 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
10598 int type;
10599 switch (aarch64_cmodel)
10601 case AARCH64_CMODEL_TINY:
10602 case AARCH64_CMODEL_TINY_PIC:
10603 case AARCH64_CMODEL_SMALL:
10604 case AARCH64_CMODEL_SMALL_PIC:
10605 case AARCH64_CMODEL_SMALL_SPIC:
10606 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
10607 for everything. */
10608 type = DW_EH_PE_sdata4;
10609 break;
10610 default:
10611 /* No assumptions here. 8-byte relocs required. */
10612 type = DW_EH_PE_sdata8;
10613 break;
10615 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
10618 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
10619 by the function fndecl. */
10621 void
10622 aarch64_declare_function_name (FILE *stream, const char* name,
10623 tree fndecl)
10625 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
10627 struct cl_target_option *targ_options;
10628 if (target_parts)
10629 targ_options = TREE_TARGET_OPTION (target_parts);
10630 else
10631 targ_options = TREE_TARGET_OPTION (target_option_current_node);
10632 gcc_assert (targ_options);
10634 const struct processor *this_arch
10635 = aarch64_get_arch (targ_options->x_explicit_arch);
10637 asm_fprintf (asm_out_file, "\t.arch %s", this_arch->name);
10638 aarch64_print_extension (asm_out_file, targ_options->x_aarch64_isa_flags);
10640 /* Print the cpu name we're tuning for in the comments; this might be
10641 useful to readers of the generated asm. */
10643 const struct processor *this_tune
10644 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
10646 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
10647 this_tune->name);
10649 /* Don't forget the type directive for ELF. */
10650 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
10651 ASM_OUTPUT_LABEL (stream, name);
10654 /* Emit load exclusive. */
10656 static void
10657 aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
10658 rtx mem, rtx model_rtx)
10660 rtx (*gen) (rtx, rtx, rtx);
10662 switch (mode)
10664 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
10665 case HImode: gen = gen_aarch64_load_exclusivehi; break;
10666 case SImode: gen = gen_aarch64_load_exclusivesi; break;
10667 case DImode: gen = gen_aarch64_load_exclusivedi; break;
10668 default:
10669 gcc_unreachable ();
10672 emit_insn (gen (rval, mem, model_rtx));
10675 /* Emit store exclusive. */
10677 static void
10678 aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
10679 rtx rval, rtx mem, rtx model_rtx)
10681 rtx (*gen) (rtx, rtx, rtx, rtx);
10683 switch (mode)
10685 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
10686 case HImode: gen = gen_aarch64_store_exclusivehi; break;
10687 case SImode: gen = gen_aarch64_store_exclusivesi; break;
10688 case DImode: gen = gen_aarch64_store_exclusivedi; break;
10689 default:
10690 gcc_unreachable ();
10693 emit_insn (gen (bval, rval, mem, model_rtx));
10696 /* Mark the previous jump instruction as unlikely. */
10698 static void
10699 aarch64_emit_unlikely_jump (rtx insn)
10701 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
10703 insn = emit_jump_insn (insn);
10704 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
10707 /* Expand a compare and swap pattern. */
10709 void
10710 aarch64_expand_compare_and_swap (rtx operands[])
10712 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
10713 machine_mode mode, cmp_mode;
10714 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
10716 bval = operands[0];
10717 rval = operands[1];
10718 mem = operands[2];
10719 oldval = operands[3];
10720 newval = operands[4];
10721 is_weak = operands[5];
10722 mod_s = operands[6];
10723 mod_f = operands[7];
10724 mode = GET_MODE (mem);
10725 cmp_mode = mode;
10727 /* Normally the succ memory model must be stronger than fail, but in the
10728 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
10729 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
10731 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
10732 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
10733 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
10735 switch (mode)
10737 case QImode:
10738 case HImode:
10739 /* For short modes, we're going to perform the comparison in SImode,
10740 so do the zero-extension now. */
10741 cmp_mode = SImode;
10742 rval = gen_reg_rtx (SImode);
10743 oldval = convert_modes (SImode, mode, oldval, true);
10744 /* Fall through. */
10746 case SImode:
10747 case DImode:
10748 /* Force the value into a register if needed. */
10749 if (!aarch64_plus_operand (oldval, mode))
10750 oldval = force_reg (cmp_mode, oldval);
10751 break;
10753 default:
10754 gcc_unreachable ();
10757 switch (mode)
10759 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
10760 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
10761 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
10762 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
10763 default:
10764 gcc_unreachable ();
10767 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
10769 if (mode == QImode || mode == HImode)
10770 emit_move_insn (operands[1], gen_lowpart (mode, rval));
10772 x = gen_rtx_REG (CCmode, CC_REGNUM);
10773 x = gen_rtx_EQ (SImode, x, const0_rtx);
10774 emit_insn (gen_rtx_SET (bval, x));
10777 /* Emit a barrier that is appropriate for memory model MODEL at the end of a
10778 sequence implementing an atomic operation. */
10780 static void
10781 aarch64_emit_post_barrier (enum memmodel model)
10783 const enum memmodel base_model = memmodel_base (model);
10785 if (is_mm_sync (model)
10786 && (base_model == MEMMODEL_ACQUIRE
10787 || base_model == MEMMODEL_ACQ_REL
10788 || base_model == MEMMODEL_SEQ_CST))
10790 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
10794 /* Split a compare and swap pattern. */
10796 void
10797 aarch64_split_compare_and_swap (rtx operands[])
10799 rtx rval, mem, oldval, newval, scratch;
10800 machine_mode mode;
10801 bool is_weak;
10802 rtx_code_label *label1, *label2;
10803 rtx x, cond;
10804 enum memmodel model;
10805 rtx model_rtx;
10807 rval = operands[0];
10808 mem = operands[1];
10809 oldval = operands[2];
10810 newval = operands[3];
10811 is_weak = (operands[4] != const0_rtx);
10812 model_rtx = operands[5];
10813 scratch = operands[7];
10814 mode = GET_MODE (mem);
10815 model = memmodel_from_int (INTVAL (model_rtx));
10817 label1 = NULL;
10818 if (!is_weak)
10820 label1 = gen_label_rtx ();
10821 emit_label (label1);
10823 label2 = gen_label_rtx ();
10825 /* The initial load can be relaxed for a __sync operation since a final
10826 barrier will be emitted to stop code hoisting. */
10827 if (is_mm_sync (model))
10828 aarch64_emit_load_exclusive (mode, rval, mem,
10829 GEN_INT (MEMMODEL_RELAXED));
10830 else
10831 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
10833 cond = aarch64_gen_compare_reg (NE, rval, oldval);
10834 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
10835 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
10836 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
10837 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
10839 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
10841 if (!is_weak)
10843 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
10844 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
10845 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
10846 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
10848 else
10850 cond = gen_rtx_REG (CCmode, CC_REGNUM);
10851 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
10852 emit_insn (gen_rtx_SET (cond, x));
10855 emit_label (label2);
10857 /* Emit any final barrier needed for a __sync operation. */
10858 if (is_mm_sync (model))
10859 aarch64_emit_post_barrier (model);
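/* For reference, the strong (non-weak) split above produces a retry loop
   of roughly this shape; the exact load/store variants (LDXR/LDAXR,
   STXR/STLXR) follow the requested memory model and the registers here
   are only illustrative:

     .Lretry:
         ldaxr   w0, [x1]        // load-exclusive the current value
         cmp     w0, w2          // compare against the expected value
         b.ne    .Ldone          // mismatch: fail without storing
         stlxr   w3, w4, [x1]    // try to store the new value
         cbnz    w3, .Lretry     // lost the exclusive monitor: retry
     .Ldone:

   The weak form drops the backward branch and instead reports whether
   the single store-exclusive attempt succeeded.  */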
10862 /* Split an atomic operation. */
10864 void
10865 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
10866 rtx value, rtx model_rtx, rtx cond)
10868 machine_mode mode = GET_MODE (mem);
10869 machine_mode wmode = (mode == DImode ? DImode : SImode);
10870 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
10871 const bool is_sync = is_mm_sync (model);
10872 rtx_code_label *label;
10873 rtx x;
10875 label = gen_label_rtx ();
10876 emit_label (label);
10878 if (new_out)
10879 new_out = gen_lowpart (wmode, new_out);
10880 if (old_out)
10881 old_out = gen_lowpart (wmode, old_out);
10882 else
10883 old_out = new_out;
10884 value = simplify_gen_subreg (wmode, value, mode, 0);
10886 /* The initial load can be relaxed for a __sync operation since a final
10887 barrier will be emitted to stop code hoisting. */
10888 if (is_sync)
10889 aarch64_emit_load_exclusive (mode, old_out, mem,
10890 GEN_INT (MEMMODEL_RELAXED));
10891 else
10892 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
10894 switch (code)
10896 case SET:
10897 new_out = value;
10898 break;
10900 case NOT:
10901 x = gen_rtx_AND (wmode, old_out, value);
10902 emit_insn (gen_rtx_SET (new_out, x));
10903 x = gen_rtx_NOT (wmode, new_out);
10904 emit_insn (gen_rtx_SET (new_out, x));
10905 break;
10907 case MINUS:
10908 if (CONST_INT_P (value))
10910 value = GEN_INT (-INTVAL (value));
10911 code = PLUS;
10913 /* Fall through. */
10915 default:
10916 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
10917 emit_insn (gen_rtx_SET (new_out, x));
10918 break;
10921 aarch64_emit_store_exclusive (mode, cond, mem,
10922 gen_lowpart (mode, new_out), model_rtx);
10924 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
10925 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
10926 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
10927 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
10929 /* Emit any final barrier needed for a __sync operation. */
10930 if (is_sync)
10931 aarch64_emit_post_barrier (model);
10934 static void
10935 aarch64_init_libfuncs (void)
10937 /* Half-precision float operations. The compiler handles all operations
10938 with NULL libfuncs by converting to SFmode. */
10940 /* Conversions. */
10941 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
10942 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
10944 /* Arithmetic. */
10945 set_optab_libfunc (add_optab, HFmode, NULL);
10946 set_optab_libfunc (sdiv_optab, HFmode, NULL);
10947 set_optab_libfunc (smul_optab, HFmode, NULL);
10948 set_optab_libfunc (neg_optab, HFmode, NULL);
10949 set_optab_libfunc (sub_optab, HFmode, NULL);
10951 /* Comparisons. */
10952 set_optab_libfunc (eq_optab, HFmode, NULL);
10953 set_optab_libfunc (ne_optab, HFmode, NULL);
10954 set_optab_libfunc (lt_optab, HFmode, NULL);
10955 set_optab_libfunc (le_optab, HFmode, NULL);
10956 set_optab_libfunc (ge_optab, HFmode, NULL);
10957 set_optab_libfunc (gt_optab, HFmode, NULL);
10958 set_optab_libfunc (unord_optab, HFmode, NULL);
10961 /* Target hook for c_mode_for_suffix. */
10962 static machine_mode
10963 aarch64_c_mode_for_suffix (char suffix)
10965 if (suffix == 'q')
10966 return TFmode;
10968 return VOIDmode;
10971 /* We can only represent floating point constants which will fit in
10972 "quarter-precision" values. These values are characterised by
10973 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
10976 (-1)^s * (n/16) * 2^r
10978 Where:
10979 's' is the sign bit.
10980 'n' is an integer in the range 16 <= n <= 31.
10981 'r' is an integer in the range -3 <= r <= 4. */
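/* A few worked examples of that encoding, for illustration:

     1.0   = (16/16) * 2^0        n = 16, r = 0
     2.5   = (20/16) * 2^1        n = 20, r = 1
     0.125 = (16/16) * 2^-3       the smallest positive magnitude
     31.0  = (31/16) * 2^4        the largest magnitude

   so the representable magnitudes run from 0.125 up to 31.0, matching
   the immediate range of FMOV (immediate).  */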
10983 /* Return true iff X can be represented by a quarter-precision
10984 floating point immediate operand. Note, we cannot represent 0.0. */
10985 bool
10986 aarch64_float_const_representable_p (rtx x)
10988 /* This represents our current view of how many bits
10989 make up the mantissa. */
10990 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
10991 int exponent;
10992 unsigned HOST_WIDE_INT mantissa, mask;
10993 REAL_VALUE_TYPE r, m;
10994 bool fail;
10996 if (!CONST_DOUBLE_P (x))
10997 return false;
10999 /* We don't support HFmode constants yet. */
11000 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
11001 return false;
11003 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11005 /* We cannot represent infinities, NaNs or +/-zero. We won't
11006 know if we have +zero until we analyse the mantissa, but we
11007 can reject the other invalid values. */
11008 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
11009 || REAL_VALUE_MINUS_ZERO (r))
11010 return false;
11012 /* Extract exponent. */
11013 r = real_value_abs (&r);
11014 exponent = REAL_EXP (&r);
11016 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11017 highest (sign) bit, with a fixed binary point at bit point_pos.
11018 m1 holds the low part of the mantissa, m2 the high part.
11019 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
11020 bits for the mantissa, this can fail (low bits will be lost). */
11021 real_ldexp (&m, &r, point_pos - exponent);
11022 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11024 /* If the low part of the mantissa has bits set we cannot represent
11025 the value. */
11026 if (w.elt (0) != 0)
11027 return false;
11028 /* We have rejected the lower HOST_WIDE_INT, so update our
11029 understanding of how many bits lie in the mantissa and
11030 look only at the high HOST_WIDE_INT. */
11031 mantissa = w.elt (1);
11032 point_pos -= HOST_BITS_PER_WIDE_INT;
11034 /* We can only represent values with a mantissa of the form 1.xxxx. */
11035 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11036 if ((mantissa & mask) != 0)
11037 return false;
11039 /* Having filtered unrepresentable values, we may now remove all
11040 but the highest 5 bits. */
11041 mantissa >>= point_pos - 5;
11043 /* We cannot represent the value 0.0, so reject it. This is handled
11044 elsewhere. */
11045 if (mantissa == 0)
11046 return false;
11048 /* Then, as bit 4 is always set, we can mask it off, leaving
11049 the mantissa in the range [0, 15]. */
11050 mantissa &= ~(1 << 4);
11051 gcc_assert (mantissa <= 15);
11053 /* GCC internally does not use IEEE754-like encoding (where normalized
11054 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
11055 Our mantissa values are shifted 4 places to the left relative to
11056 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
11057 by 5 places to correct for GCC's representation. */
11058 exponent = 5 - exponent;
11060 return (exponent >= 0 && exponent <= 7);
11063 char*
11064 aarch64_output_simd_mov_immediate (rtx const_vector,
11065 machine_mode mode,
11066 unsigned width)
11068 bool is_valid;
11069 static char templ[40];
11070 const char *mnemonic;
11071 const char *shift_op;
11072 unsigned int lane_count = 0;
11073 char element_char;
11075 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
11077 /* This will return true to show const_vector is legal for use as the
11078 immediate of either an AdvSIMD MOVI instruction or, implicitly, MVNI. It will
11079 also update INFO to show how the immediate should be generated. */
11080 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
11081 gcc_assert (is_valid);
11083 element_char = sizetochar (info.element_width);
11084 lane_count = width / info.element_width;
11086 mode = GET_MODE_INNER (mode);
11087 if (mode == SFmode || mode == DFmode)
11089 gcc_assert (info.shift == 0 && ! info.mvn);
11090 if (aarch64_float_const_zero_rtx_p (info.value))
11091 info.value = GEN_INT (0);
11092 else
11094 #define buf_size 20
11095 REAL_VALUE_TYPE r;
11096 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
11097 char float_buf[buf_size] = {'\0'};
11098 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
11099 #undef buf_size
11101 if (lane_count == 1)
11102 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
11103 else
11104 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
11105 lane_count, element_char, float_buf);
11106 return templ;
11110 mnemonic = info.mvn ? "mvni" : "movi";
11111 shift_op = info.msl ? "msl" : "lsl";
11113 if (lane_count == 1)
11114 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
11115 mnemonic, UINTVAL (info.value));
11116 else if (info.shift)
11117 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
11118 ", %s %d", mnemonic, lane_count, element_char,
11119 UINTVAL (info.value), shift_op, info.shift);
11120 else
11121 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
11122 mnemonic, lane_count, element_char, UINTVAL (info.value));
11123 return templ;
11126 char*
11127 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
11128 machine_mode mode)
11130 machine_mode vmode;
11132 gcc_assert (!VECTOR_MODE_P (mode));
11133 vmode = aarch64_simd_container_mode (mode, 64);
11134 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
11135 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
11138 /* Split operands into moves from op[1] + op[2] into op[0]. */
11140 void
11141 aarch64_split_combinev16qi (rtx operands[3])
11143 unsigned int dest = REGNO (operands[0]);
11144 unsigned int src1 = REGNO (operands[1]);
11145 unsigned int src2 = REGNO (operands[2]);
11146 machine_mode halfmode = GET_MODE (operands[1]);
11147 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
11148 rtx destlo, desthi;
11150 gcc_assert (halfmode == V16QImode);
11152 if (src1 == dest && src2 == dest + halfregs)
11154 /* No-op move. Can't split to nothing; emit something. */
11155 emit_note (NOTE_INSN_DELETED);
11156 return;
11159 /* Preserve register attributes for variable tracking. */
11160 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
11161 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
11162 GET_MODE_SIZE (halfmode));
11164 /* Special case of reversed high/low parts. */
11165 if (reg_overlap_mentioned_p (operands[2], destlo)
11166 && reg_overlap_mentioned_p (operands[1], desthi))
11168 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
11169 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
11170 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
11172 else if (!reg_overlap_mentioned_p (operands[2], destlo))
11174 /* Try to avoid unnecessary moves if part of the result
11175 is in the right place already. */
11176 if (src1 != dest)
11177 emit_move_insn (destlo, operands[1]);
11178 if (src2 != dest + halfregs)
11179 emit_move_insn (desthi, operands[2]);
11181 else
11183 if (src2 != dest + halfregs)
11184 emit_move_insn (desthi, operands[2]);
11185 if (src1 != dest)
11186 emit_move_insn (destlo, operands[1]);
11190 /* vec_perm support. */
11192 #define MAX_VECT_LEN 16
11194 struct expand_vec_perm_d
11196 rtx target, op0, op1;
11197 unsigned char perm[MAX_VECT_LEN];
11198 machine_mode vmode;
11199 unsigned char nelt;
11200 bool one_vector_p;
11201 bool testing_p;
11204 /* Generate a variable permutation. */
11206 static void
11207 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
11209 machine_mode vmode = GET_MODE (target);
11210 bool one_vector_p = rtx_equal_p (op0, op1);
11212 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
11213 gcc_checking_assert (GET_MODE (op0) == vmode);
11214 gcc_checking_assert (GET_MODE (op1) == vmode);
11215 gcc_checking_assert (GET_MODE (sel) == vmode);
11216 gcc_checking_assert (TARGET_SIMD);
11218 if (one_vector_p)
11220 if (vmode == V8QImode)
11222 /* Expand the argument to a V16QI mode by duplicating it. */
11223 rtx pair = gen_reg_rtx (V16QImode);
11224 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
11225 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
11227 else
11229 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
11232 else
11234 rtx pair;
11236 if (vmode == V8QImode)
11238 pair = gen_reg_rtx (V16QImode);
11239 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
11240 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
11242 else
11244 pair = gen_reg_rtx (OImode);
11245 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
11246 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
11251 void
11252 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
11254 machine_mode vmode = GET_MODE (target);
11255 unsigned int nelt = GET_MODE_NUNITS (vmode);
11256 bool one_vector_p = rtx_equal_p (op0, op1);
11257 rtx mask;
11259 /* The TBL instruction does not use a modulo index, so we must take care
11260 of that ourselves. */
11261 mask = aarch64_simd_gen_const_vector_dup (vmode,
11262 one_vector_p ? nelt - 1 : 2 * nelt - 1);
11263 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
11265 /* For big-endian, we also need to reverse the index within the vector
11266 (but not which vector). */
11267 if (BYTES_BIG_ENDIAN)
11269 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
11270 if (!one_vector_p)
11271 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
11272 sel = expand_simple_binop (vmode, XOR, sel, mask,
11273 NULL, 0, OPTAB_LIB_WIDEN);
11275 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
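/* Worked example of the masking above: for a two-vector V16QI permute,
   nelt is 16, so every selector byte is ANDed with 31 and an
   out-of-range index such as 35 selects element 35 & 31 = 3, giving the
   modulo behaviour that TBL itself lacks.  On big-endian the extra XOR
   with nelt - 1 reverses the index within each input vector without
   changing which vector is chosen.  */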
11278 /* Recognize patterns suitable for the TRN instructions. */
11279 static bool
11280 aarch64_evpc_trn (struct expand_vec_perm_d *d)
11282 unsigned int i, odd, mask, nelt = d->nelt;
11283 rtx out, in0, in1, x;
11284 rtx (*gen) (rtx, rtx, rtx);
11285 machine_mode vmode = d->vmode;
11287 if (GET_MODE_UNIT_SIZE (vmode) > 8)
11288 return false;
11290 /* Note that these are little-endian tests.
11291 We correct for big-endian later. */
11292 if (d->perm[0] == 0)
11293 odd = 0;
11294 else if (d->perm[0] == 1)
11295 odd = 1;
11296 else
11297 return false;
11298 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
11300 for (i = 0; i < nelt; i += 2)
11302 if (d->perm[i] != i + odd)
11303 return false;
11304 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
11305 return false;
11308 /* Success! */
11309 if (d->testing_p)
11310 return true;
11312 in0 = d->op0;
11313 in1 = d->op1;
11314 if (BYTES_BIG_ENDIAN)
11316 x = in0, in0 = in1, in1 = x;
11317 odd = !odd;
11319 out = d->target;
11321 if (odd)
11323 switch (vmode)
11325 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
11326 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
11327 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
11328 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
11329 case V4SImode: gen = gen_aarch64_trn2v4si; break;
11330 case V2SImode: gen = gen_aarch64_trn2v2si; break;
11331 case V2DImode: gen = gen_aarch64_trn2v2di; break;
11332 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
11333 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
11334 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
11335 default:
11336 return false;
11339 else
11341 switch (vmode)
11343 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
11344 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
11345 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
11346 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
11347 case V4SImode: gen = gen_aarch64_trn1v4si; break;
11348 case V2SImode: gen = gen_aarch64_trn1v2si; break;
11349 case V2DImode: gen = gen_aarch64_trn1v2di; break;
11350 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
11351 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
11352 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
11353 default:
11354 return false;
11358 emit_insn (gen (out, in0, in1));
11359 return true;
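/* Example: on little-endian V4SI, the selector { 0, 4, 2, 6 } matches
   the TRN1 pattern above and { 1, 5, 3, 7 } matches TRN2.  */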
11362 /* Recognize patterns suitable for the UZP instructions. */
11363 static bool
11364 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
11366 unsigned int i, odd, mask, nelt = d->nelt;
11367 rtx out, in0, in1, x;
11368 rtx (*gen) (rtx, rtx, rtx);
11369 machine_mode vmode = d->vmode;
11371 if (GET_MODE_UNIT_SIZE (vmode) > 8)
11372 return false;
11374 /* Note that these are little-endian tests.
11375 We correct for big-endian later. */
11376 if (d->perm[0] == 0)
11377 odd = 0;
11378 else if (d->perm[0] == 1)
11379 odd = 1;
11380 else
11381 return false;
11382 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
11384 for (i = 0; i < nelt; i++)
11386 unsigned elt = (i * 2 + odd) & mask;
11387 if (d->perm[i] != elt)
11388 return false;
11391 /* Success! */
11392 if (d->testing_p)
11393 return true;
11395 in0 = d->op0;
11396 in1 = d->op1;
11397 if (BYTES_BIG_ENDIAN)
11399 x = in0, in0 = in1, in1 = x;
11400 odd = !odd;
11402 out = d->target;
11404 if (odd)
11406 switch (vmode)
11408 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
11409 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
11410 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
11411 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
11412 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
11413 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
11414 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
11415 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
11416 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
11417 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
11418 default:
11419 return false;
11422 else
11424 switch (vmode)
11426 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
11427 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
11428 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
11429 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
11430 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
11431 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
11432 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
11433 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
11434 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
11435 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
11436 default:
11437 return false;
11441 emit_insn (gen (out, in0, in1));
11442 return true;
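/* Example: on little-endian V4SI, the selector { 0, 2, 4, 6 } matches
   the UZP1 pattern above and { 1, 3, 5, 7 } matches UZP2.  */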
11445 /* Recognize patterns suitable for the ZIP instructions. */
11446 static bool
11447 aarch64_evpc_zip (struct expand_vec_perm_d *d)
11449 unsigned int i, high, mask, nelt = d->nelt;
11450 rtx out, in0, in1, x;
11451 rtx (*gen) (rtx, rtx, rtx);
11452 machine_mode vmode = d->vmode;
11454 if (GET_MODE_UNIT_SIZE (vmode) > 8)
11455 return false;
11457 /* Note that these are little-endian tests.
11458 We correct for big-endian later. */
11459 high = nelt / 2;
11460 if (d->perm[0] == high)
11461 /* Do Nothing. */
11463 else if (d->perm[0] == 0)
11464 high = 0;
11465 else
11466 return false;
11467 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
11469 for (i = 0; i < nelt / 2; i++)
11471 unsigned elt = (i + high) & mask;
11472 if (d->perm[i * 2] != elt)
11473 return false;
11474 elt = (elt + nelt) & mask;
11475 if (d->perm[i * 2 + 1] != elt)
11476 return false;
11479 /* Success! */
11480 if (d->testing_p)
11481 return true;
11483 in0 = d->op0;
11484 in1 = d->op1;
11485 if (BYTES_BIG_ENDIAN)
11487 x = in0, in0 = in1, in1 = x;
11488 high = !high;
11490 out = d->target;
11492 if (high)
11494 switch (vmode)
11496 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
11497 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
11498 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
11499 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
11500 case V4SImode: gen = gen_aarch64_zip2v4si; break;
11501 case V2SImode: gen = gen_aarch64_zip2v2si; break;
11502 case V2DImode: gen = gen_aarch64_zip2v2di; break;
11503 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
11504 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
11505 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
11506 default:
11507 return false;
11510 else
11512 switch (vmode)
11514 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
11515 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
11516 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
11517 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
11518 case V4SImode: gen = gen_aarch64_zip1v4si; break;
11519 case V2SImode: gen = gen_aarch64_zip1v2si; break;
11520 case V2DImode: gen = gen_aarch64_zip1v2di; break;
11521 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
11522 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
11523 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
11524 default:
11525 return false;
11529 emit_insn (gen (out, in0, in1));
11530 return true;
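/* Example: on little-endian V4SI, the selector { 0, 4, 1, 5 } matches
   the ZIP1 pattern above and { 2, 6, 3, 7 } matches ZIP2.  */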
11533 /* Recognize patterns for the EXT insn. */
11535 static bool
11536 aarch64_evpc_ext (struct expand_vec_perm_d *d)
11538 unsigned int i, nelt = d->nelt;
11539 rtx (*gen) (rtx, rtx, rtx, rtx);
11540 rtx offset;
11542 unsigned int location = d->perm[0]; /* Always < nelt. */
11544 /* Check if the extracted indices are increasing by one. */
11545 for (i = 1; i < nelt; i++)
11547 unsigned int required = location + i;
11548 if (d->one_vector_p)
11550 /* We'll pass the same vector in twice, so allow indices to wrap. */
11551 required &= (nelt - 1);
11553 if (d->perm[i] != required)
11554 return false;
11557 switch (d->vmode)
11559 case V16QImode: gen = gen_aarch64_extv16qi; break;
11560 case V8QImode: gen = gen_aarch64_extv8qi; break;
11561 case V4HImode: gen = gen_aarch64_extv4hi; break;
11562 case V8HImode: gen = gen_aarch64_extv8hi; break;
11563 case V2SImode: gen = gen_aarch64_extv2si; break;
11564 case V4SImode: gen = gen_aarch64_extv4si; break;
11565 case V2SFmode: gen = gen_aarch64_extv2sf; break;
11566 case V4SFmode: gen = gen_aarch64_extv4sf; break;
11567 case V2DImode: gen = gen_aarch64_extv2di; break;
11568 case V2DFmode: gen = gen_aarch64_extv2df; break;
11569 default:
11570 return false;
11573 /* Success! */
11574 if (d->testing_p)
11575 return true;
11577 /* The case where (location == 0) is a no-op for both big- and little-endian,
11578 and is removed by the mid-end at optimization levels -O1 and higher. */
11580 if (BYTES_BIG_ENDIAN && (location != 0))
11582 /* After setup, we want the high elements of the first vector (stored
11583 at the LSB end of the register), and the low elements of the second
11584 vector (stored at the MSB end of the register). So swap. */
11585 std::swap (d->op0, d->op1);
11586 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
11587 location = nelt - location;
11590 offset = GEN_INT (location);
11591 emit_insn (gen (d->target, d->op0, d->op1, offset));
11592 return true;
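/* Example: on little-endian, the V4SI selector { 1, 2, 3, 4 } (indices
   increasing by one from 1) is matched here and emitted as a single EXT
   with an offset of one element, joining the tail of the first vector to
   the head of the second.  */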
11595 /* Recognize patterns for the REV insns. */
11597 static bool
11598 aarch64_evpc_rev (struct expand_vec_perm_d *d)
11600 unsigned int i, j, diff, nelt = d->nelt;
11601 rtx (*gen) (rtx, rtx);
11603 if (!d->one_vector_p)
11604 return false;
11606 diff = d->perm[0];
11607 switch (diff)
11609 case 7:
11610 switch (d->vmode)
11612 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
11613 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
11614 default:
11615 return false;
11617 break;
11618 case 3:
11619 switch (d->vmode)
11621 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
11622 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
11623 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
11624 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
11625 default:
11626 return false;
11628 break;
11629 case 1:
11630 switch (d->vmode)
11632 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
11633 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
11634 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
11635 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
11636 case V4SImode: gen = gen_aarch64_rev64v4si; break;
11637 case V2SImode: gen = gen_aarch64_rev64v2si; break;
11638 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
11639 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
11640 default:
11641 return false;
11643 break;
11644 default:
11645 return false;
11648 for (i = 0; i < nelt ; i += diff + 1)
11649 for (j = 0; j <= diff; j += 1)
11651 /* This is guaranteed to be true as the value of diff
11652 is 7, 3 or 1 and we should have enough elements in the
11653 queue to generate this. Getting a vector mask with a
11654 value of diff other than these values implies that
11655 something is wrong by the time we get here. */
11656 gcc_assert (i + j < nelt);
11657 if (d->perm[i + j] != i + diff - j)
11658 return false;
11661 /* Success! */
11662 if (d->testing_p)
11663 return true;
11665 emit_insn (gen (d->target, d->op0));
11666 return true;
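/* Example: the V8QI selector { 3, 2, 1, 0, 7, 6, 5, 4 } has diff == 3
   and is matched as REV32 (reverse the bytes within each 32-bit word);
   the other diff values select the wider or narrower REV variants
   according to the element size.  */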
11669 static bool
11670 aarch64_evpc_dup (struct expand_vec_perm_d *d)
11672 rtx (*gen) (rtx, rtx, rtx);
11673 rtx out = d->target;
11674 rtx in0;
11675 machine_mode vmode = d->vmode;
11676 unsigned int i, elt, nelt = d->nelt;
11677 rtx lane;
11679 elt = d->perm[0];
11680 for (i = 1; i < nelt; i++)
11682 if (elt != d->perm[i])
11683 return false;
11686 /* The generic preparation in aarch64_expand_vec_perm_const_1
11687 swaps the operand order and the permute indices if it finds
11688 d->perm[0] to be in the second operand. Thus, we can always
11689 use d->op0 and need not do any extra arithmetic to get the
11690 correct lane number. */
11691 in0 = d->op0;
11692 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
11694 switch (vmode)
11696 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
11697 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
11698 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
11699 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
11700 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
11701 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
11702 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
11703 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
11704 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
11705 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
11706 default:
11707 return false;
11710 emit_insn (gen (out, in0, lane));
11711 return true;
11714 static bool
11715 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
11717 rtx rperm[MAX_VECT_LEN], sel;
11718 machine_mode vmode = d->vmode;
11719 unsigned int i, nelt = d->nelt;
11721 if (d->testing_p)
11722 return true;
11724 /* Generic code will try constant permutation twice: once with the
11725 original mode and again with the elements lowered to QImode.
11726 So wait and don't do the selector expansion ourselves. */
11727 if (vmode != V8QImode && vmode != V16QImode)
11728 return false;
11730 for (i = 0; i < nelt; ++i)
11732 int nunits = GET_MODE_NUNITS (vmode);
11734 /* If big-endian and two vectors we end up with a weird mixed-endian
11735 mode on NEON. Reverse the index within each word but not the word
11736 itself. */
11737 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
11738 : d->perm[i]);
11740 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
11741 sel = force_reg (vmode, sel);
11743 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
11744 return true;
11747 static bool
11748 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
11750 /* The pattern matching functions above are written to look for a small
11751 number to begin the sequence (0, 1, N/2). If we begin with an index
11752 from the second operand, we can swap the operands. */
11753 if (d->perm[0] >= d->nelt)
11755 unsigned i, nelt = d->nelt;
11757 gcc_assert (nelt == (nelt & -nelt));
11758 for (i = 0; i < nelt; ++i)
11759 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
11761 std::swap (d->op0, d->op1);
11764 if (TARGET_SIMD)
11766 if (aarch64_evpc_rev (d))
11767 return true;
11768 else if (aarch64_evpc_ext (d))
11769 return true;
11770 else if (aarch64_evpc_dup (d))
11771 return true;
11772 else if (aarch64_evpc_zip (d))
11773 return true;
11774 else if (aarch64_evpc_uzp (d))
11775 return true;
11776 else if (aarch64_evpc_trn (d))
11777 return true;
11778 return aarch64_evpc_tbl (d);
11780 return false;
11783 /* Expand a vec_perm_const pattern. */
11785 bool
11786 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
11788 struct expand_vec_perm_d d;
11789 int i, nelt, which;
11791 d.target = target;
11792 d.op0 = op0;
11793 d.op1 = op1;
11795 d.vmode = GET_MODE (target);
11796 gcc_assert (VECTOR_MODE_P (d.vmode));
11797 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11798 d.testing_p = false;
11800 for (i = which = 0; i < nelt; ++i)
11802 rtx e = XVECEXP (sel, 0, i);
11803 int ei = INTVAL (e) & (2 * nelt - 1);
11804 which |= (ei < nelt ? 1 : 2);
11805 d.perm[i] = ei;
11808 switch (which)
11810 default:
11811 gcc_unreachable ();
11813 case 3:
11814 d.one_vector_p = false;
11815 if (!rtx_equal_p (op0, op1))
11816 break;
11818 /* The elements of PERM do not suggest that only the first operand
11819 is used, but both operands are identical. Allow easier matching
11820 of the permutation by folding the permutation into the single
11821 input vector. */
11822 /* Fall Through. */
11823 case 2:
11824 for (i = 0; i < nelt; ++i)
11825 d.perm[i] &= nelt - 1;
11826 d.op0 = op1;
11827 d.one_vector_p = true;
11828 break;
11830 case 1:
11831 d.op1 = op0;
11832 d.one_vector_p = true;
11833 break;
11836 return aarch64_expand_vec_perm_const_1 (&d);
11839 static bool
11840 aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
11841 const unsigned char *sel)
11843 struct expand_vec_perm_d d;
11844 unsigned int i, nelt, which;
11845 bool ret;
11847 d.vmode = vmode;
11848 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11849 d.testing_p = true;
11850 memcpy (d.perm, sel, nelt);
11852 /* Calculate whether all elements are in one vector. */
11853 for (i = which = 0; i < nelt; ++i)
11855 unsigned char e = d.perm[i];
11856 gcc_assert (e < 2 * nelt);
11857 which |= (e < nelt ? 1 : 2);
11860 /* If all elements are from the second vector, reindex as if from the
11861 first vector. */
11862 if (which == 2)
11863 for (i = 0; i < nelt; ++i)
11864 d.perm[i] -= nelt;
11866 /* Check whether the mask can be applied to a single vector. */
11867 d.one_vector_p = (which != 3);
11869 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
11870 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
11871 if (!d.one_vector_p)
11872 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
11874 start_sequence ();
11875 ret = aarch64_expand_vec_perm_const_1 (&d);
11876 end_sequence ();
11878 return ret;
11881 rtx
11882 aarch64_reverse_mask (enum machine_mode mode)
11884 /* We have to reverse each vector because we don't have
11885 a permuted load that can reverse-load according to ABI rules. */
11886 rtx mask;
11887 rtvec v = rtvec_alloc (16);
11888 int i, j;
11889 int nunits = GET_MODE_NUNITS (mode);
11890 int usize = GET_MODE_UNIT_SIZE (mode);
11892 gcc_assert (BYTES_BIG_ENDIAN);
11893 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
11895 for (i = 0; i < nunits; i++)
11896 for (j = 0; j < usize; j++)
11897 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
11898 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
11899 return force_reg (V16QImode, mask);
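/* Example: for big-endian V4SI the mask built above is the byte vector
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, i.e. each
   4-byte element is byte-reversed in place.  */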
11902 /* Implement MODES_TIEABLE_P. */
11904 bool
11905 aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
11907 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
11908 return true;
11910 /* We specifically want to allow elements of "structure" modes to
11911 be tieable to the structure. This more general condition allows
11912 other rarer situations too. */
11913 if (TARGET_SIMD
11914 && aarch64_vector_mode_p (mode1)
11915 && aarch64_vector_mode_p (mode2))
11916 return true;
11918 return false;
11921 /* Return a new RTX holding the result of moving POINTER forward by
11922 AMOUNT bytes. */
11924 static rtx
11925 aarch64_move_pointer (rtx pointer, int amount)
11927 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
11929 return adjust_automodify_address (pointer, GET_MODE (pointer),
11930 next, amount);
11933 /* Return a new RTX holding the result of moving POINTER forward by the
11934 size of the mode it points to. */
11936 static rtx
11937 aarch64_progress_pointer (rtx pointer)
11939 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
11941 return aarch64_move_pointer (pointer, amount);
11944 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
11945 MODE bytes. */
11947 static void
11948 aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
11949 machine_mode mode)
11951 rtx reg = gen_reg_rtx (mode);
11953 /* "Cast" the pointers to the correct mode. */
11954 *src = adjust_address (*src, mode, 0);
11955 *dst = adjust_address (*dst, mode, 0);
11956 /* Emit the memcpy. */
11957 emit_move_insn (reg, *src);
11958 emit_move_insn (*dst, reg);
11959 /* Move the pointers forward. */
11960 *src = aarch64_progress_pointer (*src);
11961 *dst = aarch64_progress_pointer (*dst);
11964 /* Expand movmem, as if from a __builtin_memcpy. Return true if
11965 we succeed, otherwise return false. */
11967 bool
11968 aarch64_expand_movmem (rtx *operands)
11970 unsigned int n;
11971 rtx dst = operands[0];
11972 rtx src = operands[1];
11973 rtx base;
11974 bool speed_p = !optimize_function_for_size_p (cfun);
11976 /* When optimizing for size, give a better estimate of the length of a
11977 memcpy call, but use the default otherwise. */
11978 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
11980 /* We can't do anything smart if the amount to copy is not constant. */
11981 if (!CONST_INT_P (operands[2]))
11982 return false;
11984 n = UINTVAL (operands[2]);
11986 /* Try to keep the number of instructions low. For cases below 16 bytes we
11987 need to make at most two moves. For cases above 16 bytes it will be one
11988 move for each 16 byte chunk, then at most two additional moves. */
11989 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
11990 return false;
11992 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11993 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
11995 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
11996 src = adjust_automodify_address (src, VOIDmode, base, 0);
11998 /* Simple cases. Copy 0-3 bytes: first (if applicable) a 2-byte chunk,
11999 then a 1-byte chunk. */
12000 if (n < 4)
12002 if (n >= 2)
12004 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
12005 n -= 2;
12008 if (n == 1)
12009 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
12011 return true;
12014 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
12015 4-byte chunk, partially overlapping with the previously copied chunk. */
12016 if (n < 8)
12018 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
12019 n -= 4;
12020 if (n > 0)
12022 int move = n - 4;
12024 src = aarch64_move_pointer (src, move);
12025 dst = aarch64_move_pointer (dst, move);
12026 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
12028 return true;
12031 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
12032 them, then (if applicable) an 8-byte chunk. */
12033 while (n >= 8)
12035 if (n / 16)
12037 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
12038 n -= 16;
12040 else
12042 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
12043 n -= 8;
12047 /* Finish the final bytes of the copy. We can always do this in one
12048 instruction. We either copy the exact amount we need, or partially
12049 overlap with the previous chunk we copied and copy 8 bytes. */
12050 if (n == 0)
12051 return true;
12052 else if (n == 1)
12053 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
12054 else if (n == 2)
12055 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
12056 else if (n == 4)
12057 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
12058 else
12060 if (n == 3)
12062 src = aarch64_move_pointer (src, -1);
12063 dst = aarch64_move_pointer (dst, -1);
12064 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
12066 else
12068 int move = n - 8;
12070 src = aarch64_move_pointer (src, move);
12071 dst = aarch64_move_pointer (dst, move);
12072 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
12076 return true;
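/* Worked example of the overlapping tail above: for a 13-byte copy the
   loop emits one 8-byte (DImode) load/store pair for bytes 0-7, then the
   tail code moves both pointers back by 3 bytes and emits a second
   8-byte pair covering bytes 5-12, finishing the copy in two pairs with
   a harmless 3-byte overlap.  */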
12079 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
12081 static unsigned HOST_WIDE_INT
12082 aarch64_asan_shadow_offset (void)
12084 return (HOST_WIDE_INT_1 << 36);
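/* With the usual AddressSanitizer mapping shadow = (address >> 3) + offset,
   this puts the shadow region at 1 << 36, so an access to address A is
   checked via the shadow byte at (A >> 3) + 0x1000000000.  */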
12087 static bool
12088 aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
12089 unsigned int align,
12090 enum by_pieces_operation op,
12091 bool speed_p)
12093 /* STORE_BY_PIECES can be used when copying a constant string, but
12094 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
12095 For now we always fail this and let the move_by_pieces code copy
12096 the string from read-only memory. */
12097 if (op == STORE_BY_PIECES)
12098 return false;
12100 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
12103 static enum machine_mode
12104 aarch64_code_to_ccmode (enum rtx_code code)
12106 switch (code)
12108 case NE:
12109 return CC_DNEmode;
12111 case EQ:
12112 return CC_DEQmode;
12114 case LE:
12115 return CC_DLEmode;
12117 case LT:
12118 return CC_DLTmode;
12120 case GE:
12121 return CC_DGEmode;
12123 case GT:
12124 return CC_DGTmode;
12126 case LEU:
12127 return CC_DLEUmode;
12129 case LTU:
12130 return CC_DLTUmode;
12132 case GEU:
12133 return CC_DGEUmode;
12135 case GTU:
12136 return CC_DGTUmode;
12138 default:
12139 return CCmode;
12143 static rtx
12144 aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
12145 int code, tree treeop0, tree treeop1)
12147 enum machine_mode op_mode, cmp_mode, cc_mode;
12148 rtx op0, op1, cmp, target;
12149 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
12150 enum insn_code icode;
12151 struct expand_operand ops[4];
12153 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
12154 if (cc_mode == CCmode)
12155 return NULL_RTX;
12157 start_sequence ();
12158 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
12160 op_mode = GET_MODE (op0);
12161 if (op_mode == VOIDmode)
12162 op_mode = GET_MODE (op1);
12164 switch (op_mode)
12166 case QImode:
12167 case HImode:
12168 case SImode:
12169 cmp_mode = SImode;
12170 icode = CODE_FOR_cmpsi;
12171 break;
12173 case DImode:
12174 cmp_mode = DImode;
12175 icode = CODE_FOR_cmpdi;
12176 break;
12178 default:
12179 end_sequence ();
12180 return NULL_RTX;
12183 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
12184 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
12185 if (!op0 || !op1)
12187 end_sequence ();
12188 return NULL_RTX;
12190 *prep_seq = get_insns ();
12191 end_sequence ();
12193 cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
12194 target = gen_rtx_REG (CCmode, CC_REGNUM);
12196 create_output_operand (&ops[0], target, CCmode);
12197 create_fixed_operand (&ops[1], cmp);
12198 create_fixed_operand (&ops[2], op0);
12199 create_fixed_operand (&ops[3], op1);
12201 start_sequence ();
12202 if (!maybe_expand_insn (icode, 4, ops))
12204 end_sequence ();
12205 return NULL_RTX;
12207 *gen_seq = get_insns ();
12208 end_sequence ();
12210 return gen_rtx_REG (cc_mode, CC_REGNUM);
12213 static rtx
12214 aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
12215 tree treeop0, tree treeop1, int bit_code)
12217 rtx op0, op1, cmp0, cmp1, target;
12218 enum machine_mode op_mode, cmp_mode, cc_mode;
12219 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
12220 enum insn_code icode = CODE_FOR_ccmp_andsi;
12221 struct expand_operand ops[6];
12223 cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
12224 if (cc_mode == CCmode)
12225 return NULL_RTX;
12227 push_to_sequence ((rtx_insn*) *prep_seq);
12228 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
12230 op_mode = GET_MODE (op0);
12231 if (op_mode == VOIDmode)
12232 op_mode = GET_MODE (op1);
12234 switch (op_mode)
12236 case QImode:
12237 case HImode:
12238 case SImode:
12239 cmp_mode = SImode;
12240 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
12241 : CODE_FOR_ccmp_iorsi;
12242 break;
12244 case DImode:
12245 cmp_mode = DImode;
12246 icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
12247 : CODE_FOR_ccmp_iordi;
12248 break;
12250 default:
12251 end_sequence ();
12252 return NULL_RTX;
12255 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
12256 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
12257 if (!op0 || !op1)
12259 end_sequence ();
12260 return NULL_RTX;
12262 *prep_seq = get_insns ();
12263 end_sequence ();
12265 target = gen_rtx_REG (cc_mode, CC_REGNUM);
12266 cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
12267 cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);
12269 create_fixed_operand (&ops[0], prev);
12270 create_fixed_operand (&ops[1], target);
12271 create_fixed_operand (&ops[2], op0);
12272 create_fixed_operand (&ops[3], op1);
12273 create_fixed_operand (&ops[4], cmp0);
12274 create_fixed_operand (&ops[5], cmp1);
12276 push_to_sequence ((rtx_insn*) *gen_seq);
12277 if (!maybe_expand_insn (icode, 6, ops))
12279 end_sequence ();
12280 return NULL_RTX;
12283 *gen_seq = get_insns ();
12284 end_sequence ();
12286 return target;
12289 #undef TARGET_GEN_CCMP_FIRST
12290 #define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
12292 #undef TARGET_GEN_CCMP_NEXT
12293 #define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
12295 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
12296 instruction fusion of some sort. */
12298 static bool
12299 aarch64_macro_fusion_p (void)
12301 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
12305 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
12306 should be kept together during scheduling. */
12308 static bool
12309 aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
12311 rtx set_dest;
12312 rtx prev_set = single_set (prev);
12313 rtx curr_set = single_set (curr);
12314 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
12315 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
12317 if (!aarch64_macro_fusion_p ())
12318 return false;
12320 if (simple_sets_p
12321 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOV_MOVK))
12323 /* We are trying to match:
12324 prev (mov) == (set (reg r0) (const_int imm16))
12325 curr (movk) == (set (zero_extract (reg r0)
12326 (const_int 16)
12327 (const_int 16))
12328 (const_int imm16_1)) */
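/* In assembly terms this is the usual constant-building pair, e.g.
     mov  w0, #0x1234
     movk w0, #0x5678, lsl #16
   which fusion keeps adjacent for cores that can combine them.  */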
12330 set_dest = SET_DEST (curr_set);
12332 if (GET_CODE (set_dest) == ZERO_EXTRACT
12333 && CONST_INT_P (SET_SRC (curr_set))
12334 && CONST_INT_P (SET_SRC (prev_set))
12335 && CONST_INT_P (XEXP (set_dest, 2))
12336 && INTVAL (XEXP (set_dest, 2)) == 16
12337 && REG_P (XEXP (set_dest, 0))
12338 && REG_P (SET_DEST (prev_set))
12339 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
12341 return true;
12345 if (simple_sets_p
12346 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_ADD))
12349 /* We're trying to match:
12350 prev (adrp) == (set (reg r1)
12351 (high (symbol_ref ("SYM"))))
12352 curr (add) == (set (reg r0)
12353 (lo_sum (reg r1)
12354 (symbol_ref ("SYM"))))
12355 Note that r0 need not be the same as r1, especially
12356 during pre-regalloc scheduling. */
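/* I.e. the small-code-model address materialisation
     adrp x1, sym
     add  x0, x1, :lo12:sym
   kept adjacent so the pair can fuse.  */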
12358 if (satisfies_constraint_Ush (SET_SRC (prev_set))
12359 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
12361 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
12362 && REG_P (XEXP (SET_SRC (curr_set), 0))
12363 && REGNO (XEXP (SET_SRC (curr_set), 0))
12364 == REGNO (SET_DEST (prev_set))
12365 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
12366 XEXP (SET_SRC (curr_set), 1)))
12367 return true;
12371 if (simple_sets_p
12372 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOVK_MOVK))
12375 /* We're trying to match:
12376 prev (movk) == (set (zero_extract (reg r0)
12377 (const_int 16)
12378 (const_int 32))
12379 (const_int imm16_1))
12380 curr (movk) == (set (zero_extract (reg r0)
12381 (const_int 16)
12382 (const_int 48))
12383 (const_int imm16_2)) */
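/* E.g. the upper half of a 64-bit constant:
     movk x0, #0x1234, lsl #32
     movk x0, #0x5678, lsl #48  */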
12385 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
12386 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
12387 && REG_P (XEXP (SET_DEST (prev_set), 0))
12388 && REG_P (XEXP (SET_DEST (curr_set), 0))
12389 && REGNO (XEXP (SET_DEST (prev_set), 0))
12390 == REGNO (XEXP (SET_DEST (curr_set), 0))
12391 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
12392 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
12393 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
12394 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
12395 && CONST_INT_P (SET_SRC (prev_set))
12396 && CONST_INT_P (SET_SRC (curr_set)))
12397 return true;
12400 if (simple_sets_p
12401 && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_LDR))
12403 /* We're trying to match:
12404 prev (adrp) == (set (reg r0)
12405 (high (symbol_ref ("SYM"))))
12406 curr (ldr) == (set (reg r1)
12407 (mem (lo_sum (reg r0)
12408 (symbol_ref ("SYM")))))
12410 curr (ldr) == (set (reg r1)
12411 (zero_extend (mem
12412 (lo_sum (reg r0)
12413 (symbol_ref ("SYM")))))) */
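	 /* For illustration only, e.g.
	       adrp	x0, SYM
	       ldr	x1, [x0, :lo12:SYM]
	    or the zero-extending form that loads a narrower value.  */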
12414 if (satisfies_constraint_Ush (SET_SRC (prev_set))
12415 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
12417 rtx curr_src = SET_SRC (curr_set);
12419 if (GET_CODE (curr_src) == ZERO_EXTEND)
12420 curr_src = XEXP (curr_src, 0);
12422 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
12423 && REG_P (XEXP (XEXP (curr_src, 0), 0))
12424 && REGNO (XEXP (XEXP (curr_src, 0), 0))
12425 == REGNO (SET_DEST (prev_set))
12426 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
12427 XEXP (SET_SRC (prev_set), 0)))
12428 return true;
12432 if ((aarch64_tune_params.fusible_ops & AARCH64_FUSE_CMP_BRANCH)
12433 && any_condjump_p (curr))
12435 enum attr_type prev_type = get_attr_type (prev);
12437 /* FIXME: this misses some instructions which are considered simple
12438 arithmetic instructions for ThunderX. Simple shifts are missed here. */
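	 /* As a hand-written illustration, a typical pair fused here is
	       cmp	w0, w1
	       b.ne	.Llabel
	    where the compare is classified as one of the flag-setting
	    ALU/logic types checked below.  */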
12439 if (prev_type == TYPE_ALUS_SREG
12440 || prev_type == TYPE_ALUS_IMM
12441 || prev_type == TYPE_LOGICS_REG
12442 || prev_type == TYPE_LOGICS_IMM)
12443 return true;
12446 return false;
12449 /* If MEM is in the form of [base+offset], extract the two parts
12450 of the address into BASE and OFFSET; otherwise return false
12451 after clearing BASE and OFFSET. */
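/* For example, an address of the form (plus (reg x1) (const_int 16)) yields
   BASE == x1 and OFFSET == (const_int 16), while a plain (reg x1) yields
   OFFSET == (const_int 0); any other form clears both and returns false.  */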
12453 bool
12454 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
12456 rtx addr;
12458 gcc_assert (MEM_P (mem));
12460 addr = XEXP (mem, 0);
12462 if (REG_P (addr))
12464 *base = addr;
12465 *offset = const0_rtx;
12466 return true;
12469 if (GET_CODE (addr) == PLUS
12470 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
12472 *base = XEXP (addr, 0);
12473 *offset = XEXP (addr, 1);
12474 return true;
12477 *base = NULL_RTX;
12478 *offset = NULL_RTX;
12480 return false;
12483 /* Types for scheduling fusion. */
12484 enum sched_fusion_type
12486 SCHED_FUSION_NONE = 0,
12487 SCHED_FUSION_LD_SIGN_EXTEND,
12488 SCHED_FUSION_LD_ZERO_EXTEND,
12489 SCHED_FUSION_LD,
12490 SCHED_FUSION_ST,
12491 SCHED_FUSION_NUM
12494 /* If INSN is a load or store with an address in the form of [base+offset],
12495 extract the two parts into BASE and OFFSET. Return the scheduling
12496 fusion type of this INSN. */
12498 static enum sched_fusion_type
12499 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
12501 rtx x, dest, src;
12502 enum sched_fusion_type fusion = SCHED_FUSION_LD;
12504 gcc_assert (INSN_P (insn));
12505 x = PATTERN (insn);
12506 if (GET_CODE (x) != SET)
12507 return SCHED_FUSION_NONE;
12509 src = SET_SRC (x);
12510 dest = SET_DEST (x);
12512 if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
12513 && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
12514 return SCHED_FUSION_NONE;
12516 if (GET_CODE (src) == SIGN_EXTEND)
12518 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
12519 src = XEXP (src, 0);
12520 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
12521 return SCHED_FUSION_NONE;
12523 else if (GET_CODE (src) == ZERO_EXTEND)
12525 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
12526 src = XEXP (src, 0);
12527 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
12528 return SCHED_FUSION_NONE;
12531 if (GET_CODE (src) == MEM && REG_P (dest))
12532 extract_base_offset_in_addr (src, base, offset);
12533 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
12535 fusion = SCHED_FUSION_ST;
12536 extract_base_offset_in_addr (dest, base, offset);
12538 else
12539 return SCHED_FUSION_NONE;
12541 if (*base == NULL_RTX || *offset == NULL_RTX)
12542 fusion = SCHED_FUSION_NONE;
12544 return fusion;
12547 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
12549 Currently we only support fusing ldr or str instructions, so FUSION_PRI
12550 and PRI are only calculated for these instructions. For other instructions,
12551 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, fusion of
12552 other instruction types can be added by returning different priorities.
12554 It's important that irrelevant instructions get the largest FUSION_PRI. */
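/* For illustration only: two SImode loads from [x1, 4] and [x1, 8] receive
   the same FUSION_PRI (same fusion type and base register) but different
   PRI, with the smaller offset getting the higher priority, so the scheduler
   tends to keep such a pair adjacent for the ldp/stp peepholes.  */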
12556 static void
12557 aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
12558 int *fusion_pri, int *pri)
12560 int tmp, off_val;
12561 rtx base, offset;
12562 enum sched_fusion_type fusion;
12564 gcc_assert (INSN_P (insn));
12566 tmp = max_pri - 1;
12567 fusion = fusion_load_store (insn, &base, &offset);
12568 if (fusion == SCHED_FUSION_NONE)
12570 *pri = tmp;
12571 *fusion_pri = tmp;
12572 return;
12575 /* Set FUSION_PRI according to fusion type and base register. */
12576 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
12578 /* Calculate PRI. */
12579 tmp /= 2;
12581 /* INSN with smaller offset goes first. */
12582 off_val = (int)(INTVAL (offset));
12583 if (off_val >= 0)
12584 tmp -= (off_val & 0xfffff);
12585 else
12586 tmp += ((- off_val) & 0xfffff);
12588 *pri = tmp;
12589 return;
12592 /* Given OPERANDS of consecutive load/store, check if we can merge
12593 them into ldp/stp. LOAD is true if they are load instructions.
12594 MODE is the mode of memory operands. */
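/* For illustration only, a pair such as
       ldr	w0, [x2]
       ldr	w1, [x2, 4]
   passes these checks and can be rewritten as "ldp w0, w1, [x2]", whereas
   accesses with different bases or non-consecutive offsets cannot.  */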
12596 bool
12597 aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
12598 enum machine_mode mode)
12600 HOST_WIDE_INT offval_1, offval_2, msize;
12601 enum reg_class rclass_1, rclass_2;
12602 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
12604 if (load)
12606 mem_1 = operands[1];
12607 mem_2 = operands[3];
12608 reg_1 = operands[0];
12609 reg_2 = operands[2];
12610 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
12611 if (REGNO (reg_1) == REGNO (reg_2))
12612 return false;
12614 else
12616 mem_1 = operands[0];
12617 mem_2 = operands[2];
12618 reg_1 = operands[1];
12619 reg_2 = operands[3];
12622 /* The mems cannot be volatile. */
12623 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
12624 return false;
12626 /* Check if the addresses are in the form of [base+offset]. */
12627 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
12628 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
12629 return false;
12630 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
12631 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
12632 return false;
12634 /* Check if the bases are the same. */
12635 if (!rtx_equal_p (base_1, base_2))
12636 return false;
12638 offval_1 = INTVAL (offset_1);
12639 offval_2 = INTVAL (offset_2);
12640 msize = GET_MODE_SIZE (mode);
12641 /* Check if the offsets are consecutive. */
12642 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
12643 return false;
12645 /* Check if the addresses are clobbered by load. */
12646 if (load)
12648 if (reg_mentioned_p (reg_1, mem_1))
12649 return false;
12651 /* In increasing order, the last load can clobber the address. */
12652 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
12653 return false;
12656 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
12657 rclass_1 = FP_REGS;
12658 else
12659 rclass_1 = GENERAL_REGS;
12661 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
12662 rclass_2 = FP_REGS;
12663 else
12664 rclass_2 = GENERAL_REGS;
12666 /* Check if the registers are of same class. */
12667 if (rclass_1 != rclass_2)
12668 return false;
12670 return true;
12673 /* Given OPERANDS of consecutive load/store, check if we can merge
12674 them into ldp/stp by adjusting the offset. LOAD is true if they
12675 are load instructions. MODE is the mode of memory operands.
12677 Given the consecutive stores below:
12679 str w1, [xb, 0x100]
12680 str w1, [xb, 0x104]
12681 str w1, [xb, 0x108]
12682 str w1, [xb, 0x10c]
12684 Though the offsets are out of the range supported by stp, we can
12685 still pair them after adjusting the offset, like:
12687 add scratch, xb, 0x100
12688 stp w1, w1, [scratch]
12689 stp w1, w1, [scratch, 0x8]
12691 The peephole patterns detecting this opportunity should guarantee
12692 the scratch register is available. */
12694 bool
12695 aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
12696 enum machine_mode mode)
12698 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
12699 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
12700 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
12701 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
12703 if (load)
12705 reg_1 = operands[0];
12706 mem_1 = operands[1];
12707 reg_2 = operands[2];
12708 mem_2 = operands[3];
12709 reg_3 = operands[4];
12710 mem_3 = operands[5];
12711 reg_4 = operands[6];
12712 mem_4 = operands[7];
12713 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
12714 && REG_P (reg_3) && REG_P (reg_4));
12715 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
12716 return false;
12718 else
12720 mem_1 = operands[0];
12721 reg_1 = operands[1];
12722 mem_2 = operands[2];
12723 reg_2 = operands[3];
12724 mem_3 = operands[4];
12725 reg_3 = operands[5];
12726 mem_4 = operands[6];
12727 reg_4 = operands[7];
12729 /* Skip if the memory operand is by itself valid for ldp/stp. */
12730 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
12731 return false;
12733 /* The mems cannot be volatile. */
12734 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
12735 || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
12736 return false;
12738 /* Check if the addresses are in the form of [base+offset]. */
12739 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
12740 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
12741 return false;
12742 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
12743 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
12744 return false;
12745 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
12746 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
12747 return false;
12748 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
12749 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
12750 return false;
12752 /* Check if the bases are the same. */
12753 if (!rtx_equal_p (base_1, base_2)
12754 || !rtx_equal_p (base_2, base_3)
12755 || !rtx_equal_p (base_3, base_4))
12756 return false;
12758 offval_1 = INTVAL (offset_1);
12759 offval_2 = INTVAL (offset_2);
12760 offval_3 = INTVAL (offset_3);
12761 offval_4 = INTVAL (offset_4);
12762 msize = GET_MODE_SIZE (mode);
12763 /* Check if the offsets are consecutive. */
12764 if ((offval_1 != (offval_2 + msize)
12765 || offval_1 != (offval_3 + msize * 2)
12766 || offval_1 != (offval_4 + msize * 3))
12767 && (offval_4 != (offval_3 + msize)
12768 || offval_4 != (offval_2 + msize * 2)
12769 || offval_4 != (offval_1 + msize * 3)))
12770 return false;
12772 /* Check if the addresses are clobbered by load. */
12773 if (load)
12775 if (reg_mentioned_p (reg_1, mem_1)
12776 || reg_mentioned_p (reg_2, mem_2)
12777 || reg_mentioned_p (reg_3, mem_3))
12778 return false;
12780 /* In increasing order, the last load can clobber the address. */
12781 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
12782 return false;
12785 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
12786 rclass_1 = FP_REGS;
12787 else
12788 rclass_1 = GENERAL_REGS;
12790 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
12791 rclass_2 = FP_REGS;
12792 else
12793 rclass_2 = GENERAL_REGS;
12795 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
12796 rclass_3 = FP_REGS;
12797 else
12798 rclass_3 = GENERAL_REGS;
12800 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
12801 rclass_4 = FP_REGS;
12802 else
12803 rclass_4 = GENERAL_REGS;
12805 /* Check if the registers are of same class. */
12806 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
12807 return false;
12809 return true;
12812 /* Given OPERANDS of consecutive load/store, this function pairs them
12813 into ldp/stp after adjusting the offset. It depends on the fact
12814 that addresses of load/store instructions are in increasing order.
12815 MODE is the mode of memory operands. CODE is the rtl operator
12816 which should be applied to all memory operands; it is SIGN_EXTEND,
12817 ZERO_EXTEND or UNKNOWN. */
12819 bool
12820 aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
12821 enum machine_mode mode, RTX_CODE code)
12823 rtx base, offset, t1, t2;
12824 rtx mem_1, mem_2, mem_3, mem_4;
12825 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
12827 if (load)
12829 mem_1 = operands[1];
12830 mem_2 = operands[3];
12831 mem_3 = operands[5];
12832 mem_4 = operands[7];
12834 else
12836 mem_1 = operands[0];
12837 mem_2 = operands[2];
12838 mem_3 = operands[4];
12839 mem_4 = operands[6];
12840 gcc_assert (code == UNKNOWN);
12843 extract_base_offset_in_addr (mem_1, &base, &offset);
12844 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
12846 /* Adjust the offset so that it can fit in an ldp/stp instruction. */
12847 msize = GET_MODE_SIZE (mode);
12848 stp_off_limit = msize * 0x40;
12849 off_val = INTVAL (offset);
12850 abs_off = (off_val < 0) ? -off_val : off_val;
12851 new_off = abs_off % stp_off_limit;
12852 adj_off = abs_off - new_off;
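  /* For illustration only: with SImode operands (msize == 4) stp_off_limit
     is 0x100, so an original offset of 0x100 splits into adj_off == 0x100
     and new_off == 0, matching the add + stp example in the comment before
     aarch64_operands_adjust_ok_for_ldpstp above.  */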
12854 /* Further adjust to make sure all offsets are OK. */
12855 if ((new_off + msize * 2) >= stp_off_limit)
12857 adj_off += stp_off_limit;
12858 new_off -= stp_off_limit;
12861 /* Make sure the adjustment can be done with ADD/SUB instructions. */
12862 if (adj_off >= 0x1000)
12863 return false;
12865 if (off_val < 0)
12867 adj_off = -adj_off;
12868 new_off = -new_off;
12871 /* Create new memory references. */
12872 mem_1 = change_address (mem_1, VOIDmode,
12873 plus_constant (DImode, operands[8], new_off));
12875 /* Check if the adjusted address is OK for ldp/stp. */
12876 if (!aarch64_mem_pair_operand (mem_1, mode))
12877 return false;
12879 msize = GET_MODE_SIZE (mode);
12880 mem_2 = change_address (mem_2, VOIDmode,
12881 plus_constant (DImode,
12882 operands[8],
12883 new_off + msize));
12884 mem_3 = change_address (mem_3, VOIDmode,
12885 plus_constant (DImode,
12886 operands[8],
12887 new_off + msize * 2));
12888 mem_4 = change_address (mem_4, VOIDmode,
12889 plus_constant (DImode,
12890 operands[8],
12891 new_off + msize * 3));
12893 if (code == ZERO_EXTEND)
12895 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
12896 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
12897 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
12898 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
12900 else if (code == SIGN_EXTEND)
12902 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
12903 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
12904 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
12905 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
12908 if (load)
12910 operands[1] = mem_1;
12911 operands[3] = mem_2;
12912 operands[5] = mem_3;
12913 operands[7] = mem_4;
12915 else
12917 operands[0] = mem_1;
12918 operands[2] = mem_2;
12919 operands[4] = mem_3;
12920 operands[6] = mem_4;
12923 /* Emit adjusting instruction. */
12924 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
12925 /* Emit ldp/stp instructions. */
12926 t1 = gen_rtx_SET (operands[0], operands[1]);
12927 t2 = gen_rtx_SET (operands[2], operands[3]);
12928 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
12929 t1 = gen_rtx_SET (operands[4], operands[5]);
12930 t2 = gen_rtx_SET (operands[6], operands[7]);
12931 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
12932 return true;
12935 /* Return true if a pseudo register should be created and used to hold
12936 the GOT address for PIC code. */
12938 bool
12939 aarch64_use_pseudo_pic_reg (void)
12941 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
12944 /* Implement TARGET_UNSPEC_MAY_TRAP_P. */
12946 static int
12947 aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
12949 switch (XINT (x, 1))
12951 case UNSPEC_GOTSMALLPIC:
12952 case UNSPEC_GOTSMALLPIC28K:
12953 case UNSPEC_GOTTINYPIC:
12954 return 0;
12955 default:
12956 break;
12959 return default_unspec_may_trap_p (x, flags);
12962 /* Implement TARGET_PROMOTED_TYPE to promote __fp16 to float. */
12963 static tree
12964 aarch64_promoted_type (const_tree t)
12966 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
12967 return float_type_node;
12968 return NULL_TREE;
12970 #undef TARGET_ADDRESS_COST
12971 #define TARGET_ADDRESS_COST aarch64_address_cost
12973 /* This hook determines whether unnamed bitfields affect the alignment
12974 of the containing structure. The hook returns true if the structure
12975 should inherit the alignment requirements of an unnamed bitfield's
12976 type. */
12977 #undef TARGET_ALIGN_ANON_BITFIELD
12978 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
12980 #undef TARGET_ASM_ALIGNED_DI_OP
12981 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
12983 #undef TARGET_ASM_ALIGNED_HI_OP
12984 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
12986 #undef TARGET_ASM_ALIGNED_SI_OP
12987 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
12989 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
12990 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
12991 hook_bool_const_tree_hwi_hwi_const_tree_true
12993 #undef TARGET_ASM_OUTPUT_MI_THUNK
12994 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
12996 #undef TARGET_ASM_SELECT_RTX_SECTION
12997 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
12999 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
13000 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
13002 #undef TARGET_BUILD_BUILTIN_VA_LIST
13003 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
13005 #undef TARGET_CALLEE_COPIES
13006 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
13008 #undef TARGET_CAN_ELIMINATE
13009 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
13011 #undef TARGET_CAN_INLINE_P
13012 #define TARGET_CAN_INLINE_P aarch64_can_inline_p
13014 #undef TARGET_CANNOT_FORCE_CONST_MEM
13015 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
13017 #undef TARGET_CONDITIONAL_REGISTER_USAGE
13018 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
13020 /* Only the least significant bit is used for initialization guard
13021 variables. */
13022 #undef TARGET_CXX_GUARD_MASK_BIT
13023 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
13025 #undef TARGET_C_MODE_FOR_SUFFIX
13026 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
13028 #ifdef TARGET_BIG_ENDIAN_DEFAULT
13029 #undef TARGET_DEFAULT_TARGET_FLAGS
13030 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
13031 #endif
13033 #undef TARGET_CLASS_MAX_NREGS
13034 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
13036 #undef TARGET_BUILTIN_DECL
13037 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
13039 #undef TARGET_EXPAND_BUILTIN
13040 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
13042 #undef TARGET_EXPAND_BUILTIN_VA_START
13043 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
13045 #undef TARGET_FOLD_BUILTIN
13046 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
13048 #undef TARGET_FUNCTION_ARG
13049 #define TARGET_FUNCTION_ARG aarch64_function_arg
13051 #undef TARGET_FUNCTION_ARG_ADVANCE
13052 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
13054 #undef TARGET_FUNCTION_ARG_BOUNDARY
13055 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
13057 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
13058 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
13060 #undef TARGET_FUNCTION_VALUE
13061 #define TARGET_FUNCTION_VALUE aarch64_function_value
13063 #undef TARGET_FUNCTION_VALUE_REGNO_P
13064 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
13066 #undef TARGET_FRAME_POINTER_REQUIRED
13067 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
13069 #undef TARGET_GIMPLE_FOLD_BUILTIN
13070 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
13072 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
13073 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
13075 #undef TARGET_INIT_BUILTINS
13076 #define TARGET_INIT_BUILTINS aarch64_init_builtins
13078 #undef TARGET_LEGITIMATE_ADDRESS_P
13079 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
13081 #undef TARGET_LEGITIMATE_CONSTANT_P
13082 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
13084 #undef TARGET_LIBGCC_CMP_RETURN_MODE
13085 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
13087 #undef TARGET_LRA_P
13088 #define TARGET_LRA_P hook_bool_void_true
13090 #undef TARGET_MANGLE_TYPE
13091 #define TARGET_MANGLE_TYPE aarch64_mangle_type
13093 #undef TARGET_MEMORY_MOVE_COST
13094 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
13096 #undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
13097 #define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
13099 #undef TARGET_MUST_PASS_IN_STACK
13100 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
13102 /* This target hook should return true if accesses to volatile bitfields
13103 should use the narrowest mode possible. It should return false if these
13104 accesses should use the bitfield container type. */
13105 #undef TARGET_NARROW_VOLATILE_BITFIELD
13106 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
13108 #undef TARGET_OPTION_OVERRIDE
13109 #define TARGET_OPTION_OVERRIDE aarch64_override_options
13111 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
13112 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
13113 aarch64_override_options_after_change
13115 #undef TARGET_OPTION_SAVE
13116 #define TARGET_OPTION_SAVE aarch64_option_save
13118 #undef TARGET_OPTION_RESTORE
13119 #define TARGET_OPTION_RESTORE aarch64_option_restore
13121 #undef TARGET_OPTION_PRINT
13122 #define TARGET_OPTION_PRINT aarch64_option_print
13124 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
13125 #define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
13127 #undef TARGET_SET_CURRENT_FUNCTION
13128 #define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
13130 #undef TARGET_PASS_BY_REFERENCE
13131 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
13133 #undef TARGET_PREFERRED_RELOAD_CLASS
13134 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
13136 #undef TARGET_SCHED_REASSOCIATION_WIDTH
13137 #define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
13139 #undef TARGET_PROMOTED_TYPE
13140 #define TARGET_PROMOTED_TYPE aarch64_promoted_type
13142 #undef TARGET_SECONDARY_RELOAD
13143 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
13145 #undef TARGET_SHIFT_TRUNCATION_MASK
13146 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
13148 #undef TARGET_SETUP_INCOMING_VARARGS
13149 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
13151 #undef TARGET_STRUCT_VALUE_RTX
13152 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
13154 #undef TARGET_REGISTER_MOVE_COST
13155 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
13157 #undef TARGET_RETURN_IN_MEMORY
13158 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
13160 #undef TARGET_RETURN_IN_MSB
13161 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
13163 #undef TARGET_RTX_COSTS
13164 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
13166 #undef TARGET_SCHED_ISSUE_RATE
13167 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
13169 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
13170 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
13171 aarch64_sched_first_cycle_multipass_dfa_lookahead
13173 #undef TARGET_TRAMPOLINE_INIT
13174 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
13176 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
13177 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
13179 #undef TARGET_VECTOR_MODE_SUPPORTED_P
13180 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
13182 #undef TARGET_ARRAY_MODE_SUPPORTED_P
13183 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
13185 #undef TARGET_VECTORIZE_ADD_STMT_COST
13186 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
13188 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
13189 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
13190 aarch64_builtin_vectorization_cost
13192 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
13193 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
13195 #undef TARGET_VECTORIZE_BUILTINS
13196 #define TARGET_VECTORIZE_BUILTINS
13198 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
13199 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
13200 aarch64_builtin_vectorized_function
13202 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
13203 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
13204 aarch64_autovectorize_vector_sizes
13206 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
13207 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
13208 aarch64_atomic_assign_expand_fenv
13210 /* Section anchor support. */
13212 #undef TARGET_MIN_ANCHOR_OFFSET
13213 #define TARGET_MIN_ANCHOR_OFFSET -256
13215 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
13216 byte offset; we can do much more for larger data types, but have no way
13217 to determine the size of the access. We assume accesses are aligned. */
13218 #undef TARGET_MAX_ANCHOR_OFFSET
13219 #define TARGET_MAX_ANCHOR_OFFSET 4095
13221 #undef TARGET_VECTOR_ALIGNMENT
13222 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
13224 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
13225 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
13226 aarch64_simd_vector_alignment_reachable
13228 /* vec_perm support. */
13230 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
13231 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
13232 aarch64_vectorize_vec_perm_const_ok
13234 #undef TARGET_INIT_LIBFUNCS
13235 #define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
13237 #undef TARGET_FIXED_CONDITION_CODE_REGS
13238 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
13240 #undef TARGET_FLAGS_REGNUM
13241 #define TARGET_FLAGS_REGNUM CC_REGNUM
13243 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
13244 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
13246 #undef TARGET_ASAN_SHADOW_OFFSET
13247 #define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
13249 #undef TARGET_LEGITIMIZE_ADDRESS
13250 #define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
13252 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
13253 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
13254 aarch64_use_by_pieces_infrastructure_p
13256 #undef TARGET_CAN_USE_DOLOOP_P
13257 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
13259 #undef TARGET_SCHED_MACRO_FUSION_P
13260 #define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
13262 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
13263 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
13265 #undef TARGET_SCHED_FUSION_PRIORITY
13266 #define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
13268 #undef TARGET_UNSPEC_MAY_TRAP_P
13269 #define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
13271 #undef TARGET_USE_PSEUDO_PIC_REG
13272 #define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
13274 struct gcc_target targetm = TARGET_INITIALIZER;
13276 #include "gt-aarch64.h"