[AArch64] Fix ICEs in aarch64_print_operand
[official-gcc.git] / gcc / config / aarch64 / aarch64.c
blob 83d86071312a25789dc40f4f236fec5260d04c87
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2017 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #define INCLUDE_STRING
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "gimple.h"
31 #include "cfghooks.h"
32 #include "cfgloop.h"
33 #include "df.h"
34 #include "tm_p.h"
35 #include "stringpool.h"
36 #include "attribs.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "emit-rtl.h"
40 #include "recog.h"
41 #include "diagnostic.h"
42 #include "insn-attr.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "stor-layout.h"
46 #include "calls.h"
47 #include "varasm.h"
48 #include "output.h"
49 #include "flags.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "reload.h"
53 #include "langhooks.h"
54 #include "opts.h"
55 #include "params.h"
56 #include "gimplify.h"
57 #include "dwarf2.h"
58 #include "gimple-iterator.h"
59 #include "tree-vectorizer.h"
60 #include "aarch64-cost-tables.h"
61 #include "dumpfile.h"
62 #include "builtins.h"
63 #include "rtl-iter.h"
64 #include "tm-constrs.h"
65 #include "sched-int.h"
66 #include "target-globals.h"
67 #include "common/common-target.h"
68 #include "selftest.h"
69 #include "selftest-rtl.h"
71 /* This file should be included last. */
72 #include "target-def.h"
74 /* Defined for convenience. */
75 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
77 /* Classifies an address.
79 ADDRESS_REG_IMM
80 A simple base register plus immediate offset.
82 ADDRESS_REG_WB
83 A base register indexed by immediate offset with writeback.
85 ADDRESS_REG_REG
86 A base register indexed by (optionally scaled) register.
88 ADDRESS_REG_UXTW
89 A base register indexed by (optionally scaled) zero-extended register.
91 ADDRESS_REG_SXTW
92 A base register indexed by (optionally scaled) sign-extended register.
94 ADDRESS_LO_SUM
95 A LO_SUM rtx with a base register and "LO12" symbol relocation.
97 ADDRESS_SYMBOLIC:
98 A constant symbolic address, in pc-relative literal pool. */
100 enum aarch64_address_type {
101 ADDRESS_REG_IMM,
102 ADDRESS_REG_WB,
103 ADDRESS_REG_REG,
104 ADDRESS_REG_UXTW,
105 ADDRESS_REG_SXTW,
106 ADDRESS_LO_SUM,
107 ADDRESS_SYMBOLIC
110 struct aarch64_address_info {
111 enum aarch64_address_type type;
112 rtx base;
113 rtx offset;
114 int shift;
115 enum aarch64_symbol_type symbol_type;
118 struct simd_immediate_info
120 rtx value;
121 int shift;
122 int element_width;
123 bool mvn;
124 bool msl;
127 /* The current code model. */
128 enum aarch64_code_model aarch64_cmodel;
130 #ifdef HAVE_AS_TLS
131 #undef TARGET_HAVE_TLS
132 #define TARGET_HAVE_TLS 1
133 #endif
135 static bool aarch64_composite_type_p (const_tree, machine_mode);
136 static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
137 const_tree,
138 machine_mode *, int *,
139 bool *);
140 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
141 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
142 static void aarch64_override_options_after_change (void);
143 static bool aarch64_vector_mode_supported_p (machine_mode);
144 static bool aarch64_vectorize_vec_perm_const_ok (machine_mode,
145 vec_perm_indices);
146 static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
147 static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
148 const_tree type,
149 int misalignment,
150 bool is_packed);
151 static machine_mode
152 aarch64_simd_container_mode (scalar_mode mode, unsigned width);
153 static bool aarch64_print_ldpstp_address (FILE *, machine_mode, rtx);
155 /* Major revision number of the ARM Architecture implemented by the target. */
156 unsigned aarch64_architecture_version;
158 /* The processor for which instructions should be scheduled. */
159 enum aarch64_processor aarch64_tune = cortexa53;
161 /* Mask to specify which instruction scheduling options should be used. */
162 unsigned long aarch64_tune_flags = 0;
164 /* Global flag for PC relative loads. */
165 bool aarch64_pcrelative_literal_loads;
167 /* Support for command line parsing of boolean flags in the tuning
168 structures. */
169 struct aarch64_flag_desc
171 const char* name;
172 unsigned int flag;
175 #define AARCH64_FUSION_PAIR(name, internal_name) \
176 { name, AARCH64_FUSE_##internal_name },
177 static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
179 { "none", AARCH64_FUSE_NOTHING },
180 #include "aarch64-fusion-pairs.def"
181 { "all", AARCH64_FUSE_ALL },
182 { NULL, AARCH64_FUSE_NOTHING }
185 #define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
186 { name, AARCH64_EXTRA_TUNE_##internal_name },
187 static const struct aarch64_flag_desc aarch64_tuning_flags[] =
189 { "none", AARCH64_EXTRA_TUNE_NONE },
190 #include "aarch64-tuning-flags.def"
191 { "all", AARCH64_EXTRA_TUNE_ALL },
192 { NULL, AARCH64_EXTRA_TUNE_NONE }
195 /* Tuning parameters. */
197 static const struct cpu_addrcost_table generic_addrcost_table =
200 1, /* hi */
201 0, /* si */
202 0, /* di */
203 1, /* ti */
205 0, /* pre_modify */
206 0, /* post_modify */
207 0, /* register_offset */
208 0, /* register_sextend */
209 0, /* register_zextend */
210 0 /* imm_offset */
213 static const struct cpu_addrcost_table exynosm1_addrcost_table =
216 0, /* hi */
217 0, /* si */
218 0, /* di */
219 2, /* ti */
221 0, /* pre_modify */
222 0, /* post_modify */
223 1, /* register_offset */
224 1, /* register_sextend */
225 2, /* register_zextend */
226 0, /* imm_offset */
229 static const struct cpu_addrcost_table xgene1_addrcost_table =
232 1, /* hi */
233 0, /* si */
234 0, /* di */
235 1, /* ti */
237 1, /* pre_modify */
238 0, /* post_modify */
239 0, /* register_offset */
240 1, /* register_sextend */
241 1, /* register_zextend */
242 0, /* imm_offset */
245 static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
248 1, /* hi */
249 1, /* si */
250 1, /* di */
251 2, /* ti */
253 0, /* pre_modify */
254 0, /* post_modify */
255 2, /* register_offset */
256 3, /* register_sextend */
257 3, /* register_zextend */
258 0, /* imm_offset */
261 static const struct cpu_regmove_cost generic_regmove_cost =
263 1, /* GP2GP */
264 /* Avoid the use of slow int<->fp moves for spilling by setting
265 their cost higher than memmov_cost. */
266 5, /* GP2FP */
267 5, /* FP2GP */
268 2 /* FP2FP */
271 static const struct cpu_regmove_cost cortexa57_regmove_cost =
273 1, /* GP2GP */
274 /* Avoid the use of slow int<->fp moves for spilling by setting
275 their cost higher than memmov_cost. */
276 5, /* GP2FP */
277 5, /* FP2GP */
278 2 /* FP2FP */
281 static const struct cpu_regmove_cost cortexa53_regmove_cost =
283 1, /* GP2GP */
284 /* Avoid the use of slow int<->fp moves for spilling by setting
285 their cost higher than memmov_cost. */
286 5, /* GP2FP */
287 5, /* FP2GP */
288 2 /* FP2FP */
291 static const struct cpu_regmove_cost exynosm1_regmove_cost =
293 1, /* GP2GP */
294 /* Avoid the use of slow int<->fp moves for spilling by setting
295 their cost higher than memmov_cost (actually 4 and 9). */
296 9, /* GP2FP */
297 9, /* FP2GP */
298 1 /* FP2FP */
301 static const struct cpu_regmove_cost thunderx_regmove_cost =
303 2, /* GP2GP */
304 2, /* GP2FP */
305 6, /* FP2GP */
306 4 /* FP2FP */
309 static const struct cpu_regmove_cost xgene1_regmove_cost =
311 1, /* GP2GP */
312 /* Avoid the use of slow int<->fp moves for spilling by setting
313 their cost higher than memmov_cost. */
314 8, /* GP2FP */
315 8, /* FP2GP */
316 2 /* FP2FP */
319 static const struct cpu_regmove_cost qdf24xx_regmove_cost =
321 2, /* GP2GP */
322 /* Avoid the use of int<->fp moves for spilling. */
323 6, /* GP2FP */
324 6, /* FP2GP */
325 4 /* FP2FP */
328 static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
330 1, /* GP2GP */
331 /* Avoid the use of int<->fp moves for spilling. */
332 8, /* GP2FP */
333 8, /* FP2GP */
334 4 /* FP2FP */
337 /* Generic costs for vector insn classes. */
338 static const struct cpu_vector_cost generic_vector_cost =
340 1, /* scalar_int_stmt_cost */
341 1, /* scalar_fp_stmt_cost */
342 1, /* scalar_load_cost */
343 1, /* scalar_store_cost */
344 1, /* vec_int_stmt_cost */
345 1, /* vec_fp_stmt_cost */
346 2, /* vec_permute_cost */
347 1, /* vec_to_scalar_cost */
348 1, /* scalar_to_vec_cost */
349 1, /* vec_align_load_cost */
350 1, /* vec_unalign_load_cost */
351 1, /* vec_unalign_store_cost */
352 1, /* vec_store_cost */
353 3, /* cond_taken_branch_cost */
354 1 /* cond_not_taken_branch_cost */
357 /* ThunderX costs for vector insn classes. */
358 static const struct cpu_vector_cost thunderx_vector_cost =
360 1, /* scalar_int_stmt_cost */
361 1, /* scalar_fp_stmt_cost */
362 3, /* scalar_load_cost */
363 1, /* scalar_store_cost */
364 4, /* vec_int_stmt_cost */
365 1, /* vec_fp_stmt_cost */
366 4, /* vec_permute_cost */
367 2, /* vec_to_scalar_cost */
368 2, /* scalar_to_vec_cost */
369 3, /* vec_align_load_cost */
370 5, /* vec_unalign_load_cost */
371 5, /* vec_unalign_store_cost */
372 1, /* vec_store_cost */
373 3, /* cond_taken_branch_cost */
374 3 /* cond_not_taken_branch_cost */
377 /* Cortex-A57 costs for vector insn classes. */
378 static const struct cpu_vector_cost cortexa57_vector_cost =
380 1, /* scalar_int_stmt_cost */
381 1, /* scalar_fp_stmt_cost */
382 4, /* scalar_load_cost */
383 1, /* scalar_store_cost */
384 2, /* vec_int_stmt_cost */
385 2, /* vec_fp_stmt_cost */
386 3, /* vec_permute_cost */
387 8, /* vec_to_scalar_cost */
388 8, /* scalar_to_vec_cost */
389 4, /* vec_align_load_cost */
390 4, /* vec_unalign_load_cost */
391 1, /* vec_unalign_store_cost */
392 1, /* vec_store_cost */
393 1, /* cond_taken_branch_cost */
394 1 /* cond_not_taken_branch_cost */
397 static const struct cpu_vector_cost exynosm1_vector_cost =
399 1, /* scalar_int_stmt_cost */
400 1, /* scalar_fp_stmt_cost */
401 5, /* scalar_load_cost */
402 1, /* scalar_store_cost */
403 3, /* vec_int_stmt_cost */
404 3, /* vec_fp_stmt_cost */
405 3, /* vec_permute_cost */
406 3, /* vec_to_scalar_cost */
407 3, /* scalar_to_vec_cost */
408 5, /* vec_align_load_cost */
409 5, /* vec_unalign_load_cost */
410 1, /* vec_unalign_store_cost */
411 1, /* vec_store_cost */
412 1, /* cond_taken_branch_cost */
413 1 /* cond_not_taken_branch_cost */
416 /* X-Gene 1 costs for vector insn classes. */
417 static const struct cpu_vector_cost xgene1_vector_cost =
419 1, /* scalar_int_stmt_cost */
420 1, /* scalar_fp_stmt_cost */
421 5, /* scalar_load_cost */
422 1, /* scalar_store_cost */
423 2, /* vec_int_stmt_cost */
424 2, /* vec_fp_stmt_cost */
425 2, /* vec_permute_cost */
426 4, /* vec_to_scalar_cost */
427 4, /* scalar_to_vec_cost */
428 10, /* vec_align_load_cost */
429 10, /* vec_unalign_load_cost */
430 2, /* vec_unalign_store_cost */
431 2, /* vec_store_cost */
432 2, /* cond_taken_branch_cost */
433 1 /* cond_not_taken_branch_cost */
436 /* Costs for vector insn classes for Vulcan. */
437 static const struct cpu_vector_cost thunderx2t99_vector_cost =
439 1, /* scalar_int_stmt_cost */
440 6, /* scalar_fp_stmt_cost */
441 4, /* scalar_load_cost */
442 1, /* scalar_store_cost */
443 5, /* vec_int_stmt_cost */
444 6, /* vec_fp_stmt_cost */
445 3, /* vec_permute_cost */
446 6, /* vec_to_scalar_cost */
447 5, /* scalar_to_vec_cost */
448 8, /* vec_align_load_cost */
449 8, /* vec_unalign_load_cost */
450 4, /* vec_unalign_store_cost */
451 4, /* vec_store_cost */
452 2, /* cond_taken_branch_cost */
453 1 /* cond_not_taken_branch_cost */
456 /* Generic costs for branch instructions. */
457 static const struct cpu_branch_cost generic_branch_cost =
459 1, /* Predictable. */
460 3 /* Unpredictable. */
463 /* Generic approximation modes. */
464 static const cpu_approx_modes generic_approx_modes =
466 AARCH64_APPROX_NONE, /* division */
467 AARCH64_APPROX_NONE, /* sqrt */
468 AARCH64_APPROX_NONE /* recip_sqrt */
471 /* Approximation modes for Exynos M1. */
472 static const cpu_approx_modes exynosm1_approx_modes =
474 AARCH64_APPROX_NONE, /* division */
475 AARCH64_APPROX_ALL, /* sqrt */
476 AARCH64_APPROX_ALL /* recip_sqrt */
479 /* Approximation modes for X-Gene 1. */
480 static const cpu_approx_modes xgene1_approx_modes =
482 AARCH64_APPROX_NONE, /* division */
483 AARCH64_APPROX_NONE, /* sqrt */
484 AARCH64_APPROX_ALL /* recip_sqrt */
487 /* Generic prefetch settings (which disable prefetch). */
488 static const cpu_prefetch_tune generic_prefetch_tune =
490 0, /* num_slots */
491 -1, /* l1_cache_size */
492 -1, /* l1_cache_line_size */
493 -1, /* l2_cache_size */
494 -1 /* default_opt_level */
497 static const cpu_prefetch_tune exynosm1_prefetch_tune =
499 0, /* num_slots */
500 -1, /* l1_cache_size */
501 64, /* l1_cache_line_size */
502 -1, /* l2_cache_size */
503 -1 /* default_opt_level */
506 static const cpu_prefetch_tune qdf24xx_prefetch_tune =
508 4, /* num_slots */
509 32, /* l1_cache_size */
510 64, /* l1_cache_line_size */
511 1024, /* l2_cache_size */
512 -1 /* default_opt_level */
515 static const cpu_prefetch_tune thunderxt88_prefetch_tune =
517 8, /* num_slots */
518 32, /* l1_cache_size */
519 128, /* l1_cache_line_size */
520 16*1024, /* l2_cache_size */
521 3 /* default_opt_level */
524 static const cpu_prefetch_tune thunderx_prefetch_tune =
526 8, /* num_slots */
527 32, /* l1_cache_size */
528 128, /* l1_cache_line_size */
529 -1, /* l2_cache_size */
530 -1 /* default_opt_level */
533 static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
535 8, /* num_slots */
536 32, /* l1_cache_size */
537 64, /* l1_cache_line_size */
538 256, /* l2_cache_size */
539 -1 /* default_opt_level */
542 static const struct tune_params generic_tunings =
544 &cortexa57_extra_costs,
545 &generic_addrcost_table,
546 &generic_regmove_cost,
547 &generic_vector_cost,
548 &generic_branch_cost,
549 &generic_approx_modes,
550 4, /* memmov_cost */
551 2, /* issue_rate */
552 (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
553 8, /* function_align. */
554 4, /* jump_align. */
555 8, /* loop_align. */
556 2, /* int_reassoc_width. */
557 4, /* fp_reassoc_width. */
558 1, /* vec_reassoc_width. */
559 2, /* min_div_recip_mul_sf. */
560 2, /* min_div_recip_mul_df. */
561 0, /* max_case_values. */
562 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
563 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
564 &generic_prefetch_tune
567 static const struct tune_params cortexa35_tunings =
569 &cortexa53_extra_costs,
570 &generic_addrcost_table,
571 &cortexa53_regmove_cost,
572 &generic_vector_cost,
573 &generic_branch_cost,
574 &generic_approx_modes,
575 4, /* memmov_cost */
576 1, /* issue_rate */
577 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
578 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
579 16, /* function_align. */
580 4, /* jump_align. */
581 8, /* loop_align. */
582 2, /* int_reassoc_width. */
583 4, /* fp_reassoc_width. */
584 1, /* vec_reassoc_width. */
585 2, /* min_div_recip_mul_sf. */
586 2, /* min_div_recip_mul_df. */
587 0, /* max_case_values. */
588 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
589 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
590 &generic_prefetch_tune
593 static const struct tune_params cortexa53_tunings =
595 &cortexa53_extra_costs,
596 &generic_addrcost_table,
597 &cortexa53_regmove_cost,
598 &generic_vector_cost,
599 &generic_branch_cost,
600 &generic_approx_modes,
601 4, /* memmov_cost */
602 2, /* issue_rate */
603 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
604 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
605 16, /* function_align. */
606 4, /* jump_align. */
607 8, /* loop_align. */
608 2, /* int_reassoc_width. */
609 4, /* fp_reassoc_width. */
610 1, /* vec_reassoc_width. */
611 2, /* min_div_recip_mul_sf. */
612 2, /* min_div_recip_mul_df. */
613 0, /* max_case_values. */
614 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
615 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
616 &generic_prefetch_tune
619 static const struct tune_params cortexa57_tunings =
621 &cortexa57_extra_costs,
622 &generic_addrcost_table,
623 &cortexa57_regmove_cost,
624 &cortexa57_vector_cost,
625 &generic_branch_cost,
626 &generic_approx_modes,
627 4, /* memmov_cost */
628 3, /* issue_rate */
629 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
630 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
631 16, /* function_align. */
632 4, /* jump_align. */
633 8, /* loop_align. */
634 2, /* int_reassoc_width. */
635 4, /* fp_reassoc_width. */
636 1, /* vec_reassoc_width. */
637 2, /* min_div_recip_mul_sf. */
638 2, /* min_div_recip_mul_df. */
639 0, /* max_case_values. */
640 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
641 (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags. */
642 &generic_prefetch_tune
645 static const struct tune_params cortexa72_tunings =
647 &cortexa57_extra_costs,
648 &generic_addrcost_table,
649 &cortexa57_regmove_cost,
650 &cortexa57_vector_cost,
651 &generic_branch_cost,
652 &generic_approx_modes,
653 4, /* memmov_cost */
654 3, /* issue_rate */
655 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
656 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
657 16, /* function_align. */
658 4, /* jump_align. */
659 8, /* loop_align. */
660 2, /* int_reassoc_width. */
661 4, /* fp_reassoc_width. */
662 1, /* vec_reassoc_width. */
663 2, /* min_div_recip_mul_sf. */
664 2, /* min_div_recip_mul_df. */
665 0, /* max_case_values. */
666 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
667 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
668 &generic_prefetch_tune
671 static const struct tune_params cortexa73_tunings =
673 &cortexa57_extra_costs,
674 &generic_addrcost_table,
675 &cortexa57_regmove_cost,
676 &cortexa57_vector_cost,
677 &generic_branch_cost,
678 &generic_approx_modes,
679 4, /* memmov_cost. */
680 2, /* issue_rate. */
681 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
682 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
683 16, /* function_align. */
684 4, /* jump_align. */
685 8, /* loop_align. */
686 2, /* int_reassoc_width. */
687 4, /* fp_reassoc_width. */
688 1, /* vec_reassoc_width. */
689 2, /* min_div_recip_mul_sf. */
690 2, /* min_div_recip_mul_df. */
691 0, /* max_case_values. */
692 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
693 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
694 &generic_prefetch_tune
699 static const struct tune_params exynosm1_tunings =
701 &exynosm1_extra_costs,
702 &exynosm1_addrcost_table,
703 &exynosm1_regmove_cost,
704 &exynosm1_vector_cost,
705 &generic_branch_cost,
706 &exynosm1_approx_modes,
707 4, /* memmov_cost */
708 3, /* issue_rate */
709 (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
710 4, /* function_align. */
711 4, /* jump_align. */
712 4, /* loop_align. */
713 2, /* int_reassoc_width. */
714 4, /* fp_reassoc_width. */
715 1, /* vec_reassoc_width. */
716 2, /* min_div_recip_mul_sf. */
717 2, /* min_div_recip_mul_df. */
718 48, /* max_case_values. */
719 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
720 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
721 &exynosm1_prefetch_tune
724 static const struct tune_params thunderxt88_tunings =
726 &thunderx_extra_costs,
727 &generic_addrcost_table,
728 &thunderx_regmove_cost,
729 &thunderx_vector_cost,
730 &generic_branch_cost,
731 &generic_approx_modes,
732 6, /* memmov_cost */
733 2, /* issue_rate */
734 AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
735 8, /* function_align. */
736 8, /* jump_align. */
737 8, /* loop_align. */
738 2, /* int_reassoc_width. */
739 4, /* fp_reassoc_width. */
740 1, /* vec_reassoc_width. */
741 2, /* min_div_recip_mul_sf. */
742 2, /* min_div_recip_mul_df. */
743 0, /* max_case_values. */
744 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
745 (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */
746 &thunderxt88_prefetch_tune
749 static const struct tune_params thunderx_tunings =
751 &thunderx_extra_costs,
752 &generic_addrcost_table,
753 &thunderx_regmove_cost,
754 &thunderx_vector_cost,
755 &generic_branch_cost,
756 &generic_approx_modes,
757 6, /* memmov_cost */
758 2, /* issue_rate */
759 AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
760 8, /* function_align. */
761 8, /* jump_align. */
762 8, /* loop_align. */
763 2, /* int_reassoc_width. */
764 4, /* fp_reassoc_width. */
765 1, /* vec_reassoc_width. */
766 2, /* min_div_recip_mul_sf. */
767 2, /* min_div_recip_mul_df. */
768 0, /* max_case_values. */
769 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
770 (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
771 | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
772 &thunderx_prefetch_tune
775 static const struct tune_params xgene1_tunings =
777 &xgene1_extra_costs,
778 &xgene1_addrcost_table,
779 &xgene1_regmove_cost,
780 &xgene1_vector_cost,
781 &generic_branch_cost,
782 &xgene1_approx_modes,
783 6, /* memmov_cost */
784 4, /* issue_rate */
785 AARCH64_FUSE_NOTHING, /* fusible_ops */
786 16, /* function_align. */
787 8, /* jump_align. */
788 16, /* loop_align. */
789 2, /* int_reassoc_width. */
790 4, /* fp_reassoc_width. */
791 1, /* vec_reassoc_width. */
792 2, /* min_div_recip_mul_sf. */
793 2, /* min_div_recip_mul_df. */
794 0, /* max_case_values. */
795 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
796 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
797 &generic_prefetch_tune
800 static const struct tune_params qdf24xx_tunings =
802 &qdf24xx_extra_costs,
803 &generic_addrcost_table,
804 &qdf24xx_regmove_cost,
805 &generic_vector_cost,
806 &generic_branch_cost,
807 &generic_approx_modes,
808 4, /* memmov_cost */
809 4, /* issue_rate */
810 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
811 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
812 16, /* function_align. */
813 8, /* jump_align. */
814 16, /* loop_align. */
815 2, /* int_reassoc_width. */
816 4, /* fp_reassoc_width. */
817 1, /* vec_reassoc_width. */
818 2, /* min_div_recip_mul_sf. */
819 2, /* min_div_recip_mul_df. */
820 0, /* max_case_values. */
821 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
822 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
823 &qdf24xx_prefetch_tune
826 /* Tuning structure for the Qualcomm Saphira core. Default to falkor values
827 for now. */
828 static const struct tune_params saphira_tunings =
830 &generic_extra_costs,
831 &generic_addrcost_table,
832 &generic_regmove_cost,
833 &generic_vector_cost,
834 &generic_branch_cost,
835 &generic_approx_modes,
836 4, /* memmov_cost */
837 4, /* issue_rate */
838 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
839 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
840 16, /* function_align. */
841 8, /* jump_align. */
842 16, /* loop_align. */
843 2, /* int_reassoc_width. */
844 4, /* fp_reassoc_width. */
845 1, /* vec_reassoc_width. */
846 2, /* min_div_recip_mul_sf. */
847 2, /* min_div_recip_mul_df. */
848 0, /* max_case_values. */
849 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
850 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
851 &generic_prefetch_tune
854 static const struct tune_params thunderx2t99_tunings =
856 &thunderx2t99_extra_costs,
857 &thunderx2t99_addrcost_table,
858 &thunderx2t99_regmove_cost,
859 &thunderx2t99_vector_cost,
860 &generic_branch_cost,
861 &generic_approx_modes,
862 4, /* memmov_cost. */
863 4, /* issue_rate. */
864 (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
865 | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */
866 16, /* function_align. */
867 8, /* jump_align. */
868 16, /* loop_align. */
869 3, /* int_reassoc_width. */
870 2, /* fp_reassoc_width. */
871 2, /* vec_reassoc_width. */
872 2, /* min_div_recip_mul_sf. */
873 2, /* min_div_recip_mul_df. */
874 0, /* max_case_values. */
875 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
876 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
877 &thunderx2t99_prefetch_tune
880 /* Support for fine-grained override of the tuning structures. */
881 struct aarch64_tuning_override_function
883 const char* name;
884 void (*parse_override)(const char*, struct tune_params*);
887 static void aarch64_parse_fuse_string (const char*, struct tune_params*);
888 static void aarch64_parse_tune_string (const char*, struct tune_params*);
890 static const struct aarch64_tuning_override_function
891 aarch64_tuning_override_functions[] =
893 { "fuse", aarch64_parse_fuse_string },
894 { "tune", aarch64_parse_tune_string },
895 { NULL, NULL }
898 /* A processor implementing AArch64. */
899 struct processor
901 const char *const name;
902 enum aarch64_processor ident;
903 enum aarch64_processor sched_core;
904 enum aarch64_arch arch;
905 unsigned architecture_version;
906 const unsigned long flags;
907 const struct tune_params *const tune;
910 /* Architectures implementing AArch64. */
911 static const struct processor all_architectures[] =
913 #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
914 {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
915 #include "aarch64-arches.def"
916 {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
919 /* Processor cores implementing AArch64. */
920 static const struct processor all_cores[] =
922 #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
923 {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
924 all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
925 FLAGS, &COSTS##_tunings},
926 #include "aarch64-cores.def"
927 {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
928 AARCH64_FL_FOR_ARCH8, &generic_tunings},
929 {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
933 /* Target specification. These are populated by the -march, -mtune, -mcpu
934 handling code or by target attributes. */
935 static const struct processor *selected_arch;
936 static const struct processor *selected_cpu;
937 static const struct processor *selected_tune;
939 /* The current tuning set. */
940 struct tune_params aarch64_tune_params = generic_tunings;
942 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
944 /* An ISA extension in the co-processor and main instruction set space. */
945 struct aarch64_option_extension
947 const char *const name;
948 const unsigned long flags_on;
949 const unsigned long flags_off;
952 typedef enum aarch64_cond_code
954 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
955 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
956 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
958 aarch64_cc;
960 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
962 /* The condition codes of the processor, and the inverse function. */
963 static const char * const aarch64_condition_codes[] =
965 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
966 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
969 /* Generate code to enable conditional branches in functions over 1 MiB. */
970 const char *
971 aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
972 const char * branch_format)
974 rtx_code_label * tmp_label = gen_label_rtx ();
975 char label_buf[256];
976 char buffer[128];
977 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
978 CODE_LABEL_NUMBER (tmp_label));
979 const char *label_ptr = targetm.strip_name_encoding (label_buf);
980 rtx dest_label = operands[pos_label];
981 operands[pos_label] = tmp_label;
983 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
984 output_asm_insn (buffer, operands);
986 snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
987 operands[pos_label] = dest_label;
988 output_asm_insn (buffer, operands);
989 return "";
992 void
993 aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
995 const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
996 if (TARGET_GENERAL_REGS_ONLY)
997 error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
998 else
999 error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
1002 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
1003 The register allocator chooses ALL_REGS if FP_REGS and GENERAL_REGS have
1004 the same cost even if ALL_REGS has a much larger cost. ALL_REGS is also
1005 used if the cost of both FP_REGS and GENERAL_REGS is lower than the memory
1006 cost (in this case the best class is the lowest cost one). Using ALL_REGS
1007 irrespective of its cost results in bad allocations with many redundant
1008 int<->FP moves which are expensive on various cores.
1009 To avoid this we don't allow ALL_REGS as the allocno class, but force a
1010 decision between FP_REGS and GENERAL_REGS. We use the allocno class if it
1011 isn't ALL_REGS. Similarly, use the best class if it isn't ALL_REGS.
1012 Otherwise set the allocno class depending on the mode.
1013 The result of this is that it is no longer inefficient to have a higher
1014 memory move cost than the register move cost.
1017 static reg_class_t
1018 aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
1019 reg_class_t best_class)
1021 machine_mode mode;
1023 if (allocno_class != ALL_REGS)
1024 return allocno_class;
1026 if (best_class != ALL_REGS)
1027 return best_class;
1029 mode = PSEUDO_REGNO_MODE (regno);
1030 return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
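/* For instance, a pseudo of DFmode or V4SImode whose allocno and best
   classes both came back as ALL_REGS is steered to FP_REGS here, while a
   DImode pseudo is steered to GENERAL_REGS, avoiding the redundant
   int<->FP moves described above.  */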
1033 static unsigned int
1034 aarch64_min_divisions_for_recip_mul (machine_mode mode)
1036 if (GET_MODE_UNIT_SIZE (mode) == 4)
1037 return aarch64_tune_params.min_div_recip_mul_sf;
1038 return aarch64_tune_params.min_div_recip_mul_df;
1041 static int
1042 aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
1043 machine_mode mode)
1045 if (VECTOR_MODE_P (mode))
1046 return aarch64_tune_params.vec_reassoc_width;
1047 if (INTEGRAL_MODE_P (mode))
1048 return aarch64_tune_params.int_reassoc_width;
1049 if (FLOAT_MODE_P (mode))
1050 return aarch64_tune_params.fp_reassoc_width;
1051 return 1;
1054 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
1055 unsigned
1056 aarch64_dbx_register_number (unsigned regno)
1058 if (GP_REGNUM_P (regno))
1059 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
1060 else if (regno == SP_REGNUM)
1061 return AARCH64_DWARF_SP;
1062 else if (FP_REGNUM_P (regno))
1063 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
1065 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
1066 equivalent DWARF register. */
1067 return DWARF_FRAME_REGISTERS;
1070 /* Return TRUE if MODE is any of the large INT modes. */
1071 static bool
1072 aarch64_vect_struct_mode_p (machine_mode mode)
1074 return mode == OImode || mode == CImode || mode == XImode;
1077 /* Return TRUE if MODE is any of the vector modes. */
1078 static bool
1079 aarch64_vector_mode_p (machine_mode mode)
1081 return aarch64_vector_mode_supported_p (mode)
1082 || aarch64_vect_struct_mode_p (mode);
1085 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
1086 static bool
1087 aarch64_array_mode_supported_p (machine_mode mode,
1088 unsigned HOST_WIDE_INT nelems)
1090 if (TARGET_SIMD
1091 && (AARCH64_VALID_SIMD_QREG_MODE (mode)
1092 || AARCH64_VALID_SIMD_DREG_MODE (mode))
1093 && (nelems >= 2 && nelems <= 4))
1094 return true;
1096 return false;
1099 /* Implement TARGET_HARD_REGNO_NREGS. */
1101 static unsigned int
1102 aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
1104 switch (aarch64_regno_regclass (regno))
1106 case FP_REGS:
1107 case FP_LO_REGS:
1108 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
1109 default:
1110 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1112 gcc_unreachable ();
1115 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1117 static bool
1118 aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
1120 if (GET_MODE_CLASS (mode) == MODE_CC)
1121 return regno == CC_REGNUM;
1123 if (regno == SP_REGNUM)
1124 /* The purpose of comparing with ptr_mode is to support the
1125 global register variable associated with the stack pointer
1126 register via the syntax of asm ("wsp") in ILP32. */
1127 return mode == Pmode || mode == ptr_mode;
1129 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
1130 return mode == Pmode;
1132 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
1133 return true;
1135 if (FP_REGNUM_P (regno))
1137 if (aarch64_vect_struct_mode_p (mode))
1138 return end_hard_regno (mode, regno) - 1 <= V31_REGNUM;
1139 else
1140 return true;
1143 return false;
1146 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves
1147 the lower 64 bits of a 128-bit register. Tell the compiler the callee
1148 clobbers the top 64 bits when restoring the bottom 64 bits. */
1150 static bool
1151 aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
1153 return FP_REGNUM_P (regno) && GET_MODE_SIZE (mode) > 8;
1156 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
1157 machine_mode
1158 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
1159 machine_mode mode)
1161 /* Handle modes that fit within single registers. */
1162 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
1164 if (GET_MODE_SIZE (mode) >= 4)
1165 return mode;
1166 else
1167 return SImode;
1169 /* Fall back to generic for multi-reg and very large modes. */
1170 else
1171 return choose_hard_reg_mode (regno, nregs, false);
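/* For example, a QImode or HImode value occupying a single register is
   saved and restored in SImode here, while multi-register values fall
   back to choose_hard_reg_mode.  */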
1174 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
1175 that strcpy from constants will be faster. */
1177 static HOST_WIDE_INT
1178 aarch64_constant_alignment (const_tree exp, HOST_WIDE_INT align)
1180 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
1181 return MAX (align, BITS_PER_WORD);
1182 return align;
1185 /* Return true if calls to DECL should be treated as
1186 long-calls (i.e. called via a register). */
1187 static bool
1188 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
1190 return false;
1193 /* Return true if calls to symbol-ref SYM should be treated as
1194 long-calls (i.e. called via a register). */
1195 bool
1196 aarch64_is_long_call_p (rtx sym)
1198 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
1201 /* Return true if calls to symbol-ref SYM should not go through
1202 plt stubs. */
1204 bool
1205 aarch64_is_noplt_call_p (rtx sym)
1207 const_tree decl = SYMBOL_REF_DECL (sym);
1209 if (flag_pic
1210 && decl
1211 && (!flag_plt
1212 || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
1213 && !targetm.binds_local_p (decl))
1214 return true;
1216 return false;
1219 /* Return true if the offsets to a zero/sign-extract operation
1220 represent an expression that matches an extend operation. The
1221 operands represent the parameters from
1223 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
1224 bool
1225 aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
1226 rtx extract_imm)
1228 HOST_WIDE_INT mult_val, extract_val;
1230 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
1231 return false;
1233 mult_val = INTVAL (mult_imm);
1234 extract_val = INTVAL (extract_imm);
1236 if (extract_val > 8
1237 && extract_val < GET_MODE_BITSIZE (mode)
1238 && exact_log2 (extract_val & ~7) > 0
1239 && (extract_val & 7) <= 4
1240 && mult_val == (1 << (extract_val & 7)))
1241 return true;
1243 return false;
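/* Illustrative values (not taken from the original comment): EXTRACT_IMM
   == 34 together with MULT_IMM == 4 passes the checks above and describes
   the low 32 bits of a register shifted left by 2, i.e. roughly a
   UXTW/SXTW #2 style extend-and-shift operand.  */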
1246 /* Emit an insn that's a simple single-set. Both the operands must be
1247 known to be valid. */
1248 inline static rtx_insn *
1249 emit_set_insn (rtx x, rtx y)
1251 return emit_insn (gen_rtx_SET (x, y));
1254 /* X and Y are two things to compare using CODE. Emit the compare insn and
1255 return the rtx for the CC register in the proper mode. */
1257 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
1259 machine_mode mode = SELECT_CC_MODE (code, x, y);
1260 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
1262 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
1263 return cc_reg;
1266 /* Build the SYMBOL_REF for __tls_get_addr. */
1268 static GTY(()) rtx tls_get_addr_libfunc;
1271 aarch64_tls_get_addr (void)
1273 if (!tls_get_addr_libfunc)
1274 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
1275 return tls_get_addr_libfunc;
1278 /* Return the TLS model to use for ADDR. */
1280 static enum tls_model
1281 tls_symbolic_operand_type (rtx addr)
1283 enum tls_model tls_kind = TLS_MODEL_NONE;
1284 rtx sym, addend;
1286 if (GET_CODE (addr) == CONST)
1288 split_const (addr, &sym, &addend);
1289 if (GET_CODE (sym) == SYMBOL_REF)
1290 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
1292 else if (GET_CODE (addr) == SYMBOL_REF)
1293 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
1295 return tls_kind;
1298 /* We'll allow lo_sum's in addresses in our legitimate addresses
1299 so that combine would take care of combining addresses where
1300 necessary, but for generation purposes, we'll generate the address
1301 as:
1302 RTL                                   Absolute
1303    tmp = hi (symbol_ref);             adrp x1, foo
1304    dest = lo_sum (tmp, symbol_ref);   add dest, x1, :lo_12:foo
1307 PIC                                   TLS
1308    adrp x1, :got:foo                  adrp tmp, :tlsgd:foo
1309    ldr x1, [:got_lo12:foo]            add dest, tmp, :tlsgd_lo12:foo
1310                                       bl __tls_get_addr
1313 Load TLS symbol, depending on TLS mechanism and TLS access model.
1315 Global Dynamic - Traditional TLS:
1316 adrp tmp, :tlsgd:imm
1317 add dest, tmp, #:tlsgd_lo12:imm
1318 bl __tls_get_addr
1320 Global Dynamic - TLS Descriptors:
1321 adrp dest, :tlsdesc:imm
1322 ldr tmp, [dest, #:tlsdesc_lo12:imm]
1323 add dest, dest, #:tlsdesc_lo12:imm
1324 blr tmp
1325 mrs tp, tpidr_el0
1326 add dest, dest, tp
1328 Initial Exec:
1329 mrs tp, tpidr_el0
1330 adrp tmp, :gottprel:imm
1331 ldr dest, [tmp, #:gottprel_lo12:imm]
1332 add dest, dest, tp
1334 Local Exec:
1335 mrs tp, tpidr_el0
1336 add t0, tp, #:tprel_hi12:imm, lsl #12
1337 add t0, t0, #:tprel_lo12_nc:imm
1340 static void
1341 aarch64_load_symref_appropriately (rtx dest, rtx imm,
1342 enum aarch64_symbol_type type)
1344 switch (type)
1346 case SYMBOL_SMALL_ABSOLUTE:
1348 /* In ILP32, the mode of dest can be either SImode or DImode. */
1349 rtx tmp_reg = dest;
1350 machine_mode mode = GET_MODE (dest);
1352 gcc_assert (mode == Pmode || mode == ptr_mode);
1354 if (can_create_pseudo_p ())
1355 tmp_reg = gen_reg_rtx (mode);
1357 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
1358 emit_insn (gen_add_losym (dest, tmp_reg, imm));
1359 return;
1362 case SYMBOL_TINY_ABSOLUTE:
1363 emit_insn (gen_rtx_SET (dest, imm));
1364 return;
1366 case SYMBOL_SMALL_GOT_28K:
1368 machine_mode mode = GET_MODE (dest);
1369 rtx gp_rtx = pic_offset_table_rtx;
1370 rtx insn;
1371 rtx mem;
1373 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
1374 here before RTL expansion. Tree IVOPTs will generate RTL patterns to
1375 decide rtx costs, in which case pic_offset_table_rtx is not
1376 initialized. In that case there is no need to generate the first adrp
1377 instruction, as the final cost for global variable access is
1378 one instruction. */
1379 if (gp_rtx != NULL)
1381 /* -fpic for -mcmodel=small allows a 32K GOT table size (but since we
1382 use the page base as the GOT base, the first page may be wasted;
1383 in the worst case there is only 28K of space for the GOT).
1385 The generated instruction sequence for accessing a global variable is:
1388 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
1390 Only one instruction is needed, but we must initialize
1391 pic_offset_table_rtx properly. We generate an initialization insn for
1392 every global access, and allow CSE to remove all redundant ones.
1394 The final instruction sequence will look like the following
1395 for multiple global variable accesses:
1397 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
1399 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
1400 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
1401 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
1402 ... */
1404 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1405 crtl->uses_pic_offset_table = 1;
1406 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
1408 if (mode != GET_MODE (gp_rtx))
1409 gp_rtx = gen_lowpart (mode, gp_rtx);
1413 if (mode == ptr_mode)
1415 if (mode == DImode)
1416 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
1417 else
1418 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
1420 mem = XVECEXP (SET_SRC (insn), 0, 0);
1422 else
1424 gcc_assert (mode == Pmode);
1426 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
1427 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1430 /* The operand is expected to be MEM. Whenever the related insn
1431 pattern changes, the code above which calculates MEM should be
1432 updated. */
1433 gcc_assert (GET_CODE (mem) == MEM);
1434 MEM_READONLY_P (mem) = 1;
1435 MEM_NOTRAP_P (mem) = 1;
1436 emit_insn (insn);
1437 return;
1440 case SYMBOL_SMALL_GOT_4G:
1442 /* In ILP32, the mode of dest can be either SImode or DImode,
1443 while the got entry is always of SImode size. The mode of
1444 dest depends on how dest is used: if dest is assigned to a
1445 pointer (e.g. stored in memory), it has SImode; it may have
1446 DImode if dest is dereferenced to access the memory.
1447 This is why we have to handle three different ldr_got_small
1448 patterns here (two patterns for ILP32). */
1450 rtx insn;
1451 rtx mem;
1452 rtx tmp_reg = dest;
1453 machine_mode mode = GET_MODE (dest);
1455 if (can_create_pseudo_p ())
1456 tmp_reg = gen_reg_rtx (mode);
1458 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
1459 if (mode == ptr_mode)
1461 if (mode == DImode)
1462 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
1463 else
1464 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
1466 mem = XVECEXP (SET_SRC (insn), 0, 0);
1468 else
1470 gcc_assert (mode == Pmode);
1472 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
1473 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1476 gcc_assert (GET_CODE (mem) == MEM);
1477 MEM_READONLY_P (mem) = 1;
1478 MEM_NOTRAP_P (mem) = 1;
1479 emit_insn (insn);
1480 return;
1483 case SYMBOL_SMALL_TLSGD:
1485 rtx_insn *insns;
1486 machine_mode mode = GET_MODE (dest);
1487 rtx result = gen_rtx_REG (mode, R0_REGNUM);
1489 start_sequence ();
1490 if (TARGET_ILP32)
1491 aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
1492 else
1493 aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
1494 insns = get_insns ();
1495 end_sequence ();
1497 RTL_CONST_CALL_P (insns) = 1;
1498 emit_libcall_block (insns, dest, result, imm);
1499 return;
1502 case SYMBOL_SMALL_TLSDESC:
1504 machine_mode mode = GET_MODE (dest);
1505 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
1506 rtx tp;
1508 gcc_assert (mode == Pmode || mode == ptr_mode);
1510 /* In ILP32, the got entry is always of SImode size. Unlike
1511 small GOT, the dest is fixed at reg 0. */
1512 if (TARGET_ILP32)
1513 emit_insn (gen_tlsdesc_small_si (imm));
1514 else
1515 emit_insn (gen_tlsdesc_small_di (imm));
1516 tp = aarch64_load_tp (NULL);
1518 if (mode != Pmode)
1519 tp = gen_lowpart (mode, tp);
1521 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
1522 if (REG_P (dest))
1523 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1524 return;
1527 case SYMBOL_SMALL_TLSIE:
1529 /* In ILP32, the mode of dest can be either SImode or DImode,
1530 while the got entry is always of SImode size. The mode of
1531 dest depends on how dest is used: if dest is assigned to a
1532 pointer (e.g. stored in memory), it has SImode; it may have
1533 DImode if dest is dereferenced to access the memory.
1534 This is why we have to handle three different tlsie_small
1535 patterns here (two patterns for ILP32). */
1536 machine_mode mode = GET_MODE (dest);
1537 rtx tmp_reg = gen_reg_rtx (mode);
1538 rtx tp = aarch64_load_tp (NULL);
1540 if (mode == ptr_mode)
1542 if (mode == DImode)
1543 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
1544 else
1546 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
1547 tp = gen_lowpart (mode, tp);
1550 else
1552 gcc_assert (mode == Pmode);
1553 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
1556 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
1557 if (REG_P (dest))
1558 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1559 return;
1562 case SYMBOL_TLSLE12:
1563 case SYMBOL_TLSLE24:
1564 case SYMBOL_TLSLE32:
1565 case SYMBOL_TLSLE48:
1567 machine_mode mode = GET_MODE (dest);
1568 rtx tp = aarch64_load_tp (NULL);
1570 if (mode != Pmode)
1571 tp = gen_lowpart (mode, tp);
1573 switch (type)
1575 case SYMBOL_TLSLE12:
1576 emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
1577 (dest, tp, imm));
1578 break;
1579 case SYMBOL_TLSLE24:
1580 emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
1581 (dest, tp, imm));
1582 break;
1583 case SYMBOL_TLSLE32:
1584 emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
1585 (dest, imm));
1586 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1587 (dest, dest, tp));
1588 break;
1589 case SYMBOL_TLSLE48:
1590 emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
1591 (dest, imm));
1592 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1593 (dest, dest, tp));
1594 break;
1595 default:
1596 gcc_unreachable ();
1599 if (REG_P (dest))
1600 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1601 return;
1604 case SYMBOL_TINY_GOT:
1605 emit_insn (gen_ldr_got_tiny (dest, imm));
1606 return;
1608 case SYMBOL_TINY_TLSIE:
1610 machine_mode mode = GET_MODE (dest);
1611 rtx tp = aarch64_load_tp (NULL);
1613 if (mode == ptr_mode)
1615 if (mode == DImode)
1616 emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
1617 else
1619 tp = gen_lowpart (mode, tp);
1620 emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
1623 else
1625 gcc_assert (mode == Pmode);
1626 emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
1629 if (REG_P (dest))
1630 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
1631 return;
1634 default:
1635 gcc_unreachable ();
1639 /* Emit a move from SRC to DEST. Assume that the move expanders can
1640 handle all moves if !can_create_pseudo_p (). The distinction is
1641 important because, unlike emit_move_insn, the move expanders know
1642 how to force Pmode objects into the constant pool even when the
1643 constant pool address is not itself legitimate. */
1644 static rtx
1645 aarch64_emit_move (rtx dest, rtx src)
1647 return (can_create_pseudo_p ()
1648 ? emit_move_insn (dest, src)
1649 : emit_move_insn_1 (dest, src));
1652 /* Split a 128-bit move operation into two 64-bit move operations,
1653 taking care to handle partial overlap of register to register
1654 copies. Special cases are needed when moving between GP regs and
1655 FP regs. SRC can be a register, constant or memory; DST a register
1656 or memory. If either operand is memory it must not have any side
1657 effects. */
1658 void
1659 aarch64_split_128bit_move (rtx dst, rtx src)
1661 rtx dst_lo, dst_hi;
1662 rtx src_lo, src_hi;
1664 machine_mode mode = GET_MODE (dst);
1666 gcc_assert (mode == TImode || mode == TFmode);
1667 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
1668 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
1670 if (REG_P (dst) && REG_P (src))
1672 int src_regno = REGNO (src);
1673 int dst_regno = REGNO (dst);
1675 /* Handle FP <-> GP regs. */
1676 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
1678 src_lo = gen_lowpart (word_mode, src);
1679 src_hi = gen_highpart (word_mode, src);
1681 if (mode == TImode)
1683 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
1684 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
1686 else
1688 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
1689 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
1691 return;
1693 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
1695 dst_lo = gen_lowpart (word_mode, dst);
1696 dst_hi = gen_highpart (word_mode, dst);
1698 if (mode == TImode)
1700 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
1701 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
1703 else
1705 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
1706 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
1708 return;
1712 dst_lo = gen_lowpart (word_mode, dst);
1713 dst_hi = gen_highpart (word_mode, dst);
1714 src_lo = gen_lowpart (word_mode, src);
1715 src_hi = gen_highpart_mode (word_mode, mode, src);
1717 /* At most one pairing may overlap. */
1718 if (reg_overlap_mentioned_p (dst_lo, src_hi))
1720 aarch64_emit_move (dst_hi, src_hi);
1721 aarch64_emit_move (dst_lo, src_lo);
1723 else
1725 aarch64_emit_move (dst_lo, src_lo);
1726 aarch64_emit_move (dst_hi, src_hi);
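/* For instance (hard registers chosen for illustration), splitting a
   TImode copy whose source lives in x2/x3 and whose destination lives in
   x3/x4 takes the first branch above: dst_lo (x3) overlaps src_hi (x3),
   so the high halves are moved before the low halves and nothing is
   clobbered.  */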
1730 bool
1731 aarch64_split_128bit_move_p (rtx dst, rtx src)
1733 return (! REG_P (src)
1734 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
1737 /* Split a complex SIMD combine. */
1739 void
1740 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
1742 machine_mode src_mode = GET_MODE (src1);
1743 machine_mode dst_mode = GET_MODE (dst);
1745 gcc_assert (VECTOR_MODE_P (dst_mode));
1746 gcc_assert (register_operand (dst, dst_mode)
1747 && register_operand (src1, src_mode)
1748 && register_operand (src2, src_mode));
1750 rtx (*gen) (rtx, rtx, rtx);
1752 switch (src_mode)
1754 case E_V8QImode:
1755 gen = gen_aarch64_simd_combinev8qi;
1756 break;
1757 case E_V4HImode:
1758 gen = gen_aarch64_simd_combinev4hi;
1759 break;
1760 case E_V2SImode:
1761 gen = gen_aarch64_simd_combinev2si;
1762 break;
1763 case E_V4HFmode:
1764 gen = gen_aarch64_simd_combinev4hf;
1765 break;
1766 case E_V2SFmode:
1767 gen = gen_aarch64_simd_combinev2sf;
1768 break;
1769 case E_DImode:
1770 gen = gen_aarch64_simd_combinedi;
1771 break;
1772 case E_DFmode:
1773 gen = gen_aarch64_simd_combinedf;
1774 break;
1775 default:
1776 gcc_unreachable ();
1779 emit_insn (gen (dst, src1, src2));
1780 return;
1783 /* Split a complex SIMD move. */
1785 void
1786 aarch64_split_simd_move (rtx dst, rtx src)
1788 machine_mode src_mode = GET_MODE (src);
1789 machine_mode dst_mode = GET_MODE (dst);
1791 gcc_assert (VECTOR_MODE_P (dst_mode));
1793 if (REG_P (dst) && REG_P (src))
1795 rtx (*gen) (rtx, rtx);
1797 gcc_assert (VECTOR_MODE_P (src_mode));
1799 switch (src_mode)
1801 case E_V16QImode:
1802 gen = gen_aarch64_split_simd_movv16qi;
1803 break;
1804 case E_V8HImode:
1805 gen = gen_aarch64_split_simd_movv8hi;
1806 break;
1807 case E_V4SImode:
1808 gen = gen_aarch64_split_simd_movv4si;
1809 break;
1810 case E_V2DImode:
1811 gen = gen_aarch64_split_simd_movv2di;
1812 break;
1813 case E_V8HFmode:
1814 gen = gen_aarch64_split_simd_movv8hf;
1815 break;
1816 case E_V4SFmode:
1817 gen = gen_aarch64_split_simd_movv4sf;
1818 break;
1819 case E_V2DFmode:
1820 gen = gen_aarch64_split_simd_movv2df;
1821 break;
1822 default:
1823 gcc_unreachable ();
1826 emit_insn (gen (dst, src));
1827 return;
1831 bool
1832 aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
1833 machine_mode ymode, rtx y)
1835 rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
1836 gcc_assert (r != NULL);
1837 return rtx_equal_p (x, r);
1841 static rtx
1842 aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
1844 if (can_create_pseudo_p ())
1845 return force_reg (mode, value);
1846 else
1848 x = aarch64_emit_move (x, value);
1849 return x;
1854 static rtx
1855 aarch64_add_offset (scalar_int_mode mode, rtx temp, rtx reg,
1856 HOST_WIDE_INT offset)
1858 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
1860 rtx high;
1861 /* Load the full offset into a register. This
1862 might be improvable in the future. */
1863 high = GEN_INT (offset);
1864 offset = 0;
1865 high = aarch64_force_temporary (mode, temp, high);
1866 reg = aarch64_force_temporary (mode, temp,
1867 gen_rtx_PLUS (mode, high, reg));
1869 return plus_constant (mode, reg, offset);
1872 static int
1873 aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
1874 scalar_int_mode mode)
1876 int i;
1877 unsigned HOST_WIDE_INT val, val2, mask;
1878 int one_match, zero_match;
1879 int num_insns;
1881 val = INTVAL (imm);
1883 if (aarch64_move_imm (val, mode))
1885 if (generate)
1886 emit_insn (gen_rtx_SET (dest, imm));
1887 return 1;
1890 /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
1891 (with XXXX non-zero). In that case check to see if the move can be done in
1892 a smaller mode. */
1893 val2 = val & 0xffffffff;
1894 if (mode == DImode
1895 && aarch64_move_imm (val2, SImode)
1896 && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
1898 if (generate)
1899 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
1901 /* Check whether we have to emit a second instruction by seeing if any
1902 of the upper 32 bits of the original DImode value are set. */
1903 if (val == val2)
1904 return 1;
1906 i = (val >> 48) ? 48 : 32;
1908 if (generate)
1909 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1910 GEN_INT ((val >> i) & 0xffff)));
1912 return 2;
1915 if ((val >> 32) == 0 || mode == SImode)
1917 if (generate)
1919 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
1920 if (mode == SImode)
1921 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1922 GEN_INT ((val >> 16) & 0xffff)));
1923 else
1924 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
1925 GEN_INT ((val >> 16) & 0xffff)));
1927 return 2;
1930 /* Remaining cases are all for DImode. */
1932 mask = 0xffff;
1933 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
1934 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
1935 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
1936 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
1938 if (zero_match != 2 && one_match != 2)
1940 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
1941 For a 64-bit bitmask try whether changing 16 bits to all ones or
1942 zeroes creates a valid bitmask. To check any repeated bitmask,
1943 try using 16 bits from the other 32-bit half of val. */
1945 for (i = 0; i < 64; i += 16, mask <<= 16)
1947 val2 = val & ~mask;
1948 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1949 break;
1950 val2 = val | mask;
1951 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1952 break;
1953 val2 = val2 & ~mask;
1954 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
1955 if (val2 != val && aarch64_bitmask_imm (val2, mode))
1956 break;
1958 if (i != 64)
1960 if (generate)
1962 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
1963 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1964 GEN_INT ((val >> i) & 0xffff)));
1966 return 2;
1970 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
1971 are emitted by the initial mov. If one_match > zero_match, skip set bits,
1972 otherwise skip zero bits. */
1974 num_insns = 1;
1975 mask = 0xffff;
1976 val2 = one_match > zero_match ? ~val : val;
1977 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
1979 if (generate)
1980 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
1981 ? (val | ~(mask << i))
1982 : (val & (mask << i)))));
1983 for (i += 16; i < 64; i += 16)
1985 if ((val2 & (mask << i)) == 0)
1986 continue;
1987 if (generate)
1988 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1989 GEN_INT ((val >> i) & 0xffff)));
1990 num_insns ++;
1993 return num_insns;
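/* A worked example of the early-exit path above (constant chosen for
   illustration): for val == 0x1234000000005678 the low 32 bits form a
   valid move immediate and bits 32-47 are all zero, so the sequence is

	mov    dest, #0x5678
	movk   dest, #0x1234, lsl #48

   i.e. two instructions instead of a full four-instruction mov/movk
   sequence.  */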
1996 /* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a
1997 temporary value if necessary. FRAME_RELATED_P should be true if
1998 the RTX_FRAME_RELATED flag should be set and CFA adjustments added
1999 to the generated instructions. If SCRATCHREG is known to hold
2000 abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
2001 immediate again.
2003 Since this function may be used to adjust the stack pointer, we must
2004 ensure that it cannot cause transient stack deallocation (for example
2005 by first incrementing SP and then decrementing when adjusting by a
2006 large immediate). */
2008 static void
2009 aarch64_add_constant_internal (scalar_int_mode mode, int regnum,
2010 int scratchreg, HOST_WIDE_INT delta,
2011 bool frame_related_p, bool emit_move_imm)
2013 HOST_WIDE_INT mdelta = abs_hwi (delta);
2014 rtx this_rtx = gen_rtx_REG (mode, regnum);
2015 rtx_insn *insn;
2017 if (!mdelta)
2018 return;
2020 /* Single instruction adjustment. */
2021 if (aarch64_uimm12_shift (mdelta))
2023 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta)));
2024 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2025 return;
2028 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits.
2029 Only do this if mdelta cannot be handled by a single 16-bit move, as
2030 adjusting via a move is better in that case. */
2031 if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode))
2033 HOST_WIDE_INT low_off = mdelta & 0xfff;
2035 low_off = delta < 0 ? -low_off : low_off;
2036 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (low_off)));
2037 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2038 insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta - low_off)));
2039 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2040 return;
2043 /* Emit a move immediate if required and an addition/subtraction. */
2044 rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
2045 if (emit_move_imm)
2046 aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode);
2047 insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
2048 : gen_add2_insn (this_rtx, scratch_rtx));
2049 if (frame_related_p)
2051 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2052 rtx adj = plus_constant (mode, this_rtx, delta);
2053 add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (this_rtx, adj));
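/* Illustrative sketch of the three cases above; the deltas are chosen for
   exposition and are not taken from the surrounding code.  A delta of
   0x2000 fits a single shifted 12-bit immediate:

	add	x0, x0, 8192

   A delta of 0x123456 (under 24 bits and not a move immediate) is split
   into a low 12-bit part and the remainder:

	add	x0, x0, 0x456
	add	x0, x0, 0x123000

   Anything larger is built in the scratch register first, e.g. adjusting
   SP by a hypothetical 0x123456789 via x16:

	mov	x16, 0x6789
	movk	x16, 0x2345, lsl 16
	movk	x16, 0x1, lsl 32
	sub	sp, sp, x16

   which is why the adjustment must never transiently deallocate the
   stack, as noted in the comment above.  */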
2057 static inline void
2058 aarch64_add_constant (scalar_int_mode mode, int regnum, int scratchreg,
2059 HOST_WIDE_INT delta)
2061 aarch64_add_constant_internal (mode, regnum, scratchreg, delta, false, true);
2064 static inline void
2065 aarch64_add_sp (int scratchreg, HOST_WIDE_INT delta, bool emit_move_imm)
2067 aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, delta,
2068 true, emit_move_imm);
2071 static inline void
2072 aarch64_sub_sp (int scratchreg, HOST_WIDE_INT delta, bool frame_related_p)
2074 aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, -delta,
2075 frame_related_p, true);
2078 void
2079 aarch64_expand_mov_immediate (rtx dest, rtx imm)
2081 machine_mode mode = GET_MODE (dest);
2083 gcc_assert (mode == SImode || mode == DImode);
2085 /* Check on what type of symbol it is. */
2086 scalar_int_mode int_mode;
2087 if ((GET_CODE (imm) == SYMBOL_REF
2088 || GET_CODE (imm) == LABEL_REF
2089 || GET_CODE (imm) == CONST)
2090 && is_a <scalar_int_mode> (mode, &int_mode))
2092 rtx mem, base, offset;
2093 enum aarch64_symbol_type sty;
2095 /* If we have (const (plus symbol offset)), separate out the offset
2096 before we start classifying the symbol. */
2097 split_const (imm, &base, &offset);
2099 sty = aarch64_classify_symbol (base, offset);
2100 switch (sty)
2102 case SYMBOL_FORCE_TO_MEM:
2103 if (offset != const0_rtx
2104 && targetm.cannot_force_const_mem (int_mode, imm))
2106 gcc_assert (can_create_pseudo_p ());
2107 base = aarch64_force_temporary (int_mode, dest, base);
2108 base = aarch64_add_offset (int_mode, NULL, base,
2109 INTVAL (offset));
2110 aarch64_emit_move (dest, base);
2111 return;
2114 mem = force_const_mem (ptr_mode, imm);
2115 gcc_assert (mem);
2117 /* If we aren't generating PC relative literals, then
2118 we need to expand the literal pool access carefully.
2119 This is something that needs to be done in a number
2120 of places, so could well live as a separate function. */
2121 if (!aarch64_pcrelative_literal_loads)
2123 gcc_assert (can_create_pseudo_p ());
2124 base = gen_reg_rtx (ptr_mode);
2125 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
2126 if (ptr_mode != Pmode)
2127 base = convert_memory_address (Pmode, base);
2128 mem = gen_rtx_MEM (ptr_mode, base);
2131 if (int_mode != ptr_mode)
2132 mem = gen_rtx_ZERO_EXTEND (int_mode, mem);
2134 emit_insn (gen_rtx_SET (dest, mem));
2136 return;
2138 case SYMBOL_SMALL_TLSGD:
2139 case SYMBOL_SMALL_TLSDESC:
2140 case SYMBOL_SMALL_TLSIE:
2141 case SYMBOL_SMALL_GOT_28K:
2142 case SYMBOL_SMALL_GOT_4G:
2143 case SYMBOL_TINY_GOT:
2144 case SYMBOL_TINY_TLSIE:
2145 if (offset != const0_rtx)
2147 gcc_assert (can_create_pseudo_p ());
2148 base = aarch64_force_temporary (int_mode, dest, base);
2149 base = aarch64_add_offset (int_mode, NULL, base,
2150 INTVAL (offset));
2151 aarch64_emit_move (dest, base);
2152 return;
2154 /* FALLTHRU */
2156 case SYMBOL_SMALL_ABSOLUTE:
2157 case SYMBOL_TINY_ABSOLUTE:
2158 case SYMBOL_TLSLE12:
2159 case SYMBOL_TLSLE24:
2160 case SYMBOL_TLSLE32:
2161 case SYMBOL_TLSLE48:
2162 aarch64_load_symref_appropriately (dest, imm, sty);
2163 return;
2165 default:
2166 gcc_unreachable ();
2170 if (!CONST_INT_P (imm))
2172 if (GET_CODE (imm) == HIGH)
2173 emit_insn (gen_rtx_SET (dest, imm));
2174 else
2176 rtx mem = force_const_mem (mode, imm);
2177 gcc_assert (mem);
2178 emit_insn (gen_rtx_SET (dest, mem));
2181 return;
2184 aarch64_internal_mov_immediate (dest, imm, true,
2185 as_a <scalar_int_mode> (mode));
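/* Illustrative sketch (symbol name hypothetical): under the small code
   model a SYMBOL_SMALL_ABSOLUTE address with a small folded offset is
   expanded by aarch64_load_symref_appropriately into an ADRP/ADD pair,
   typically

	adrp	x0, foo+16
	add	x0, x0, :lo12:foo+16

   while for the GOT and TLS cases above a nonzero offset is split off and
   added separately after the bare symbol has been loaded.  */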
2188 static bool
2189 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
2190 tree exp ATTRIBUTE_UNUSED)
2192 /* Currently, always true. */
2193 return true;
2196 /* Implement TARGET_PASS_BY_REFERENCE. */
2198 static bool
2199 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
2200 machine_mode mode,
2201 const_tree type,
2202 bool named ATTRIBUTE_UNUSED)
2204 HOST_WIDE_INT size;
2205 machine_mode dummymode;
2206 int nregs;
2208 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
2209 size = (mode == BLKmode && type)
2210 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2212 /* Aggregates are passed by reference based on their size. */
2213 if (type && AGGREGATE_TYPE_P (type))
2215 size = int_size_in_bytes (type);
2218 /* Variable sized arguments are always passed by reference. */
2219 if (size < 0)
2220 return true;
2222 /* Can this be a candidate to be passed in fp/simd register(s)? */
2223 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
2224 &dummymode, &nregs,
2225 NULL))
2226 return false;
2228 /* Arguments which are variable sized or larger than 2 registers are
2229 passed by reference unless they are a homogeneous floating point
2230 aggregate. */
2231 return size > 2 * UNITS_PER_WORD;
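/* Illustrative sketch (types hypothetical): a
   struct { double a, b, c, d; } is a homogeneous floating-point aggregate
   with four members, so the candidate check above returns false and it is
   passed in v0-v3 even though it is 32 bytes.  A struct of three
   uint64_t fields, by contrast, is 24 bytes and not an HFA, so it is
   replaced by a pointer to a caller-allocated copy.  */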
2234 /* Return TRUE if VALTYPE is padded to its least significant bits. */
2235 static bool
2236 aarch64_return_in_msb (const_tree valtype)
2238 machine_mode dummy_mode;
2239 int dummy_int;
2241 /* Never happens in little-endian mode. */
2242 if (!BYTES_BIG_ENDIAN)
2243 return false;
2245 /* Only composite types smaller than or equal to 16 bytes can
2246 be potentially returned in registers. */
2247 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
2248 || int_size_in_bytes (valtype) <= 0
2249 || int_size_in_bytes (valtype) > 16)
2250 return false;
2252 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
2253 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
2254 is always passed/returned in the least significant bits of fp/simd
2255 register(s). */
2256 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
2257 &dummy_mode, &dummy_int, NULL))
2258 return false;
2260 return true;
2263 /* Implement TARGET_FUNCTION_VALUE.
2264 Define how to find the value returned by a function. */
2266 static rtx
2267 aarch64_function_value (const_tree type, const_tree func,
2268 bool outgoing ATTRIBUTE_UNUSED)
2270 machine_mode mode;
2271 int unsignedp;
2272 int count;
2273 machine_mode ag_mode;
2275 mode = TYPE_MODE (type);
2276 if (INTEGRAL_TYPE_P (type))
2277 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
2279 if (aarch64_return_in_msb (type))
2281 HOST_WIDE_INT size = int_size_in_bytes (type);
2283 if (size % UNITS_PER_WORD != 0)
2285 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2286 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
2290 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
2291 &ag_mode, &count, NULL))
2293 if (!aarch64_composite_type_p (type, mode))
2295 gcc_assert (count == 1 && mode == ag_mode);
2296 return gen_rtx_REG (mode, V0_REGNUM);
2298 else
2300 int i;
2301 rtx par;
2303 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
2304 for (i = 0; i < count; i++)
2306 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
2307 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2308 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
2309 XVECEXP (par, 0, i) = tmp;
2311 return par;
2314 else
2315 return gen_rtx_REG (mode, R0_REGNUM);
2318 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
2319 Return true if REGNO is the number of a hard register in which the values
2320 of called function may come back. */
2322 static bool
2323 aarch64_function_value_regno_p (const unsigned int regno)
2325 /* Maximum of 16 bytes can be returned in the general registers. Examples
2326 of 16-byte return values are: 128-bit integers and 16-byte small
2327 structures (excluding homogeneous floating-point aggregates). */
2328 if (regno == R0_REGNUM || regno == R1_REGNUM)
2329 return true;
2331 /* Up to four fp/simd registers can return a function value, e.g. a
2332 homogeneous floating-point aggregate having four members. */
2333 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
2334 return TARGET_FLOAT;
2336 return false;
2339 /* Implement TARGET_RETURN_IN_MEMORY.
2341 If the type T of the result of a function is such that
2342 void func (T arg)
2343 would require that arg be passed as a value in a register (or set of
2344 registers) according to the parameter passing rules, then the result
2345 is returned in the same registers as would be used for such an
2346 argument. */
2348 static bool
2349 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
2351 HOST_WIDE_INT size;
2352 machine_mode ag_mode;
2353 int count;
2355 if (!AGGREGATE_TYPE_P (type)
2356 && TREE_CODE (type) != COMPLEX_TYPE
2357 && TREE_CODE (type) != VECTOR_TYPE)
2358 /* Simple scalar types are always returned in registers. */
2359 return false;
2361 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
2362 type,
2363 &ag_mode,
2364 &count,
2365 NULL))
2366 return false;
2368 /* Types larger than 2 registers are returned in memory. */
2369 size = int_size_in_bytes (type);
2370 return (size < 0 || size > 2 * UNITS_PER_WORD);
2373 static bool
2374 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
2375 const_tree type, int *nregs)
2377 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2378 return aarch64_vfp_is_call_or_return_candidate (mode,
2379 type,
2380 &pcum->aapcs_vfp_rmode,
2381 nregs,
2382 NULL);
2385 /* Given MODE and TYPE of a function argument, return the alignment in
2386 bits. The idea is to suppress any stronger alignment requested by
2387 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
2388 This is a helper function for local use only. */
2390 static unsigned int
2391 aarch64_function_arg_alignment (machine_mode mode, const_tree type)
2393 if (!type)
2394 return GET_MODE_ALIGNMENT (mode);
2396 if (integer_zerop (TYPE_SIZE (type)))
2397 return 0;
2399 gcc_assert (TYPE_MODE (type) == mode);
2401 if (!AGGREGATE_TYPE_P (type))
2402 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
2404 if (TREE_CODE (type) == ARRAY_TYPE)
2405 return TYPE_ALIGN (TREE_TYPE (type));
2407 unsigned int alignment = 0;
2408 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2409 if (TREE_CODE (field) == FIELD_DECL)
2410 alignment = std::max (alignment, DECL_ALIGN (field));
2412 return alignment;
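/* Illustrative sketch (type hypothetical): for a struct { __int128 x; }
   the single FIELD_DECL has 16-byte alignment, so this returns 128 bits
   and rule C.8 in aarch64_layout_arg below rounds the NGRN up to an even
   register number before assigning the argument.  */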
2415 /* Layout a function argument according to the AAPCS64 rules. The rule
2416 numbers refer to the rule numbers in the AAPCS64. */
2418 static void
2419 aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
2420 const_tree type,
2421 bool named ATTRIBUTE_UNUSED)
2423 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2424 int ncrn, nvrn, nregs;
2425 bool allocate_ncrn, allocate_nvrn;
2426 HOST_WIDE_INT size;
2428 /* We need to do this once per argument. */
2429 if (pcum->aapcs_arg_processed)
2430 return;
2432 pcum->aapcs_arg_processed = true;
2434 /* Size in bytes, rounded up to the nearest multiple of 8 bytes. */
2435 size
2436 = ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
2437 UNITS_PER_WORD);
2439 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
2440 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
2441 mode,
2442 type,
2443 &nregs);
2445 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
2446 The following code thus handles passing by SIMD/FP registers first. */
2448 nvrn = pcum->aapcs_nvrn;
2450 /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
2451 and homogeneous short-vector aggregates (HVA). */
2452 if (allocate_nvrn)
2454 if (!TARGET_FLOAT)
2455 aarch64_err_no_fpadvsimd (mode, "argument");
2457 if (nvrn + nregs <= NUM_FP_ARG_REGS)
2459 pcum->aapcs_nextnvrn = nvrn + nregs;
2460 if (!aarch64_composite_type_p (type, mode))
2462 gcc_assert (nregs == 1);
2463 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
2465 else
2467 rtx par;
2468 int i;
2469 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2470 for (i = 0; i < nregs; i++)
2472 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
2473 V0_REGNUM + nvrn + i);
2474 tmp = gen_rtx_EXPR_LIST
2475 (VOIDmode, tmp,
2476 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
2477 XVECEXP (par, 0, i) = tmp;
2479 pcum->aapcs_reg = par;
2481 return;
2483 else
2485 /* C.3 NSRN is set to 8. */
2486 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
2487 goto on_stack;
2491 ncrn = pcum->aapcs_ncrn;
2492 nregs = size / UNITS_PER_WORD;
2494 /* C6 - C9, though the sign and zero extension semantics are
2495 handled elsewhere. This is the case where the argument fits
2496 entirely in general registers. */
2497 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
2500 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
2502 /* C.8 if the argument has an alignment of 16 then the NGRN is
2503 rounded up to the next even number. */
2504 if (nregs == 2
2505 && ncrn % 2
2506 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
2507 comparison is there because for > 16 * BITS_PER_UNIT
2508 alignment nregs should be > 2 and therefore it should be
2509 passed by reference rather than value. */
2510 && aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
2512 ++ncrn;
2513 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
2516 /* NREGS can be 0 when e.g. an empty structure is to be passed.
2517 A reg is still generated for it, but the caller should be smart
2518 enough not to use it. */
2519 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2520 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
2521 else
2523 rtx par;
2524 int i;
2526 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
2527 for (i = 0; i < nregs; i++)
2529 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
2530 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
2531 GEN_INT (i * UNITS_PER_WORD));
2532 XVECEXP (par, 0, i) = tmp;
2534 pcum->aapcs_reg = par;
2537 pcum->aapcs_nextncrn = ncrn + nregs;
2538 return;
2541 /* C.11 */
2542 pcum->aapcs_nextncrn = NUM_ARG_REGS;
2544 /* The argument is passed on the stack; record the number of words needed for
2545 this argument and align the total size if necessary. */
2546 on_stack:
2547 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
2549 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
2550 pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
2551 16 / UNITS_PER_WORD);
2552 return;
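/* Illustrative sketch of the rules above for a hypothetical prototype
   f (int a, __int128 b, double c, struct { float v[4]; } d):
   'a' takes w0; 'b' has 16-byte alignment, so C.8 skips x1 and it
   occupies x2/x3; 'c' takes d0; 'd' is an HFA of four floats and takes
   s1-s4.  If an HFA does not fit in the remaining SIMD/FP registers,
   C.3 sets the NSRN to 8 and the whole aggregate goes to the stack via
   the on_stack path.  */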
2555 /* Implement TARGET_FUNCTION_ARG. */
2557 static rtx
2558 aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
2559 const_tree type, bool named)
2561 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2562 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
2564 if (mode == VOIDmode)
2565 return NULL_RTX;
2567 aarch64_layout_arg (pcum_v, mode, type, named);
2568 return pcum->aapcs_reg;
2571 void
2572 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
2573 const_tree fntype ATTRIBUTE_UNUSED,
2574 rtx libname ATTRIBUTE_UNUSED,
2575 const_tree fndecl ATTRIBUTE_UNUSED,
2576 unsigned n_named ATTRIBUTE_UNUSED)
2578 pcum->aapcs_ncrn = 0;
2579 pcum->aapcs_nvrn = 0;
2580 pcum->aapcs_nextncrn = 0;
2581 pcum->aapcs_nextnvrn = 0;
2582 pcum->pcs_variant = ARM_PCS_AAPCS64;
2583 pcum->aapcs_reg = NULL_RTX;
2584 pcum->aapcs_arg_processed = false;
2585 pcum->aapcs_stack_words = 0;
2586 pcum->aapcs_stack_size = 0;
2588 if (!TARGET_FLOAT
2589 && fndecl && TREE_PUBLIC (fndecl)
2590 && fntype && fntype != error_mark_node)
2592 const_tree type = TREE_TYPE (fntype);
2593 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
2594 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
2595 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
2596 &mode, &nregs, NULL))
2597 aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
2599 return;
2602 static void
2603 aarch64_function_arg_advance (cumulative_args_t pcum_v,
2604 machine_mode mode,
2605 const_tree type,
2606 bool named)
2608 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
2609 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
2611 aarch64_layout_arg (pcum_v, mode, type, named);
2612 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
2613 != (pcum->aapcs_stack_words != 0));
2614 pcum->aapcs_arg_processed = false;
2615 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
2616 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
2617 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
2618 pcum->aapcs_stack_words = 0;
2619 pcum->aapcs_reg = NULL_RTX;
2623 bool
2624 aarch64_function_arg_regno_p (unsigned regno)
2626 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
2627 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
2630 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2631 PARM_BOUNDARY bits of alignment, but will be given anything up
2632 to STACK_BOUNDARY bits if the type requires it. This makes sure
2633 that both before and after the layout of each argument, the Next
2634 Stacked Argument Address (NSAA) will have a minimum alignment of
2635 8 bytes. */
2637 static unsigned int
2638 aarch64_function_arg_boundary (machine_mode mode, const_tree type)
2640 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
2641 return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
2644 /* Implement TARGET_FUNCTION_ARG_PADDING.
2646 Small aggregate types are placed in the lowest memory address.
2648 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2650 static pad_direction
2651 aarch64_function_arg_padding (machine_mode mode, const_tree type)
2653 /* On little-endian targets, the least significant byte of every stack
2654 argument is passed at the lowest byte address of the stack slot. */
2655 if (!BYTES_BIG_ENDIAN)
2656 return PAD_UPWARD;
2658 /* Otherwise, integral, floating-point and pointer types are padded downward:
2659 the least significant byte of a stack argument is passed at the highest
2660 byte address of the stack slot. */
2661 if (type
2662 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
2663 || POINTER_TYPE_P (type))
2664 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
2665 return PAD_DOWNWARD;
2667 /* Everything else padded upward, i.e. data in first byte of stack slot. */
2668 return PAD_UPWARD;
2671 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2673 It specifies padding for the last (possibly the only)
2674 element of a block move between registers and memory.
2675 Assuming the block is in memory, padding upward means that
2676 the last element is padded after its most significant byte,
2677 while with downward padding the last element is padded on
2678 its least significant byte side.
2680 Small aggregates and small complex types are always padded
2681 upwards.
2683 We don't need to worry about homogeneous floating-point or
2684 short-vector aggregates; their move is not affected by the
2685 padding direction determined here. Regardless of endianness,
2686 each element of such an aggregate is put in the least
2687 significant bits of a fp/simd register.
2689 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2690 register has useful data, and return the opposite if the most
2691 significant byte does. */
2693 bool
2694 aarch64_pad_reg_upward (machine_mode mode, const_tree type,
2695 bool first ATTRIBUTE_UNUSED)
2698 /* Small composite types are always padded upward. */
2699 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
2701 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
2702 : GET_MODE_SIZE (mode));
2703 if (size < 2 * UNITS_PER_WORD)
2704 return true;
2707 /* Otherwise, use the default padding. */
2708 return !BYTES_BIG_ENDIAN;
2711 static scalar_int_mode
2712 aarch64_libgcc_cmp_return_mode (void)
2714 return SImode;
2717 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
2719 /* We use the 12-bit shifted immediate arithmetic instructions so values
2720 must be multiple of (1 << 12), i.e. 4096. */
2721 #define ARITH_FACTOR 4096
2723 #if (PROBE_INTERVAL % ARITH_FACTOR) != 0
2724 #error Cannot use simple address calculation for stack probing
2725 #endif
2727 /* The pair of scratch registers used for stack probing. */
2728 #define PROBE_STACK_FIRST_REG 9
2729 #define PROBE_STACK_SECOND_REG 10
2731 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
2732 inclusive. These are offsets from the current stack pointer. */
2734 static void
2735 aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
2737 rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
2739 /* See the same assertion on PROBE_INTERVAL above. */
2740 gcc_assert ((first % ARITH_FACTOR) == 0);
2742 /* See if we have a constant small number of probes to generate. If so,
2743 that's the easy case. */
2744 if (size <= PROBE_INTERVAL)
2746 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
2748 emit_set_insn (reg1,
2749 plus_constant (Pmode,
2750 stack_pointer_rtx, -(first + base)));
2751 emit_stack_probe (plus_constant (Pmode, reg1, base - size));
2754 /* The run-time loop is made up of 8 insns in the generic case while the
2755 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
2756 else if (size <= 4 * PROBE_INTERVAL)
2758 HOST_WIDE_INT i, rem;
2760 emit_set_insn (reg1,
2761 plus_constant (Pmode,
2762 stack_pointer_rtx,
2763 -(first + PROBE_INTERVAL)));
2764 emit_stack_probe (reg1);
2766 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
2767 it exceeds SIZE. If only two probes are needed, this will not
2768 generate any code. Then probe at FIRST + SIZE. */
2769 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
2771 emit_set_insn (reg1,
2772 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
2773 emit_stack_probe (reg1);
2776 rem = size - (i - PROBE_INTERVAL);
2777 if (rem > 256)
2779 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2781 emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
2782 emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
2784 else
2785 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
2788 /* Otherwise, do the same as above, but in a loop. Note that we must be
2789 extra careful with variables wrapping around because we might be at
2790 the very top (or the very bottom) of the address space and we have
2791 to be able to handle this case properly; in particular, we use an
2792 equality test for the loop condition. */
2793 else
2795 rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);
2797 /* Step 1: round SIZE to the previous multiple of the interval. */
2799 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
2802 /* Step 2: compute initial and final value of the loop counter. */
2804 /* TEST_ADDR = SP + FIRST. */
2805 emit_set_insn (reg1,
2806 plus_constant (Pmode, stack_pointer_rtx, -first));
2808 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
2809 HOST_WIDE_INT adjustment = - (first + rounded_size);
2810 if (! aarch64_uimm12_shift (adjustment))
2812 aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
2813 true, Pmode);
2814 emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
2816 else
2818 emit_set_insn (reg2,
2819 plus_constant (Pmode, stack_pointer_rtx, adjustment));
2822 /* Step 3: the loop
2826 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
2827 probe at TEST_ADDR
2829 while (TEST_ADDR != LAST_ADDR)
2831 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
2832 until it is equal to ROUNDED_SIZE. */
2834 emit_insn (gen_probe_stack_range (reg1, reg1, reg2));
2837 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
2838 that SIZE is equal to ROUNDED_SIZE. */
2840 if (size != rounded_size)
2842 HOST_WIDE_INT rem = size - rounded_size;
2844 if (rem > 256)
2846 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
2848 emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
2849 emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
2851 else
2852 emit_stack_probe (plus_constant (Pmode, reg2, -rem));
2856 /* Make sure nothing is scheduled before we are done. */
2857 emit_insn (gen_blockage ());
2860 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
2861 absolute addresses. */
2863 const char *
2864 aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
2866 static int labelno = 0;
2867 char loop_lab[32];
2868 rtx xops[2];
2870 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
2872 /* Loop. */
2873 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
2875 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
2876 xops[0] = reg1;
2877 xops[1] = GEN_INT (PROBE_INTERVAL);
2878 output_asm_insn ("sub\t%0, %0, %1", xops);
2880 /* Probe at TEST_ADDR. */
2881 output_asm_insn ("str\txzr, [%0]", xops);
2883 /* Test if TEST_ADDR == LAST_ADDR. */
2884 xops[1] = reg2;
2885 output_asm_insn ("cmp\t%0, %1", xops);
2887 /* Branch. */
2888 fputs ("\tb.ne\t", asm_out_file);
2889 assemble_name_raw (asm_out_file, loop_lab);
2890 fputc ('\n', asm_out_file);
2892 return "";
2895 /* Mark the registers that need to be saved by the callee and calculate
2896 the size of the callee-saved registers area and frame record (both FP
2897 and LR may be omitted). */
2898 static void
2899 aarch64_layout_frame (void)
2901 HOST_WIDE_INT offset = 0;
2902 int regno, last_fp_reg = INVALID_REGNUM;
2904 if (reload_completed && cfun->machine->frame.laid_out)
2905 return;
2907 /* Force a frame chain for EH returns so the return address is at FP+8. */
2908 cfun->machine->frame.emit_frame_chain
2909 = frame_pointer_needed || crtl->calls_eh_return;
2911 /* Emit a frame chain if the frame pointer is enabled.
2912 If -momit-leaf-frame-pointer is used, do not use a frame chain
2913 in leaf functions which do not use LR. */
2914 if (flag_omit_frame_pointer == 2
2915 && !(flag_omit_leaf_frame_pointer && crtl->is_leaf
2916 && !df_regs_ever_live_p (LR_REGNUM)))
2917 cfun->machine->frame.emit_frame_chain = true;
2919 #define SLOT_NOT_REQUIRED (-2)
2920 #define SLOT_REQUIRED (-1)
2922 cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
2923 cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
2925 /* First mark all the registers that really need to be saved... */
2926 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2927 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2929 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2930 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2932 /* ... that includes the eh data registers (if needed)... */
2933 if (crtl->calls_eh_return)
2934 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
2935 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2936 = SLOT_REQUIRED;
2938 /* ... and any callee saved register that dataflow says is live. */
2939 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2940 if (df_regs_ever_live_p (regno)
2941 && (regno == R30_REGNUM
2942 || !call_used_regs[regno]))
2943 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2945 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2946 if (df_regs_ever_live_p (regno)
2947 && !call_used_regs[regno])
2949 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2950 last_fp_reg = regno;
2953 if (cfun->machine->frame.emit_frame_chain)
2955 /* FP and LR are placed in the linkage record. */
2956 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
2957 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2958 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
2959 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
2960 offset = 2 * UNITS_PER_WORD;
2963 /* Now assign stack slots for them. */
2964 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2965 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2967 cfun->machine->frame.reg_offset[regno] = offset;
2968 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
2969 cfun->machine->frame.wb_candidate1 = regno;
2970 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
2971 cfun->machine->frame.wb_candidate2 = regno;
2972 offset += UNITS_PER_WORD;
2975 HOST_WIDE_INT max_int_offset = offset;
2976 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2977 bool has_align_gap = offset != max_int_offset;
2979 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2980 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2982 /* If there is an alignment gap between integer and fp callee-saves,
2983 allocate the last fp register to it if possible. */
2984 if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
2986 cfun->machine->frame.reg_offset[regno] = max_int_offset;
2987 break;
2990 cfun->machine->frame.reg_offset[regno] = offset;
2991 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
2992 cfun->machine->frame.wb_candidate1 = regno;
2993 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
2994 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2995 cfun->machine->frame.wb_candidate2 = regno;
2996 offset += UNITS_PER_WORD;
2999 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
3001 cfun->machine->frame.saved_regs_size = offset;
3003 HOST_WIDE_INT varargs_and_saved_regs_size
3004 = offset + cfun->machine->frame.saved_varargs_size;
3006 cfun->machine->frame.hard_fp_offset
3007 = ROUND_UP (varargs_and_saved_regs_size + get_frame_size (),
3008 STACK_BOUNDARY / BITS_PER_UNIT);
3010 cfun->machine->frame.frame_size
3011 = ROUND_UP (cfun->machine->frame.hard_fp_offset
3012 + crtl->outgoing_args_size,
3013 STACK_BOUNDARY / BITS_PER_UNIT);
3015 cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
3017 cfun->machine->frame.initial_adjust = 0;
3018 cfun->machine->frame.final_adjust = 0;
3019 cfun->machine->frame.callee_adjust = 0;
3020 cfun->machine->frame.callee_offset = 0;
3022 HOST_WIDE_INT max_push_offset = 0;
3023 if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
3024 max_push_offset = 512;
3025 else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
3026 max_push_offset = 256;
3028 if (cfun->machine->frame.frame_size < max_push_offset
3029 && crtl->outgoing_args_size == 0)
3031 /* Simple, small frame with no outgoing arguments:
3032 stp reg1, reg2, [sp, -frame_size]!
3033 stp reg3, reg4, [sp, 16] */
3034 cfun->machine->frame.callee_adjust = cfun->machine->frame.frame_size;
3036 else if ((crtl->outgoing_args_size
3037 + cfun->machine->frame.saved_regs_size < 512)
3038 && !(cfun->calls_alloca
3039 && cfun->machine->frame.hard_fp_offset < max_push_offset))
3041 /* Frame with small outgoing arguments:
3042 sub sp, sp, frame_size
3043 stp reg1, reg2, [sp, outgoing_args_size]
3044 stp reg3, reg4, [sp, outgoing_args_size + 16] */
3045 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
3046 cfun->machine->frame.callee_offset
3047 = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
3049 else if (cfun->machine->frame.hard_fp_offset < max_push_offset)
3051 /* Frame with large outgoing arguments but a small local area:
3052 stp reg1, reg2, [sp, -hard_fp_offset]!
3053 stp reg3, reg4, [sp, 16]
3054 sub sp, sp, outgoing_args_size */
3055 cfun->machine->frame.callee_adjust = cfun->machine->frame.hard_fp_offset;
3056 cfun->machine->frame.final_adjust
3057 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
3059 else
3061 /* Frame with large local area and outgoing arguments using frame pointer:
3062 sub sp, sp, hard_fp_offset
3063 stp x29, x30, [sp, 0]
3064 add x29, sp, 0
3065 stp reg3, reg4, [sp, 16]
3066 sub sp, sp, outgoing_args_size */
3067 cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
3068 cfun->machine->frame.final_adjust
3069 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
3072 cfun->machine->frame.laid_out = true;
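/* Illustrative sketch with made-up numbers: a function that needs a frame
   chain, saves x19 and d8, and has 32 bytes of locals and no outgoing
   arguments gets reg_offset[x29] = 0, reg_offset[x30] = 8,
   reg_offset[x19] = 16 and, because rounding the integer saves up to a
   16-byte boundary leaves an 8-byte gap, reg_offset[d8] = 24.  That makes
   saved_regs_size = 32, hard_fp_offset = 64 and frame_size = 64; since 64
   is below max_push_offset and outgoing_args_size is zero, the first
   (simple, small frame) case above is chosen and callee_adjust = 64.  */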
3075 /* Return true if the register REGNO is saved on entry to
3076 the current function. */
3078 static bool
3079 aarch64_register_saved_on_entry (int regno)
3081 return cfun->machine->frame.reg_offset[regno] >= 0;
3084 /* Return the next register, from REGNO up to LIMIT, that the callee
3085 needs to save. */
3087 static unsigned
3088 aarch64_next_callee_save (unsigned regno, unsigned limit)
3090 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
3091 regno ++;
3092 return regno;
3095 /* Push the register number REGNO of mode MODE to the stack with write-back
3096 adjusting the stack by ADJUSTMENT. */
3098 static void
3099 aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
3100 HOST_WIDE_INT adjustment)
3102 rtx base_rtx = stack_pointer_rtx;
3103 rtx insn, reg, mem;
3105 reg = gen_rtx_REG (mode, regno);
3106 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
3107 plus_constant (Pmode, base_rtx, -adjustment));
3108 mem = gen_frame_mem (mode, mem);
3110 insn = emit_move_insn (mem, reg);
3111 RTX_FRAME_RELATED_P (insn) = 1;
3114 /* Generate and return an instruction to store the pair of registers
3115 REG and REG2 of mode MODE to location BASE with write-back adjusting
3116 the stack location BASE by ADJUSTMENT. */
3118 static rtx
3119 aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
3120 HOST_WIDE_INT adjustment)
3122 switch (mode)
3124 case E_DImode:
3125 return gen_storewb_pairdi_di (base, base, reg, reg2,
3126 GEN_INT (-adjustment),
3127 GEN_INT (UNITS_PER_WORD - adjustment));
3128 case E_DFmode:
3129 return gen_storewb_pairdf_di (base, base, reg, reg2,
3130 GEN_INT (-adjustment),
3131 GEN_INT (UNITS_PER_WORD - adjustment));
3132 default:
3133 gcc_unreachable ();
3137 /* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
3138 stack pointer by ADJUSTMENT. */
3140 static void
3141 aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
3143 rtx_insn *insn;
3144 machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
3146 if (regno2 == INVALID_REGNUM)
3147 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
3149 rtx reg1 = gen_rtx_REG (mode, regno1);
3150 rtx reg2 = gen_rtx_REG (mode, regno2);
3152 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
3153 reg2, adjustment));
3154 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
3155 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3156 RTX_FRAME_RELATED_P (insn) = 1;
3159 /* Load the pair of registers REG, REG2 of mode MODE from stack location BASE,
3160 adjusting BASE by ADJUSTMENT afterwards. */
3162 static rtx
3163 aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
3164 HOST_WIDE_INT adjustment)
3166 switch (mode)
3168 case E_DImode:
3169 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3170 GEN_INT (UNITS_PER_WORD));
3171 case E_DFmode:
3172 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3173 GEN_INT (UNITS_PER_WORD));
3174 default:
3175 gcc_unreachable ();
3179 /* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
3180 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
3181 into CFI_OPS. */
3183 static void
3184 aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
3185 rtx *cfi_ops)
3187 machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
3188 rtx reg1 = gen_rtx_REG (mode, regno1);
3190 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
3192 if (regno2 == INVALID_REGNUM)
3194 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
3195 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
3196 emit_move_insn (reg1, gen_frame_mem (mode, mem));
3198 else
3200 rtx reg2 = gen_rtx_REG (mode, regno2);
3201 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
3202 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
3203 reg2, adjustment));
3207 /* Generate and return a store pair instruction of mode MODE to store
3208 register REG1 to MEM1 and register REG2 to MEM2. */
3210 static rtx
3211 aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
3212 rtx reg2)
3214 switch (mode)
3216 case E_DImode:
3217 return gen_store_pairdi (mem1, reg1, mem2, reg2);
3219 case E_DFmode:
3220 return gen_store_pairdf (mem1, reg1, mem2, reg2);
3222 default:
3223 gcc_unreachable ();
3227 /* Generate and return a load pair instruction of mode MODE to load register
3228 REG1 from MEM1 and register REG2 from MEM2. */
3230 static rtx
3231 aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
3232 rtx mem2)
3234 switch (mode)
3236 case E_DImode:
3237 return gen_load_pairdi (reg1, mem1, reg2, mem2);
3239 case E_DFmode:
3240 return gen_load_pairdf (reg1, mem1, reg2, mem2);
3242 default:
3243 gcc_unreachable ();
3247 /* Return TRUE if return address signing should be enabled for the current
3248 function, otherwise return FALSE. */
3250 bool
3251 aarch64_return_address_signing_enabled (void)
3253 /* This function should only be called after the frame is laid out. */
3254 gcc_assert (cfun->machine->frame.laid_out);
3256 /* If the signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf
3257 function if its LR is pushed onto the stack. */
3258 return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
3259 || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
3260 && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0));
3263 /* Emit code to save the callee-saved registers from register number START
3264 to LIMIT to the stack at the location starting at offset START_OFFSET,
3265 skipping any write-back candidates if SKIP_WB is true. */
3267 static void
3268 aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
3269 unsigned start, unsigned limit, bool skip_wb)
3271 rtx_insn *insn;
3272 unsigned regno;
3273 unsigned regno2;
3275 for (regno = aarch64_next_callee_save (start, limit);
3276 regno <= limit;
3277 regno = aarch64_next_callee_save (regno + 1, limit))
3279 rtx reg, mem;
3280 HOST_WIDE_INT offset;
3282 if (skip_wb
3283 && (regno == cfun->machine->frame.wb_candidate1
3284 || regno == cfun->machine->frame.wb_candidate2))
3285 continue;
3287 if (cfun->machine->reg_is_wrapped_separately[regno])
3288 continue;
3290 reg = gen_rtx_REG (mode, regno);
3291 offset = start_offset + cfun->machine->frame.reg_offset[regno];
3292 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
3293 offset));
3295 regno2 = aarch64_next_callee_save (regno + 1, limit);
3297 if (regno2 <= limit
3298 && !cfun->machine->reg_is_wrapped_separately[regno2]
3299 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
3300 == cfun->machine->frame.reg_offset[regno2]))
3303 rtx reg2 = gen_rtx_REG (mode, regno2);
3304 rtx mem2;
3306 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
3307 mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
3308 offset));
3309 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
3310 reg2));
3312 /* The first part of a frame-related parallel insn is
3313 always assumed to be relevant to the frame
3314 calculations; subsequent parts are only
3315 frame-related if explicitly marked. */
3316 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3317 regno = regno2;
3319 else
3320 insn = emit_move_insn (mem, reg);
3322 RTX_FRAME_RELATED_P (insn) = 1;
3326 /* Emit code to restore the callee registers of mode MODE from register
3327 number START up to and including LIMIT. Restore from the stack offset
3328 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
3329 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
3331 static void
3332 aarch64_restore_callee_saves (machine_mode mode,
3333 HOST_WIDE_INT start_offset, unsigned start,
3334 unsigned limit, bool skip_wb, rtx *cfi_ops)
3336 rtx base_rtx = stack_pointer_rtx;
3337 unsigned regno;
3338 unsigned regno2;
3339 HOST_WIDE_INT offset;
3341 for (regno = aarch64_next_callee_save (start, limit);
3342 regno <= limit;
3343 regno = aarch64_next_callee_save (regno + 1, limit))
3345 if (cfun->machine->reg_is_wrapped_separately[regno])
3346 continue;
3348 rtx reg, mem;
3350 if (skip_wb
3351 && (regno == cfun->machine->frame.wb_candidate1
3352 || regno == cfun->machine->frame.wb_candidate2))
3353 continue;
3355 reg = gen_rtx_REG (mode, regno);
3356 offset = start_offset + cfun->machine->frame.reg_offset[regno];
3357 mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
3359 regno2 = aarch64_next_callee_save (regno + 1, limit);
3361 if (regno2 <= limit
3362 && !cfun->machine->reg_is_wrapped_separately[regno2]
3363 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
3364 == cfun->machine->frame.reg_offset[regno2]))
3366 rtx reg2 = gen_rtx_REG (mode, regno2);
3367 rtx mem2;
3369 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
3370 mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
3371 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
3373 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
3374 regno = regno2;
3376 else
3377 emit_move_insn (reg, mem);
3378 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
3382 static inline bool
3383 offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
3384 HOST_WIDE_INT offset)
3386 return offset >= -256 && offset < 256;
3389 static inline bool
3390 offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3392 return (offset >= 0
3393 && offset < 4096 * GET_MODE_SIZE (mode)
3394 && offset % GET_MODE_SIZE (mode) == 0);
3397 bool
3398 aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3400 return (offset >= -64 * GET_MODE_SIZE (mode)
3401 && offset < 64 * GET_MODE_SIZE (mode)
3402 && offset % GET_MODE_SIZE (mode) == 0);
3405 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
3407 static sbitmap
3408 aarch64_get_separate_components (void)
3410 aarch64_layout_frame ();
3412 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
3413 bitmap_clear (components);
3415 /* The registers we need saved to the frame. */
3416 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3417 if (aarch64_register_saved_on_entry (regno))
3419 HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
3420 if (!frame_pointer_needed)
3421 offset += cfun->machine->frame.frame_size
3422 - cfun->machine->frame.hard_fp_offset;
3423 /* Check that we can access the stack slot of the register with one
3424 direct load with no adjustments needed. */
3425 if (offset_12bit_unsigned_scaled_p (DImode, offset))
3426 bitmap_set_bit (components, regno);
3429 /* Don't mess with the hard frame pointer. */
3430 if (frame_pointer_needed)
3431 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
3433 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3434 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3435 /* If aarch64_layout_frame has chosen registers to store/restore with
3436 writeback don't interfere with them to avoid having to output explicit
3437 stack adjustment instructions. */
3438 if (reg2 != INVALID_REGNUM)
3439 bitmap_clear_bit (components, reg2);
3440 if (reg1 != INVALID_REGNUM)
3441 bitmap_clear_bit (components, reg1);
3443 bitmap_clear_bit (components, LR_REGNUM);
3444 bitmap_clear_bit (components, SP_REGNUM);
3446 return components;
3449 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
3451 static sbitmap
3452 aarch64_components_for_bb (basic_block bb)
3454 bitmap in = DF_LIVE_IN (bb);
3455 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
3456 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
3458 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
3459 bitmap_clear (components);
3461 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
3462 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3463 if ((!call_used_regs[regno])
3464 && (bitmap_bit_p (in, regno)
3465 || bitmap_bit_p (gen, regno)
3466 || bitmap_bit_p (kill, regno)))
3467 bitmap_set_bit (components, regno);
3469 return components;
3472 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
3473 Nothing to do for aarch64. */
3475 static void
3476 aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
3480 /* Return the next set bit in BMP from START onwards. Return the total number
3481 of bits in BMP if no set bit is found at or after START. */
3483 static unsigned int
3484 aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
3486 unsigned int nbits = SBITMAP_SIZE (bmp);
3487 if (start == nbits)
3488 return start;
3490 gcc_assert (start < nbits);
3491 for (unsigned int i = start; i < nbits; i++)
3492 if (bitmap_bit_p (bmp, i))
3493 return i;
3495 return nbits;
3498 /* Do the work for aarch64_emit_prologue_components and
3499 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
3500 to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
3501 for these components or the epilogue sequence. That is, it determines
3502 whether we should emit stores or loads and what kind of CFA notes to attach
3503 to the insns. Otherwise the logic for the two sequences is very
3504 similar. */
3506 static void
3507 aarch64_process_components (sbitmap components, bool prologue_p)
3509 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
3510 ? HARD_FRAME_POINTER_REGNUM
3511 : STACK_POINTER_REGNUM);
3513 unsigned last_regno = SBITMAP_SIZE (components);
3514 unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
3515 rtx_insn *insn = NULL;
3517 while (regno != last_regno)
3519 /* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved
3520 so DFmode for the vector registers is enough. */
3521 machine_mode mode = GP_REGNUM_P (regno) ? E_DImode : E_DFmode;
3522 rtx reg = gen_rtx_REG (mode, regno);
3523 HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
3524 if (!frame_pointer_needed)
3525 offset += cfun->machine->frame.frame_size
3526 - cfun->machine->frame.hard_fp_offset;
3527 rtx addr = plus_constant (Pmode, ptr_reg, offset);
3528 rtx mem = gen_frame_mem (mode, addr);
3530 rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
3531 unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
3532 /* No more registers to handle after REGNO.
3533 Emit a single save/restore and exit. */
3534 if (regno2 == last_regno)
3536 insn = emit_insn (set);
3537 RTX_FRAME_RELATED_P (insn) = 1;
3538 if (prologue_p)
3539 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
3540 else
3541 add_reg_note (insn, REG_CFA_RESTORE, reg);
3542 break;
3545 HOST_WIDE_INT offset2 = cfun->machine->frame.reg_offset[regno2];
3546 /* The next register is not of the same class or its offset is not
3547 mergeable with the current one into a pair. */
3548 if (!satisfies_constraint_Ump (mem)
3549 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
3550 || (offset2 - cfun->machine->frame.reg_offset[regno])
3551 != GET_MODE_SIZE (mode))
3553 insn = emit_insn (set);
3554 RTX_FRAME_RELATED_P (insn) = 1;
3555 if (prologue_p)
3556 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
3557 else
3558 add_reg_note (insn, REG_CFA_RESTORE, reg);
3560 regno = regno2;
3561 continue;
3564 /* REGNO2 can be saved/restored in a pair with REGNO. */
3565 rtx reg2 = gen_rtx_REG (mode, regno2);
3566 if (!frame_pointer_needed)
3567 offset2 += cfun->machine->frame.frame_size
3568 - cfun->machine->frame.hard_fp_offset;
3569 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
3570 rtx mem2 = gen_frame_mem (mode, addr2);
3571 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
3572 : gen_rtx_SET (reg2, mem2);
3574 if (prologue_p)
3575 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
3576 else
3577 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
3579 RTX_FRAME_RELATED_P (insn) = 1;
3580 if (prologue_p)
3582 add_reg_note (insn, REG_CFA_OFFSET, set);
3583 add_reg_note (insn, REG_CFA_OFFSET, set2);
3585 else
3587 add_reg_note (insn, REG_CFA_RESTORE, reg);
3588 add_reg_note (insn, REG_CFA_RESTORE, reg2);
3591 regno = aarch64_get_next_set_bit (components, regno2 + 1);
3595 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
3597 static void
3598 aarch64_emit_prologue_components (sbitmap components)
3600 aarch64_process_components (components, true);
3603 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
3605 static void
3606 aarch64_emit_epilogue_components (sbitmap components)
3608 aarch64_process_components (components, false);
3611 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
3613 static void
3614 aarch64_set_handled_components (sbitmap components)
3616 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
3617 if (bitmap_bit_p (components, regno))
3618 cfun->machine->reg_is_wrapped_separately[regno] = true;
3621 /* AArch64 stack frames generated by this compiler look like:
3623 +-------------------------------+
3625 | incoming stack arguments |
3627 +-------------------------------+
3628 | | <-- incoming stack pointer (aligned)
3629 | callee-allocated save area |
3630 | for register varargs |
3632 +-------------------------------+
3633 | local variables | <-- frame_pointer_rtx
3635 +-------------------------------+
3636 | padding0 | \
3637 +-------------------------------+ |
3638 | callee-saved registers | | frame.saved_regs_size
3639 +-------------------------------+ |
3640 | LR' | |
3641 +-------------------------------+ |
3642 | FP' | / <- hard_frame_pointer_rtx (aligned)
3643 +-------------------------------+
3644 | dynamic allocation |
3645 +-------------------------------+
3646 | padding |
3647 +-------------------------------+
3648 | outgoing stack arguments | <-- arg_pointer
3650 +-------------------------------+
3651 | | <-- stack_pointer_rtx (aligned)
3653 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
3654 but leave frame_pointer_rtx and hard_frame_pointer_rtx
3655 unchanged. */
3657 /* Generate the prologue instructions for entry into a function.
3658 Establish the stack frame by decreasing the stack pointer with a
3659 properly calculated size and, if necessary, create a frame record
3660 filled with the values of LR and previous frame pointer. The
3661 current FP is also set up if it is in use. */
3663 void
3664 aarch64_expand_prologue (void)
3666 aarch64_layout_frame ();
3668 HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size;
3669 HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
3670 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
3671 HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
3672 HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
3673 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3674 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3675 bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
3676 rtx_insn *insn;
3678 /* Sign return address for functions. */
3679 if (aarch64_return_address_signing_enabled ())
3681 insn = emit_insn (gen_pacisp ());
3682 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
3683 RTX_FRAME_RELATED_P (insn) = 1;
3686 if (flag_stack_usage_info)
3687 current_function_static_stack_size = frame_size;
3689 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3691 if (crtl->is_leaf && !cfun->calls_alloca)
3693 if (frame_size > PROBE_INTERVAL
3694 && frame_size > get_stack_check_protect ())
3695 aarch64_emit_probe_stack_range (get_stack_check_protect (),
3696 (frame_size
3697 - get_stack_check_protect ()));
3699 else if (frame_size > 0)
3700 aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
3703 aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
3705 if (callee_adjust != 0)
3706 aarch64_push_regs (reg1, reg2, callee_adjust);
3708 if (emit_frame_chain)
3710 if (callee_adjust == 0)
3711 aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM,
3712 R30_REGNUM, false);
3713 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
3714 stack_pointer_rtx,
3715 GEN_INT (callee_offset)));
3716 RTX_FRAME_RELATED_P (insn) = frame_pointer_needed;
3717 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
3720 aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
3721 callee_adjust != 0 || emit_frame_chain);
3722 aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
3723 callee_adjust != 0 || emit_frame_chain);
3724 aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
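/* Illustrative sketch (continuing the made-up frame from the layout
   example above, callee_adjust == 64 with a frame chain): the code above
   would emit roughly

	stp	x29, x30, [sp, -64]!
	mov	x29, sp
	str	x19, [sp, 16]
	str	d8, [sp, 24]

   i.e. the writeback push performs the whole stack allocation, the frame
   chain is established, and the remaining callee saves land at their
   reg_offset slots relative to the new SP.  */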
3727 /* Return TRUE if we can use a simple_return insn.
3729 This function checks whether the callee-saved stack is empty, which
3730 means no restore actions are needed. The pro_and_epilogue pass uses
3731 this to check whether the shrink-wrapping optimization is feasible.
3733 bool
3734 aarch64_use_return_insn_p (void)
3736 if (!reload_completed)
3737 return false;
3739 if (crtl->profile)
3740 return false;
3742 aarch64_layout_frame ();
3744 return cfun->machine->frame.frame_size == 0;
3747 /* Generate the epilogue instructions for returning from a function.
3748 This is almost exactly the reverse of the prologue sequence, except
3749 that we need to insert barriers to avoid scheduling loads that read
3750 from a deallocated stack, and we optimize the unwind records by
3751 emitting them all together if possible. */
3752 void
3753 aarch64_expand_epilogue (bool for_sibcall)
3755 aarch64_layout_frame ();
3757 HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
3758 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
3759 HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
3760 HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
3761 unsigned reg1 = cfun->machine->frame.wb_candidate1;
3762 unsigned reg2 = cfun->machine->frame.wb_candidate2;
3763 rtx cfi_ops = NULL;
3764 rtx_insn *insn;
3766 /* We need a memory barrier to prevent reads from the deallocated stack. */
3767 bool need_barrier_p = (get_frame_size ()
3768 + cfun->machine->frame.saved_varargs_size) != 0;
3770 /* Emit a barrier to prevent loads from a deallocated stack. */
3771 if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
3772 || crtl->calls_eh_return)
3774 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3775 need_barrier_p = false;
3778 /* Restore the stack pointer from the frame pointer if it may not
3779 be the same as the stack pointer. */
3780 if (frame_pointer_needed && (final_adjust || cfun->calls_alloca))
3782 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
3783 hard_frame_pointer_rtx,
3784 GEN_INT (-callee_offset)));
3785 /* If writeback is used when restoring callee-saves, the CFA
3786 is restored on the instruction doing the writeback. */
3787 RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
3789 else
3790 aarch64_add_sp (IP1_REGNUM, final_adjust, df_regs_ever_live_p (IP1_REGNUM));
3792 aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
3793 callee_adjust != 0, &cfi_ops);
3794 aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
3795 callee_adjust != 0, &cfi_ops);
3797 if (need_barrier_p)
3798 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
3800 if (callee_adjust != 0)
3801 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
3803 if (callee_adjust != 0 || initial_adjust > 65536)
3805 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
3806 insn = get_last_insn ();
3807 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
3808 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
3809 RTX_FRAME_RELATED_P (insn) = 1;
3810 cfi_ops = NULL;
3813 aarch64_add_sp (IP0_REGNUM, initial_adjust, df_regs_ever_live_p (IP0_REGNUM));
3815 if (cfi_ops)
3817 /* Emit delayed restores and reset the CFA to be SP. */
3818 insn = get_last_insn ();
3819 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
3820 REG_NOTES (insn) = cfi_ops;
3821 RTX_FRAME_RELATED_P (insn) = 1;
3824 /* We prefer to emit the combined return/authenticate instruction RETAA;
3825 however, there are three cases in which we must instead emit an explicit
3826 authentication instruction.
3828 1) Sibcalls don't return in a normal way, so if we're about to call one
3829 we must authenticate.
3831 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
3832 generating code for !TARGET_ARMV8_3 we can't use it and must
3833 explicitly authenticate.
3835 3) On an eh_return path we make extra stack adjustments to update the
3836 canonical frame address to be the exception handler's CFA. We want
3837 to authenticate using the CFA of the function which calls eh_return. */
3839 if (aarch64_return_address_signing_enabled ()
3840 && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
3842 insn = emit_insn (gen_autisp ());
3843 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
3844 RTX_FRAME_RELATED_P (insn) = 1;
3847 /* Stack adjustment for exception handler. */
3848 if (crtl->calls_eh_return)
3850 /* We need to unwind the stack by the offset computed by
3851 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
3852 to be SP; letting the CFA move during this adjustment
3853 is just as correct as retaining the CFA from the body
3854 of the function. Therefore, do nothing special. */
3855 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
3858 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
3859 if (!for_sibcall)
3860 emit_jump_insn (ret_rtx);
3863 /* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
3864 normally or return to a previous frame after unwinding.
3866 An EH return uses a single shared return sequence. The epilogue is
3867 exactly like a normal epilogue except that it has an extra input
3868 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
3869 that must be applied after the frame has been destroyed. An extra label
3870 is inserted before the epilogue which initializes this register to zero,
3871 and this is the entry point for a normal return.
3873 An actual EH return updates the return address, initializes the stack
3874 adjustment and jumps directly into the epilogue (bypassing the zeroing
3875 of the adjustment). Since the return address is typically saved on the
3876 stack when a function makes a call, the saved LR must be updated outside
3877 the epilogue.
3879 This poses problems as the store is generated well before the epilogue,
3880 so the offset of LR is not known yet. Also, optimizations will remove the
3881 store since it appears dead, even after the epilogue is generated (as the
3882 base or offset for loading LR is different in many cases).
3884 To avoid these problems this implementation forces the frame pointer
3885 in eh_return functions so that the location of LR is fixed and known early.
3886 It also marks the store volatile, so no optimization is permitted to
3887 remove the store. */
3889 aarch64_eh_return_handler_rtx (void)
3891 rtx tmp = gen_frame_mem (Pmode,
3892 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
3894 /* Mark the store volatile, so no optimization is permitted to remove it. */
3895 MEM_VOLATILE_P (tmp) = true;
3896 return tmp;
3899 /* Output code to add DELTA to the first argument, and then jump
3900 to FUNCTION. Used for C++ multiple inheritance. */
3901 static void
3902 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
3903 HOST_WIDE_INT delta,
3904 HOST_WIDE_INT vcall_offset,
3905 tree function)
3907 /* The this pointer is always in x0. Note that this differs from
3908 Arm, where the this pointer may be bumped to r1 if r0 is required
3909 to return a pointer to an aggregate. On AArch64 a result value
3910 pointer will be in x8. */
3911 int this_regno = R0_REGNUM;
3912 rtx this_rtx, temp0, temp1, addr, funexp;
3913 rtx_insn *insn;
3915 reload_completed = 1;
3916 emit_note (NOTE_INSN_PROLOGUE_END);
3918 if (vcall_offset == 0)
3919 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
3920 else
3922 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
3924 this_rtx = gen_rtx_REG (Pmode, this_regno);
3925 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
3926 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
3928 addr = this_rtx;
3929 if (delta != 0)
3931 if (delta >= -256 && delta < 256)
3932 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
3933 plus_constant (Pmode, this_rtx, delta));
3934 else
3935 aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
3938 if (Pmode == ptr_mode)
3939 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
3940 else
3941 aarch64_emit_move (temp0,
3942 gen_rtx_ZERO_EXTEND (Pmode,
3943 gen_rtx_MEM (ptr_mode, addr)));
3945 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
3946 addr = plus_constant (Pmode, temp0, vcall_offset);
3947 else
3949 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
3950 Pmode);
3951 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
3954 if (Pmode == ptr_mode)
3955 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
3956 else
3957 aarch64_emit_move (temp1,
3958 gen_rtx_SIGN_EXTEND (Pmode,
3959 gen_rtx_MEM (ptr_mode, addr)));
3961 emit_insn (gen_add2_insn (this_rtx, temp1));
3964 /* Generate a tail call to the target function. */
3965 if (!TREE_USED (function))
3967 assemble_external (function);
3968 TREE_USED (function) = 1;
3970 funexp = XEXP (DECL_RTL (function), 0);
3971 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
3972 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
3973 SIBLING_CALL_P (insn) = 1;
3975 insn = get_insns ();
3976 shorten_branches (insn);
3977 final_start_function (insn, file, 1);
3978 final (insn, file, 1);
3979 final_end_function ();
3981 /* Stop pretending to be a post-reload pass. */
3982 reload_completed = 0;
3985 static bool
3986 aarch64_tls_referenced_p (rtx x)
3988 if (!TARGET_HAVE_TLS)
3989 return false;
3990 subrtx_iterator::array_type array;
3991 FOR_EACH_SUBRTX (iter, array, x, ALL)
3993 const_rtx x = *iter;
3994 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
3995 return true;
3996 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3997 TLS offsets, not real symbol references. */
3998 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3999 iter.skip_subrtxes ();
4001 return false;
4005 /* Return true if val can be encoded as a 12-bit unsigned immediate with
4006 a left shift of 0 or 12 bits. */
4007 bool
4008 aarch64_uimm12_shift (HOST_WIDE_INT val)
4010 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
4011 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
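/* For example, 0x123 (shift 0) and 0x123000 (shift 12) satisfy this test,
   whereas 0x1230 does not because its set bits straddle both 12-bit fields.  */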
4016 /* Return true if val is an immediate that can be loaded into a
4017 register by a MOVZ instruction. */
4018 static bool
4019 aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
4021 if (GET_MODE_SIZE (mode) > 4)
4023 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
4024 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
4025 return 1;
4027 else
4029 /* Ignore sign extension. */
4030 val &= (HOST_WIDE_INT) 0xffffffff;
4032 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
4033 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
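/* For example, 0x12340000 has a single non-zero 16-bit field and can be
   loaded with one MOVZ (movz w0, #0x1234, lsl #16).  */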
4036 /* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
4038 static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
4040 0x0000000100000001ull,
4041 0x0001000100010001ull,
4042 0x0101010101010101ull,
4043 0x1111111111111111ull,
4044 0x5555555555555555ull,
4048 /* Return true if val is a valid bitmask immediate. */
4050 bool
4051 aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
4053 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
4054 int bits;
4056 /* Check for a single sequence of one bits and return quickly if so.
4057 The special cases of all ones and all zeroes return false. */
4058 val = (unsigned HOST_WIDE_INT) val_in;
4059 tmp = val + (val & -val);
4061 if (tmp == (tmp & -tmp))
4062 return (val + 1) > 1;
4064 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
4065 if (mode == SImode)
4066 val = (val << 32) | (val & 0xffffffff);
4068 /* Invert if the immediate doesn't start with a zero bit - this means we
4069 only need to search for sequences of one bits. */
4070 if (val & 1)
4071 val = ~val;
4073 /* Find the first set bit and set tmp to val with the first sequence of one
4074 bits removed. Return success if there is a single sequence of ones. */
4075 first_one = val & -val;
4076 tmp = val & (val + first_one);
4078 if (tmp == 0)
4079 return true;
4081 /* Find the next set bit and compute the difference in bit position. */
4082 next_one = tmp & -tmp;
4083 bits = clz_hwi (first_one) - clz_hwi (next_one);
4084 mask = val ^ tmp;
4086 /* Check the bit position difference is a power of 2, and that the first
4087 sequence of one bits fits within 'bits' bits. */
4088 if ((mask >> bits) != 0 || bits != (bits & -bits))
4089 return false;
4091 /* Check the sequence of one bits is repeated 64/bits times. */
4092 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
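/* Worked example: for 0x00ff00ff00ff00ff the value is first inverted (it
   starts with a one bit); the first two runs of ones in the inverted value
   start at bits 8 and 24, giving a repeat width of 16, and multiplying the
   isolated 0xff00 run by 0x0001000100010001 reproduces the inverted value,
   so the immediate is accepted.  */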
4095 /* Create a mask of ones covering the range from the lowest to the highest
4096 bit set in VAL_IN. Assumed precondition: VAL_IN is not zero. */
4098 unsigned HOST_WIDE_INT
4099 aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
4101 int lowest_bit_set = ctz_hwi (val_in);
4102 int highest_bit_set = floor_log2 (val_in);
4103 gcc_assert (val_in != 0);
4105 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
4106 (HOST_WIDE_INT_1U << lowest_bit_set));
4109 /* Create a constant in which every bit outside the range from the lowest
4110 set bit to the highest set bit of VAL_IN is set to 1. */
4112 unsigned HOST_WIDE_INT
4113 aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
4115 return val_in | ~aarch64_and_split_imm1 (val_in);
4118 /* Return true if VAL_IN is a valid 'and' bitmask immediate. */
4120 bool
4121 aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
4123 scalar_int_mode int_mode;
4124 if (!is_a <scalar_int_mode> (mode, &int_mode))
4125 return false;
4127 if (aarch64_bitmask_imm (val_in, int_mode))
4128 return false;
4130 if (aarch64_move_imm (val_in, int_mode))
4131 return false;
4133 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
4135 return aarch64_bitmask_imm (imm2, int_mode);
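/* For example, 0x00ff00ff00000000 is neither a bitmask nor a MOV immediate,
   but it splits into imm1 = 0x00ffffff00000000 and imm2 = 0xffff00ffffffffff,
   both valid bitmask immediates whose AND equals the original value, so the
   operation can be done with two AND-immediate instructions.  */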
4138 /* Return true if val is an immediate that can be loaded into a
4139 register in a single instruction. */
4140 bool
4141 aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
4143 scalar_int_mode int_mode;
4144 if (!is_a <scalar_int_mode> (mode, &int_mode))
4145 return false;
4147 if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
4148 return 1;
4149 return aarch64_bitmask_imm (val, int_mode);
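/* For example, 0xffff0000ffffffff is the bitwise NOT of 0x0000ffff00000000
   (a single 16-bit field), so it can be loaded with one MOVN instruction.  */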
4152 static bool
4153 aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4155 rtx base, offset;
4157 if (GET_CODE (x) == HIGH)
4158 return true;
4160 split_const (x, &base, &offset);
4161 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
4163 if (aarch64_classify_symbol (base, offset)
4164 != SYMBOL_FORCE_TO_MEM)
4165 return true;
4166 else
4167 /* Avoid generating a 64-bit relocation in ILP32; leave
4168 to aarch64_expand_mov_immediate to handle it properly. */
4169 return mode != ptr_mode;
4172 return aarch64_tls_referenced_p (x);
4175 /* Implement TARGET_CASE_VALUES_THRESHOLD.
4176 The expansion for a table switch is quite expensive due to the number
4177 of instructions, the table lookup and the hard-to-predict indirect jump.
4178 When optimizing for speed with -O3 enabled, use the per-core tuning if
4179 set, otherwise use tables for more than 16 cases as a tradeoff between
4180 size and performance. When optimizing for size, use the default setting. */
4182 static unsigned int
4183 aarch64_case_values_threshold (void)
4185 /* Use the specified limit for the number of cases before using jump
4186 tables at higher optimization levels. */
4187 if (optimize > 2
4188 && selected_cpu->tune->max_case_values != 0)
4189 return selected_cpu->tune->max_case_values;
4190 else
4191 return optimize_size ? default_case_values_threshold () : 17;
4194 /* Return true if register REGNO is a valid index register.
4195 STRICT_P is true if REG_OK_STRICT is in effect. */
4197 bool
4198 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
4200 if (!HARD_REGISTER_NUM_P (regno))
4202 if (!strict_p)
4203 return true;
4205 if (!reg_renumber)
4206 return false;
4208 regno = reg_renumber[regno];
4210 return GP_REGNUM_P (regno);
4213 /* Return true if register REGNO is a valid base register.
4214 STRICT_P is true if REG_OK_STRICT is in effect. */
4216 bool
4217 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
4219 if (!HARD_REGISTER_NUM_P (regno))
4221 if (!strict_p)
4222 return true;
4224 if (!reg_renumber)
4225 return false;
4227 regno = reg_renumber[regno];
4230 /* The fake registers will be eliminated to either the stack or
4231 hard frame pointer, both of which are usually valid base registers.
4232 Reload deals with the cases where the eliminated form isn't valid. */
4233 return (GP_REGNUM_P (regno)
4234 || regno == SP_REGNUM
4235 || regno == FRAME_POINTER_REGNUM
4236 || regno == ARG_POINTER_REGNUM);
4239 /* Return true if X is a valid base register.
4240 STRICT_P is true if REG_OK_STRICT is in effect. */
4242 static bool
4243 aarch64_base_register_rtx_p (rtx x, bool strict_p)
4245 if (!strict_p
4246 && GET_CODE (x) == SUBREG
4247 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (x))])
4248 x = SUBREG_REG (x);
4250 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
4253 /* Return true if address offset is a valid index. If it is, fill in INFO
4254 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
4256 static bool
4257 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
4258 machine_mode mode, bool strict_p)
4260 enum aarch64_address_type type;
4261 rtx index;
4262 int shift;
4264 /* (reg:P) */
4265 if ((REG_P (x) || GET_CODE (x) == SUBREG)
4266 && GET_MODE (x) == Pmode)
4268 type = ADDRESS_REG_REG;
4269 index = x;
4270 shift = 0;
4272 /* (sign_extend:DI (reg:SI)) */
4273 else if ((GET_CODE (x) == SIGN_EXTEND
4274 || GET_CODE (x) == ZERO_EXTEND)
4275 && GET_MODE (x) == DImode
4276 && GET_MODE (XEXP (x, 0)) == SImode)
4278 type = (GET_CODE (x) == SIGN_EXTEND)
4279 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4280 index = XEXP (x, 0);
4281 shift = 0;
4283 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
4284 else if (GET_CODE (x) == MULT
4285 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
4286 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
4287 && GET_MODE (XEXP (x, 0)) == DImode
4288 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
4289 && CONST_INT_P (XEXP (x, 1)))
4291 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4292 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4293 index = XEXP (XEXP (x, 0), 0);
4294 shift = exact_log2 (INTVAL (XEXP (x, 1)));
4296 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
4297 else if (GET_CODE (x) == ASHIFT
4298 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
4299 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
4300 && GET_MODE (XEXP (x, 0)) == DImode
4301 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
4302 && CONST_INT_P (XEXP (x, 1)))
4304 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4305 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4306 index = XEXP (XEXP (x, 0), 0);
4307 shift = INTVAL (XEXP (x, 1));
4309 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
4310 else if ((GET_CODE (x) == SIGN_EXTRACT
4311 || GET_CODE (x) == ZERO_EXTRACT)
4312 && GET_MODE (x) == DImode
4313 && GET_CODE (XEXP (x, 0)) == MULT
4314 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4315 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
4317 type = (GET_CODE (x) == SIGN_EXTRACT)
4318 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4319 index = XEXP (XEXP (x, 0), 0);
4320 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
4321 if (INTVAL (XEXP (x, 1)) != 32 + shift
4322 || INTVAL (XEXP (x, 2)) != 0)
4323 shift = -1;
4325 /* (and:DI (mult:DI (reg:DI) (const_int scale))
4326 (const_int 0xffffffff<<shift)) */
4327 else if (GET_CODE (x) == AND
4328 && GET_MODE (x) == DImode
4329 && GET_CODE (XEXP (x, 0)) == MULT
4330 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4331 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4332 && CONST_INT_P (XEXP (x, 1)))
4334 type = ADDRESS_REG_UXTW;
4335 index = XEXP (XEXP (x, 0), 0);
4336 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
4337 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
4338 shift = -1;
4340 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
4341 else if ((GET_CODE (x) == SIGN_EXTRACT
4342 || GET_CODE (x) == ZERO_EXTRACT)
4343 && GET_MODE (x) == DImode
4344 && GET_CODE (XEXP (x, 0)) == ASHIFT
4345 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4346 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
4348 type = (GET_CODE (x) == SIGN_EXTRACT)
4349 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
4350 index = XEXP (XEXP (x, 0), 0);
4351 shift = INTVAL (XEXP (XEXP (x, 0), 1));
4352 if (INTVAL (XEXP (x, 1)) != 32 + shift
4353 || INTVAL (XEXP (x, 2)) != 0)
4354 shift = -1;
4356 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
4357 (const_int 0xffffffff<<shift)) */
4358 else if (GET_CODE (x) == AND
4359 && GET_MODE (x) == DImode
4360 && GET_CODE (XEXP (x, 0)) == ASHIFT
4361 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
4362 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4363 && CONST_INT_P (XEXP (x, 1)))
4365 type = ADDRESS_REG_UXTW;
4366 index = XEXP (XEXP (x, 0), 0);
4367 shift = INTVAL (XEXP (XEXP (x, 0), 1));
4368 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
4369 shift = -1;
4371 /* (mult:P (reg:P) (const_int scale)) */
4372 else if (GET_CODE (x) == MULT
4373 && GET_MODE (x) == Pmode
4374 && GET_MODE (XEXP (x, 0)) == Pmode
4375 && CONST_INT_P (XEXP (x, 1)))
4377 type = ADDRESS_REG_REG;
4378 index = XEXP (x, 0);
4379 shift = exact_log2 (INTVAL (XEXP (x, 1)));
4381 /* (ashift:P (reg:P) (const_int shift)) */
4382 else if (GET_CODE (x) == ASHIFT
4383 && GET_MODE (x) == Pmode
4384 && GET_MODE (XEXP (x, 0)) == Pmode
4385 && CONST_INT_P (XEXP (x, 1)))
4387 type = ADDRESS_REG_REG;
4388 index = XEXP (x, 0);
4389 shift = INTVAL (XEXP (x, 1));
4391 else
4392 return false;
4394 if (!strict_p
4395 && GET_CODE (index) == SUBREG
4396 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
4397 index = SUBREG_REG (index);
4399 if ((shift == 0 ||
4400 (shift > 0 && shift <= 3
4401 && (1 << shift) == GET_MODE_SIZE (mode)))
4402 && REG_P (index)
4403 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
4405 info->type = type;
4406 info->offset = index;
4407 info->shift = shift;
4408 return true;
4411 return false;
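/* For example, for a DImode access the index
   (mult:DI (reg:DI x1) (const_int 8)) is classified as ADDRESS_REG_REG with
   shift 3, matching the [base, x1, lsl 3] addressing form.  */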
4414 /* Return true if MODE is one of the modes for which we
4415 support LDP/STP operations. */
4417 static bool
4418 aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
4420 return mode == SImode || mode == DImode
4421 || mode == SFmode || mode == DFmode
4422 || (aarch64_vector_mode_supported_p (mode)
4423 && GET_MODE_SIZE (mode) == 8);
4426 /* Return true if REGNO is a virtual pointer register, or an eliminable
4427 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
4428 include stack_pointer or hard_frame_pointer. */
4429 static bool
4430 virt_or_elim_regno_p (unsigned regno)
4432 return ((regno >= FIRST_VIRTUAL_REGISTER
4433 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
4434 || regno == FRAME_POINTER_REGNUM
4435 || regno == ARG_POINTER_REGNUM);
4438 /* Return true if X is a valid address for machine mode MODE. If it is,
4439 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
4440 effect. OUTER_CODE is PARALLEL for a load/store pair. */
4442 static bool
4443 aarch64_classify_address (struct aarch64_address_info *info,
4444 rtx x, machine_mode mode,
4445 RTX_CODE outer_code, bool strict_p)
4447 enum rtx_code code = GET_CODE (x);
4448 rtx op0, op1;
4450 /* On BE, we use load/store pair for all large int mode load/stores.
4451 TI/TFmode may also use a load/store pair. */
4452 bool load_store_pair_p = (outer_code == PARALLEL
4453 || mode == TImode
4454 || mode == TFmode
4455 || (BYTES_BIG_ENDIAN
4456 && aarch64_vect_struct_mode_p (mode)));
4458 bool allow_reg_index_p =
4459 !load_store_pair_p
4460 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
4461 && !aarch64_vect_struct_mode_p (mode);
4463 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
4464 REG addressing. */
4465 if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
4466 && (code != POST_INC && code != REG))
4467 return false;
4469 switch (code)
4471 case REG:
4472 case SUBREG:
4473 info->type = ADDRESS_REG_IMM;
4474 info->base = x;
4475 info->offset = const0_rtx;
4476 return aarch64_base_register_rtx_p (x, strict_p);
4478 case PLUS:
4479 op0 = XEXP (x, 0);
4480 op1 = XEXP (x, 1);
4482 if (! strict_p
4483 && REG_P (op0)
4484 && virt_or_elim_regno_p (REGNO (op0))
4485 && CONST_INT_P (op1))
4487 info->type = ADDRESS_REG_IMM;
4488 info->base = op0;
4489 info->offset = op1;
4491 return true;
4494 if (GET_MODE_SIZE (mode) != 0
4495 && CONST_INT_P (op1)
4496 && aarch64_base_register_rtx_p (op0, strict_p))
4498 HOST_WIDE_INT offset = INTVAL (op1);
4500 info->type = ADDRESS_REG_IMM;
4501 info->base = op0;
4502 info->offset = op1;
4504 /* TImode and TFmode values are allowed in both pairs of X
4505 registers and individual Q registers. The available
4506 address modes are:
4507 X,X: 7-bit signed scaled offset
4508 Q: 9-bit signed offset
4509 We conservatively require an offset representable in either mode.
4510 When performing the check for pairs of X registers i.e. LDP/STP
4511 pass down DImode since that is the natural size of the LDP/STP
4512 instruction memory accesses. */
4513 if (mode == TImode || mode == TFmode)
4514 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
4515 && (offset_9bit_signed_unscaled_p (mode, offset)
4516 || offset_12bit_unsigned_scaled_p (mode, offset)));
4518 /* A 7-bit offset check because OImode will emit an ldp/stp
4519 instruction (only big endian will get here).
4520 For ldp/stp instructions, the offset is scaled for the size of a
4521 single element of the pair. */
4522 if (mode == OImode)
4523 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
4525 /* Three 9/12-bit offset checks because CImode will emit three
4526 ldr/str instructions (only big endian will get here). */
4527 if (mode == CImode)
4528 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4529 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
4530 || offset_12bit_unsigned_scaled_p (V16QImode,
4531 offset + 32)));
4533 /* Two 7-bit offset checks because XImode will emit two ldp/stp
4534 instructions (only big endian will get here). */
4535 if (mode == XImode)
4536 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
4537 && aarch64_offset_7bit_signed_scaled_p (TImode,
4538 offset + 32));
4540 if (load_store_pair_p)
4541 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
4542 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
4543 else
4544 return (offset_9bit_signed_unscaled_p (mode, offset)
4545 || offset_12bit_unsigned_scaled_p (mode, offset));
4548 if (allow_reg_index_p)
4550 /* Look for base + (scaled/extended) index register. */
4551 if (aarch64_base_register_rtx_p (op0, strict_p)
4552 && aarch64_classify_index (info, op1, mode, strict_p))
4554 info->base = op0;
4555 return true;
4557 if (aarch64_base_register_rtx_p (op1, strict_p)
4558 && aarch64_classify_index (info, op0, mode, strict_p))
4560 info->base = op1;
4561 return true;
4565 return false;
4567 case POST_INC:
4568 case POST_DEC:
4569 case PRE_INC:
4570 case PRE_DEC:
4571 info->type = ADDRESS_REG_WB;
4572 info->base = XEXP (x, 0);
4573 info->offset = NULL_RTX;
4574 return aarch64_base_register_rtx_p (info->base, strict_p);
4576 case POST_MODIFY:
4577 case PRE_MODIFY:
4578 info->type = ADDRESS_REG_WB;
4579 info->base = XEXP (x, 0);
4580 if (GET_CODE (XEXP (x, 1)) == PLUS
4581 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
4582 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
4583 && aarch64_base_register_rtx_p (info->base, strict_p))
4585 HOST_WIDE_INT offset;
4586 info->offset = XEXP (XEXP (x, 1), 1);
4587 offset = INTVAL (info->offset);
4589 /* TImode and TFmode values are allowed in both pairs of X
4590 registers and individual Q registers. The available
4591 address modes are:
4592 X,X: 7-bit signed scaled offset
4593 Q: 9-bit signed offset
4594 We conservatively require an offset representable in either mode. */
4596 if (mode == TImode || mode == TFmode)
4597 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
4598 && offset_9bit_signed_unscaled_p (mode, offset));
4600 if (load_store_pair_p)
4601 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
4602 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
4603 else
4604 return offset_9bit_signed_unscaled_p (mode, offset);
4606 return false;
4608 case CONST:
4609 case SYMBOL_REF:
4610 case LABEL_REF:
4611 /* load literal: pc-relative constant pool entry. Only supported
4612 for SI mode or larger. */
4613 info->type = ADDRESS_SYMBOLIC;
4615 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
4617 rtx sym, addend;
4619 split_const (x, &sym, &addend);
4620 return ((GET_CODE (sym) == LABEL_REF
4621 || (GET_CODE (sym) == SYMBOL_REF
4622 && CONSTANT_POOL_ADDRESS_P (sym)
4623 && aarch64_pcrelative_literal_loads)));
4625 return false;
4627 case LO_SUM:
4628 info->type = ADDRESS_LO_SUM;
4629 info->base = XEXP (x, 0);
4630 info->offset = XEXP (x, 1);
4631 if (allow_reg_index_p
4632 && aarch64_base_register_rtx_p (info->base, strict_p))
4634 rtx sym, offs;
4635 split_const (info->offset, &sym, &offs);
4636 if (GET_CODE (sym) == SYMBOL_REF
4637 && (aarch64_classify_symbol (sym, offs) == SYMBOL_SMALL_ABSOLUTE))
4639 /* The symbol and offset must be aligned to the access size. */
4640 unsigned int align;
4641 unsigned int ref_size;
4643 if (CONSTANT_POOL_ADDRESS_P (sym))
4644 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
4645 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
4647 tree exp = SYMBOL_REF_DECL (sym);
4648 align = TYPE_ALIGN (TREE_TYPE (exp));
4649 align = aarch64_constant_alignment (exp, align);
4651 else if (SYMBOL_REF_DECL (sym))
4652 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
4653 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
4654 && SYMBOL_REF_BLOCK (sym) != NULL)
4655 align = SYMBOL_REF_BLOCK (sym)->alignment;
4656 else
4657 align = BITS_PER_UNIT;
4659 ref_size = GET_MODE_SIZE (mode);
4660 if (ref_size == 0)
4661 ref_size = GET_MODE_SIZE (DImode);
4663 return ((INTVAL (offs) & (ref_size - 1)) == 0
4664 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
4667 return false;
4669 default:
4670 return false;
4674 /* Return true if the address X is valid for a PRFM instruction.
4675 STRICT_P is true if we should do strict checking with
4676 aarch64_classify_address. */
4678 bool
4679 aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
4681 struct aarch64_address_info addr;
4683 /* PRFM accepts the same addresses as DImode... */
4684 bool res = aarch64_classify_address (&addr, x, DImode, MEM, strict_p);
4685 if (!res)
4686 return false;
4688 /* ... except writeback forms. */
4689 return addr.type != ADDRESS_REG_WB;
4692 bool
4693 aarch64_symbolic_address_p (rtx x)
4695 rtx offset;
4697 split_const (x, &x, &offset);
4698 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
4701 /* Classify the base of symbolic expression X. */
4703 enum aarch64_symbol_type
4704 aarch64_classify_symbolic_expression (rtx x)
4706 rtx offset;
4708 split_const (x, &x, &offset);
4709 return aarch64_classify_symbol (x, offset);
4713 /* Return TRUE if X is a legitimate address for accessing memory in
4714 mode MODE. */
4715 static bool
4716 aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
4718 struct aarch64_address_info addr;
4720 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
4723 /* Return TRUE if X is a legitimate address for accessing memory in
4724 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
4725 pair operation. */
4726 bool
4727 aarch64_legitimate_address_p (machine_mode mode, rtx x,
4728 RTX_CODE outer_code, bool strict_p)
4730 struct aarch64_address_info addr;
4732 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
4735 /* Split an out-of-range address displacement into a base and offset.
4736 Use a 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
4737 to increase opportunities for sharing the base address between accesses
4738 of different sizes. Unaligned accesses use the signed 9-bit range, while
4739 TImode/TFmode use the intersection of the signed scaled 7-bit and signed 9-bit offset ranges. */
4740 static bool
4741 aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
4743 HOST_WIDE_INT offset = INTVAL (*disp);
4744 HOST_WIDE_INT base;
4746 if (mode == TImode || mode == TFmode)
4747 base = (offset + 0x100) & ~0x1f8;
4748 else if ((offset & (GET_MODE_SIZE (mode) - 1)) != 0)
4749 base = (offset + 0x100) & ~0x1ff;
4750 else
4751 base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
4753 *off = GEN_INT (base);
4754 *disp = GEN_INT (offset - base);
4755 return true;
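/* For example, an SImode access at the unaligned offset 0x2007 is split into
   a base of 0x2000 plus a residual displacement of 7, which fits the signed
   9-bit unscaled range.  */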
4758 /* Return the binary representation of floating point constant VALUE in INTVAL.
4759 If the value cannot be converted, return false without setting INTVAL.
4760 The conversion is done in the mode of VALUE. */
4761 bool
4762 aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
4765 /* We make a general exception for 0. */
4766 if (aarch64_float_const_zero_rtx_p (value))
4768 *intval = 0;
4769 return true;
4772 machine_mode mode = GET_MODE (value);
4773 if (GET_CODE (value) != CONST_DOUBLE
4774 || !SCALAR_FLOAT_MODE_P (mode)
4775 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
4776 /* Only support up to DF mode. */
4777 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
4778 return false;
4780 unsigned HOST_WIDE_INT ival = 0;
4782 long res[2];
4783 real_to_target (res,
4784 CONST_DOUBLE_REAL_VALUE (value),
4785 REAL_MODE_FORMAT (mode));
4787 if (mode == DFmode)
4789 int order = BYTES_BIG_ENDIAN ? 1 : 0;
4790 ival = zext_hwi (res[order], 32);
4791 ival |= (zext_hwi (res[1 - order], 32) << 32);
4793 else
4794 ival = zext_hwi (res[0], 32);
4796 *intval = ival;
4797 return true;
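/* For example, the DFmode constant 1.0 is returned as the IEEE
   double-precision bit pattern 0x3ff0000000000000.  */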
4800 /* Return TRUE if rtx X is an immediate constant that can be moved using a
4801 single MOV(+MOVK) followed by an FMOV. */
4802 bool
4803 aarch64_float_const_rtx_p (rtx x)
4805 machine_mode mode = GET_MODE (x);
4806 if (mode == VOIDmode)
4807 return false;
4809 /* Determine whether it's cheaper to write float constants as
4810 mov/movk pairs rather than ldr/adrp pairs. */
4811 unsigned HOST_WIDE_INT ival;
4813 if (GET_CODE (x) == CONST_DOUBLE
4814 && SCALAR_FLOAT_MODE_P (mode)
4815 && aarch64_reinterpret_float_as_int (x, &ival))
4817 scalar_int_mode imode = (mode == HFmode
4818 ? SImode
4819 : int_mode_for_mode (mode).require ());
4820 int num_instr = aarch64_internal_mov_immediate
4821 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
4822 return num_instr < 3;
4825 return false;
4828 /* Return TRUE if rtx X is the immediate constant 0.0. */
4829 bool
4830 aarch64_float_const_zero_rtx_p (rtx x)
4832 if (GET_MODE (x) == VOIDmode)
4833 return false;
4835 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
4836 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
4837 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
4840 /* Return TRUE if rtx X is an immediate constant that fits in a single
4841 MOVI immediate operation. */
4842 bool
4843 aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
4845 if (!TARGET_SIMD)
4846 return false;
4848 machine_mode vmode;
4849 scalar_int_mode imode;
4850 unsigned HOST_WIDE_INT ival;
4852 if (GET_CODE (x) == CONST_DOUBLE
4853 && SCALAR_FLOAT_MODE_P (mode))
4855 if (!aarch64_reinterpret_float_as_int (x, &ival))
4856 return false;
4858 /* We make a general exception for 0. */
4859 if (aarch64_float_const_zero_rtx_p (x))
4860 return true;
4862 imode = int_mode_for_mode (mode).require ();
4864 else if (GET_CODE (x) == CONST_INT
4865 && is_a <scalar_int_mode> (mode, &imode))
4866 ival = INTVAL (x);
4867 else
4868 return false;
4870 /* Use a 64-bit vector mode for everything except DImode/DFmode, where we
4871 use a 128-bit vector mode. */
4872 int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
4874 vmode = aarch64_simd_container_mode (imode, width);
4875 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
4877 return aarch64_simd_valid_immediate (v_op, vmode, false, NULL);
4881 /* Return the fixed registers used for condition codes. */
4883 static bool
4884 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
4886 *p1 = CC_REGNUM;
4887 *p2 = INVALID_REGNUM;
4888 return true;
4891 /* This function is used by the call expanders of the machine description.
4892 RESULT is the register in which the result is returned. It's NULL for
4893 "call" and "sibcall".
4894 MEM is the location of the function call.
4895 SIBCALL indicates whether this is a normal call or a sibling call; a
4896 different pattern is generated accordingly. */
4898 void
4899 aarch64_expand_call (rtx result, rtx mem, bool sibcall)
4901 rtx call, callee, tmp;
4902 rtvec vec;
4903 machine_mode mode;
4905 gcc_assert (MEM_P (mem));
4906 callee = XEXP (mem, 0);
4907 mode = GET_MODE (callee);
4908 gcc_assert (mode == Pmode);
4910 /* Decide if we should generate indirect calls by loading the
4911 address of the callee into a register before performing
4912 the branch-and-link. */
4913 if (SYMBOL_REF_P (callee)
4914 ? (aarch64_is_long_call_p (callee)
4915 || aarch64_is_noplt_call_p (callee))
4916 : !REG_P (callee))
4917 XEXP (mem, 0) = force_reg (mode, callee);
4919 call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
4921 if (result != NULL_RTX)
4922 call = gen_rtx_SET (result, call);
4924 if (sibcall)
4925 tmp = ret_rtx;
4926 else
4927 tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM));
4929 vec = gen_rtvec (2, call, tmp);
4930 call = gen_rtx_PARALLEL (VOIDmode, vec);
4932 aarch64_emit_call_insn (call);
4935 /* Emit call insn with PAT and do aarch64-specific handling. */
4937 void
4938 aarch64_emit_call_insn (rtx pat)
4940 rtx insn = emit_call_insn (pat);
4942 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
4943 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
4944 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
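/* The IP0/IP1 clobbers record that x16 and x17 may be corrupted across the
   call, since linker-inserted long-branch veneers and PLT stubs are allowed
   to use these registers.  */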
4947 machine_mode
4948 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
4950 /* All floating point compares return CCFP if it is an equality
4951 comparison, and CCFPE otherwise. */
4952 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
4954 switch (code)
4956 case EQ:
4957 case NE:
4958 case UNORDERED:
4959 case ORDERED:
4960 case UNLT:
4961 case UNLE:
4962 case UNGT:
4963 case UNGE:
4964 case UNEQ:
4965 case LTGT:
4966 return CCFPmode;
4968 case LT:
4969 case LE:
4970 case GT:
4971 case GE:
4972 return CCFPEmode;
4974 default:
4975 gcc_unreachable ();
4979 /* Equality comparisons of short modes against zero can be performed
4980 using the TST instruction with the appropriate bitmask. */
4981 if (y == const0_rtx && REG_P (x)
4982 && (code == EQ || code == NE)
4983 && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
4984 return CC_NZmode;
4986 /* Similarly, comparisons of zero_extends from shorter modes can
4987 be performed using an ANDS with an immediate mask. */
4988 if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
4989 && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4990 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
4991 && (code == EQ || code == NE))
4992 return CC_NZmode;
4994 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4995 && y == const0_rtx
4996 && (code == EQ || code == NE || code == LT || code == GE)
4997 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
4998 || GET_CODE (x) == NEG
4999 || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
5000 && CONST_INT_P (XEXP (x, 2)))))
5001 return CC_NZmode;
5003 /* A compare with a shifted operand. Because of canonicalization,
5004 the comparison will have to be swapped when we emit the assembly
5005 code. */
5006 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
5007 && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
5008 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
5009 || GET_CODE (x) == LSHIFTRT
5010 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
5011 return CC_SWPmode;
5013 /* Similarly for a negated operand, but we can only do this for
5014 equalities. */
5015 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
5016 && (REG_P (y) || GET_CODE (y) == SUBREG)
5017 && (code == EQ || code == NE)
5018 && GET_CODE (x) == NEG)
5019 return CC_Zmode;
5021 /* A test for unsigned overflow. */
5022 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
5023 && code == NE
5024 && GET_CODE (x) == PLUS
5025 && GET_CODE (y) == ZERO_EXTEND)
5026 return CC_Cmode;
5028 /* For everything else, return CCmode. */
5029 return CCmode;
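/* Note that when CC_SWPmode is selected, the comparison operands are swapped
   when the assembly is emitted; aarch64_get_condition_code_1 below compensates
   by mapping each condition to its swapped counterpart (GE<->LE, GT<->LT, and
   so on).  */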
5032 static int
5033 aarch64_get_condition_code_1 (machine_mode, enum rtx_code);
5036 aarch64_get_condition_code (rtx x)
5038 machine_mode mode = GET_MODE (XEXP (x, 0));
5039 enum rtx_code comp_code = GET_CODE (x);
5041 if (GET_MODE_CLASS (mode) != MODE_CC)
5042 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
5043 return aarch64_get_condition_code_1 (mode, comp_code);
5046 static int
5047 aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
5049 switch (mode)
5051 case E_CCFPmode:
5052 case E_CCFPEmode:
5053 switch (comp_code)
5055 case GE: return AARCH64_GE;
5056 case GT: return AARCH64_GT;
5057 case LE: return AARCH64_LS;
5058 case LT: return AARCH64_MI;
5059 case NE: return AARCH64_NE;
5060 case EQ: return AARCH64_EQ;
5061 case ORDERED: return AARCH64_VC;
5062 case UNORDERED: return AARCH64_VS;
5063 case UNLT: return AARCH64_LT;
5064 case UNLE: return AARCH64_LE;
5065 case UNGT: return AARCH64_HI;
5066 case UNGE: return AARCH64_PL;
5067 default: return -1;
5069 break;
5071 case E_CCmode:
5072 switch (comp_code)
5074 case NE: return AARCH64_NE;
5075 case EQ: return AARCH64_EQ;
5076 case GE: return AARCH64_GE;
5077 case GT: return AARCH64_GT;
5078 case LE: return AARCH64_LE;
5079 case LT: return AARCH64_LT;
5080 case GEU: return AARCH64_CS;
5081 case GTU: return AARCH64_HI;
5082 case LEU: return AARCH64_LS;
5083 case LTU: return AARCH64_CC;
5084 default: return -1;
5086 break;
5088 case E_CC_SWPmode:
5089 switch (comp_code)
5091 case NE: return AARCH64_NE;
5092 case EQ: return AARCH64_EQ;
5093 case GE: return AARCH64_LE;
5094 case GT: return AARCH64_LT;
5095 case LE: return AARCH64_GE;
5096 case LT: return AARCH64_GT;
5097 case GEU: return AARCH64_LS;
5098 case GTU: return AARCH64_CC;
5099 case LEU: return AARCH64_CS;
5100 case LTU: return AARCH64_HI;
5101 default: return -1;
5103 break;
5105 case E_CC_NZmode:
5106 switch (comp_code)
5108 case NE: return AARCH64_NE;
5109 case EQ: return AARCH64_EQ;
5110 case GE: return AARCH64_PL;
5111 case LT: return AARCH64_MI;
5112 default: return -1;
5114 break;
5116 case E_CC_Zmode:
5117 switch (comp_code)
5119 case NE: return AARCH64_NE;
5120 case EQ: return AARCH64_EQ;
5121 default: return -1;
5123 break;
5125 case E_CC_Cmode:
5126 switch (comp_code)
5128 case NE: return AARCH64_CS;
5129 case EQ: return AARCH64_CC;
5130 default: return -1;
5132 break;
5134 default:
5135 return -1;
5138 return -1;
5141 bool
5142 aarch64_const_vec_all_same_in_range_p (rtx x,
5143 HOST_WIDE_INT minval,
5144 HOST_WIDE_INT maxval)
5146 HOST_WIDE_INT firstval;
5147 int count, i;
5149 if (GET_CODE (x) != CONST_VECTOR
5150 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
5151 return false;
5153 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
5154 if (firstval < minval || firstval > maxval)
5155 return false;
5157 count = CONST_VECTOR_NUNITS (x);
5158 for (i = 1; i < count; i++)
5159 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
5160 return false;
5162 return true;
5165 bool
5166 aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
5168 return aarch64_const_vec_all_same_in_range_p (x, val, val);
5172 /* N Z C V. */
5173 #define AARCH64_CC_V 1
5174 #define AARCH64_CC_C (1 << 1)
5175 #define AARCH64_CC_Z (1 << 2)
5176 #define AARCH64_CC_N (1 << 3)
5178 /* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
5179 static const int aarch64_nzcv_codes[] =
5181 0, /* EQ, Z == 1. */
5182 AARCH64_CC_Z, /* NE, Z == 0. */
5183 0, /* CS, C == 1. */
5184 AARCH64_CC_C, /* CC, C == 0. */
5185 0, /* MI, N == 1. */
5186 AARCH64_CC_N, /* PL, N == 0. */
5187 0, /* VS, V == 1. */
5188 AARCH64_CC_V, /* VC, V == 0. */
5189 0, /* HI, C == 1 && Z == 0. */
5190 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
5191 AARCH64_CC_V, /* GE, N == V. */
5192 0, /* LT, N != V. */
5193 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
5194 0, /* LE, !(Z == 0 && N == V). */
5195 0, /* AL, Any. */
5196 0 /* NV, Any. */
5199 /* Print operand X to file F in a target specific manner according to CODE.
5200 The acceptable formatting commands given by CODE are:
5201 'c': An integer or symbol address without a preceding #
5202 sign.
5203 'e': Print the sign/zero-extend size as a character 8->b,
5204 16->h, 32->w.
5205 'p': Prints N such that 2^N == X (X must be power of 2 and
5206 const int).
5207 'P': Print the number of non-zero bits in X (a const_int).
5208 'H': Print the higher numbered register of a pair (TImode)
5209 of regs.
5210 'm': Print a condition (eq, ne, etc).
5211 'M': Same as 'm', but invert condition.
5212 'b/h/s/d/q': Print a scalar FP/SIMD register name.
5213 'S/T/U/V': Print a FP/SIMD register name for a register list.
5214 The register printed is the FP/SIMD register name
5215 of X + 0/1/2/3 for S/T/U/V.
5216 'R': Print a scalar FP/SIMD register name + 1.
5217 'X': Print bottom 16 bits of integer constant in hex.
5218 'w/x': Print a general register name or the zero register
5219 (32-bit or 64-bit).
5220 '0': Print a normal operand, if it's a general register,
5221 then we assume DImode.
5222 'k': Print NZCV for conditional compare instructions.
5223 'A': Output address constant representing the first
5224 argument of X, specifying a relocation offset
5225 if appropriate.
5226 'L': Output constant address specified by X
5227 with a relocation offset if appropriate.
5228 'G': Prints address of X, specifying a PC relative
5229 relocation mode if appropriate.
5230 'y': Output address of LDP or STP - this is used for
5231 some LDP/STPs which don't use a PARALLEL in their
5232 pattern (so the mode needs to be adjusted).
5233 'z': Output address of a typical LDP or STP. */
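/* For example, in an output template "%w0" and "%x0" print the 32-bit and
   64-bit names of the general register in operand 0 (such as "w1" or "x1"),
   while "%k1" prints the NZCV immediate used by conditional compare
   instructions.  */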
5235 static void
5236 aarch64_print_operand (FILE *f, rtx x, int code)
5238 switch (code)
5240 case 'c':
5241 switch (GET_CODE (x))
5243 case CONST_INT:
5244 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5245 break;
5247 case SYMBOL_REF:
5248 output_addr_const (f, x);
5249 break;
5251 case CONST:
5252 if (GET_CODE (XEXP (x, 0)) == PLUS
5253 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5255 output_addr_const (f, x);
5256 break;
5258 /* Fall through. */
5260 default:
5261 output_operand_lossage ("Unsupported operand for code '%c'", code);
5263 break;
5265 case 'e':
5267 int n;
5269 if (!CONST_INT_P (x)
5270 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
5272 output_operand_lossage ("invalid operand for '%%%c'", code);
5273 return;
5276 switch (n)
5278 case 3:
5279 fputc ('b', f);
5280 break;
5281 case 4:
5282 fputc ('h', f);
5283 break;
5284 case 5:
5285 fputc ('w', f);
5286 break;
5287 default:
5288 output_operand_lossage ("invalid operand for '%%%c'", code);
5289 return;
5292 break;
5294 case 'p':
5296 int n;
5298 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
5300 output_operand_lossage ("invalid operand for '%%%c'", code);
5301 return;
5304 asm_fprintf (f, "%d", n);
5306 break;
5308 case 'P':
5309 if (!CONST_INT_P (x))
5311 output_operand_lossage ("invalid operand for '%%%c'", code);
5312 return;
5315 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
5316 break;
5318 case 'H':
5319 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
5321 output_operand_lossage ("invalid operand for '%%%c'", code);
5322 return;
5325 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
5326 break;
5328 case 'M':
5329 case 'm':
5331 int cond_code;
5332 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
5333 if (x == const_true_rtx)
5335 if (code == 'M')
5336 fputs ("nv", f);
5337 return;
5340 if (!COMPARISON_P (x))
5342 output_operand_lossage ("invalid operand for '%%%c'", code);
5343 return;
5346 cond_code = aarch64_get_condition_code (x);
5347 gcc_assert (cond_code >= 0);
5348 if (code == 'M')
5349 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
5350 fputs (aarch64_condition_codes[cond_code], f);
5352 break;
5354 case 'b':
5355 case 'h':
5356 case 's':
5357 case 'd':
5358 case 'q':
5359 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5361 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5362 return;
5364 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
5365 break;
5367 case 'S':
5368 case 'T':
5369 case 'U':
5370 case 'V':
5371 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5373 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5374 return;
5376 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
5377 break;
5379 case 'R':
5380 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
5382 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
5383 return;
5385 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
5386 break;
5388 case 'X':
5389 if (!CONST_INT_P (x))
5391 output_operand_lossage ("invalid operand for '%%%c'", code);
5392 return;
5394 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
5395 break;
5397 case 'w':
5398 case 'x':
5399 if (x == const0_rtx
5400 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
5402 asm_fprintf (f, "%czr", code);
5403 break;
5406 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
5408 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
5409 break;
5412 if (REG_P (x) && REGNO (x) == SP_REGNUM)
5414 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
5415 break;
5418 /* Fall through */
5420 case 0:
5421 if (x == NULL)
5423 output_operand_lossage ("missing operand");
5424 return;
5427 switch (GET_CODE (x))
5429 case REG:
5430 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
5431 break;
5433 case MEM:
5434 output_address (GET_MODE (x), XEXP (x, 0));
5435 break;
5437 case CONST:
5438 case LABEL_REF:
5439 case SYMBOL_REF:
5440 output_addr_const (asm_out_file, x);
5441 break;
5443 case CONST_INT:
5444 asm_fprintf (f, "%wd", INTVAL (x));
5445 break;
5447 case CONST_VECTOR:
5448 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
5450 gcc_assert (
5451 aarch64_const_vec_all_same_in_range_p (x,
5452 HOST_WIDE_INT_MIN,
5453 HOST_WIDE_INT_MAX));
5454 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
5456 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
5458 fputc ('0', f);
5460 else
5461 gcc_unreachable ();
5462 break;
5464 case CONST_DOUBLE:
5465 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
5466 be getting CONST_DOUBLEs holding integers. */
5467 gcc_assert (GET_MODE (x) != VOIDmode);
5468 if (aarch64_float_const_zero_rtx_p (x))
5470 fputc ('0', f);
5471 break;
5473 else if (aarch64_float_const_representable_p (x))
5475 #define buf_size 20
5476 char float_buf[buf_size] = {'\0'};
5477 real_to_decimal_for_mode (float_buf,
5478 CONST_DOUBLE_REAL_VALUE (x),
5479 buf_size, buf_size,
5480 1, GET_MODE (x));
5481 asm_fprintf (asm_out_file, "%s", float_buf);
5482 break;
5483 #undef buf_size
5485 output_operand_lossage ("invalid constant");
5486 return;
5487 default:
5488 output_operand_lossage ("invalid operand");
5489 return;
5491 break;
5493 case 'A':
5494 if (GET_CODE (x) == HIGH)
5495 x = XEXP (x, 0);
5497 switch (aarch64_classify_symbolic_expression (x))
5499 case SYMBOL_SMALL_GOT_4G:
5500 asm_fprintf (asm_out_file, ":got:");
5501 break;
5503 case SYMBOL_SMALL_TLSGD:
5504 asm_fprintf (asm_out_file, ":tlsgd:");
5505 break;
5507 case SYMBOL_SMALL_TLSDESC:
5508 asm_fprintf (asm_out_file, ":tlsdesc:");
5509 break;
5511 case SYMBOL_SMALL_TLSIE:
5512 asm_fprintf (asm_out_file, ":gottprel:");
5513 break;
5515 case SYMBOL_TLSLE24:
5516 asm_fprintf (asm_out_file, ":tprel:");
5517 break;
5519 case SYMBOL_TINY_GOT:
5520 gcc_unreachable ();
5521 break;
5523 default:
5524 break;
5526 output_addr_const (asm_out_file, x);
5527 break;
5529 case 'L':
5530 switch (aarch64_classify_symbolic_expression (x))
5532 case SYMBOL_SMALL_GOT_4G:
5533 asm_fprintf (asm_out_file, ":lo12:");
5534 break;
5536 case SYMBOL_SMALL_TLSGD:
5537 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
5538 break;
5540 case SYMBOL_SMALL_TLSDESC:
5541 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
5542 break;
5544 case SYMBOL_SMALL_TLSIE:
5545 asm_fprintf (asm_out_file, ":gottprel_lo12:");
5546 break;
5548 case SYMBOL_TLSLE12:
5549 asm_fprintf (asm_out_file, ":tprel_lo12:");
5550 break;
5552 case SYMBOL_TLSLE24:
5553 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
5554 break;
5556 case SYMBOL_TINY_GOT:
5557 asm_fprintf (asm_out_file, ":got:");
5558 break;
5560 case SYMBOL_TINY_TLSIE:
5561 asm_fprintf (asm_out_file, ":gottprel:");
5562 break;
5564 default:
5565 break;
5567 output_addr_const (asm_out_file, x);
5568 break;
5570 case 'G':
5571 switch (aarch64_classify_symbolic_expression (x))
5573 case SYMBOL_TLSLE24:
5574 asm_fprintf (asm_out_file, ":tprel_hi12:");
5575 break;
5576 default:
5577 break;
5579 output_addr_const (asm_out_file, x);
5580 break;
5582 case 'k':
5584 HOST_WIDE_INT cond_code;
5586 if (!CONST_INT_P (x))
5588 output_operand_lossage ("invalid operand for '%%%c'", code);
5589 return;
5592 cond_code = INTVAL (x);
5593 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
5594 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
5596 break;
5598 case 'y':
5599 case 'z':
5601 machine_mode mode = GET_MODE (x);
5603 if (GET_CODE (x) != MEM
5604 || (code == 'y' && GET_MODE_SIZE (mode) != 16))
5606 output_operand_lossage ("invalid operand for '%%%c'", code);
5607 return;
5610 if (code == 'y')
5611 /* LDP/STP which uses a single double-width memory operand.
5612 Adjust the mode to appear like a typical LDP/STP.
5613 Currently this is supported for 16-byte accesses only. */
5614 mode = DFmode;
5616 if (!aarch64_print_ldpstp_address (f, mode, XEXP (x, 0)))
5617 output_operand_lossage ("invalid operand prefix '%%%c'", code);
5619 break;
5621 default:
5622 output_operand_lossage ("invalid operand prefix '%%%c'", code);
5623 return;
5627 /* Print address 'x' of a memory access with mode 'mode'.
5628 'op' is the context required by aarch64_classify_address. It can either be
5629 MEM for a normal memory access or PARALLEL for LDP/STP. */
5630 static bool
5631 aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x, RTX_CODE op)
5633 struct aarch64_address_info addr;
5635 /* Check all addresses are Pmode - including ILP32. */
5636 gcc_assert (GET_MODE (x) == Pmode);
5638 if (aarch64_classify_address (&addr, x, mode, op, true))
5639 switch (addr.type)
5641 case ADDRESS_REG_IMM:
5642 if (addr.offset == const0_rtx)
5643 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
5644 else
5645 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
5646 INTVAL (addr.offset));
5647 return true;
5649 case ADDRESS_REG_REG:
5650 if (addr.shift == 0)
5651 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
5652 reg_names [REGNO (addr.offset)]);
5653 else
5654 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
5655 reg_names [REGNO (addr.offset)], addr.shift);
5656 return true;
5658 case ADDRESS_REG_UXTW:
5659 if (addr.shift == 0)
5660 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
5661 REGNO (addr.offset) - R0_REGNUM);
5662 else
5663 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
5664 REGNO (addr.offset) - R0_REGNUM, addr.shift);
5665 return true;
5667 case ADDRESS_REG_SXTW:
5668 if (addr.shift == 0)
5669 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
5670 REGNO (addr.offset) - R0_REGNUM);
5671 else
5672 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
5673 REGNO (addr.offset) - R0_REGNUM, addr.shift);
5674 return true;
5676 case ADDRESS_REG_WB:
5677 switch (GET_CODE (x))
5679 case PRE_INC:
5680 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
5681 GET_MODE_SIZE (mode));
5682 return true;
5683 case POST_INC:
5684 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
5685 GET_MODE_SIZE (mode));
5686 return true;
5687 case PRE_DEC:
5688 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
5689 GET_MODE_SIZE (mode));
5690 return true;
5691 case POST_DEC:
5692 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
5693 GET_MODE_SIZE (mode));
5694 return true;
5695 case PRE_MODIFY:
5696 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
5697 INTVAL (addr.offset));
5698 return true;
5699 case POST_MODIFY:
5700 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
5701 INTVAL (addr.offset));
5702 return true;
5703 default:
5704 break;
5706 break;
5708 case ADDRESS_LO_SUM:
5709 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
5710 output_addr_const (f, addr.offset);
5711 asm_fprintf (f, "]");
5712 return true;
5714 case ADDRESS_SYMBOLIC:
5715 output_addr_const (f, x);
5716 return true;
5719 return false;
5722 /* Print address 'x' of a LDP/STP with mode 'mode'. */
5723 static bool
5724 aarch64_print_ldpstp_address (FILE *f, machine_mode mode, rtx x)
5726 return aarch64_print_address_internal (f, mode, x, PARALLEL);
5729 /* Print address 'x' of a memory access with mode 'mode'. */
5730 static void
5731 aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
5733 if (!aarch64_print_address_internal (f, mode, x, MEM))
5734 output_addr_const (f, x);
5737 bool
5738 aarch64_label_mentioned_p (rtx x)
5740 const char *fmt;
5741 int i;
5743 if (GET_CODE (x) == LABEL_REF)
5744 return true;
5746 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
5747 referencing instruction, but they are constant offsets, not
5748 symbols. */
5749 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5750 return false;
5752 fmt = GET_RTX_FORMAT (GET_CODE (x));
5753 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5755 if (fmt[i] == 'E')
5757 int j;
5759 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5760 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
5761 return 1;
5763 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
5764 return 1;
5767 return 0;
5770 /* Implement REGNO_REG_CLASS. */
5772 enum reg_class
5773 aarch64_regno_regclass (unsigned regno)
5775 if (GP_REGNUM_P (regno))
5776 return GENERAL_REGS;
5778 if (regno == SP_REGNUM)
5779 return STACK_REG;
5781 if (regno == FRAME_POINTER_REGNUM
5782 || regno == ARG_POINTER_REGNUM)
5783 return POINTER_REGS;
5785 if (FP_REGNUM_P (regno))
5786 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
5788 return NO_REGS;
5791 static rtx
5792 aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
5794 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
5795 where mask is selected by alignment and size of the offset.
5796 We try to pick as large a range for the offset as possible to
5797 maximize the chance of a CSE. However, for aligned addresses
5798 we limit the range to 4k so that structures with different sized
5799 elements are likely to use the same base. We need to be careful
5800 not to split a CONST for some forms of address expression, otherwise
5801 it will generate sub-optimal code. */
5803 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
5805 rtx base = XEXP (x, 0);
5806 rtx offset_rtx = XEXP (x, 1);
5807 HOST_WIDE_INT offset = INTVAL (offset_rtx);
5809 if (GET_CODE (base) == PLUS)
5811 rtx op0 = XEXP (base, 0);
5812 rtx op1 = XEXP (base, 1);
5814 /* Force any scaling into a temp for CSE. */
5815 op0 = force_reg (Pmode, op0);
5816 op1 = force_reg (Pmode, op1);
5818 /* Let the pointer register be in op0. */
5819 if (REG_POINTER (op1))
5820 std::swap (op0, op1);
5822 /* If the pointer is virtual or frame related, then we know that
5823 virtual register instantiation or register elimination is going
5824 to apply a second constant. We want the two constants folded
5825 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
5826 if (virt_or_elim_regno_p (REGNO (op0)))
5828 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
5829 NULL_RTX, true, OPTAB_DIRECT);
5830 return gen_rtx_PLUS (Pmode, base, op1);
5833 /* Otherwise, in order to encourage CSE (and thence loop-strength
5834 reduction) of scaled addresses, emit as (OP0 + OP1) + CONST. */
5835 base = expand_binop (Pmode, add_optab, op0, op1,
5836 NULL_RTX, true, OPTAB_DIRECT);
5837 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
5840 /* Does it look like we'll need a 16-byte load/store-pair operation? */
5841 HOST_WIDE_INT base_offset;
5842 if (GET_MODE_SIZE (mode) > 16)
5843 base_offset = (offset + 0x400) & ~0x7f0;
5844 /* For offsets that aren't a multiple of the access size, the limit is
5845 -256...255. */
5846 else if (offset & (GET_MODE_SIZE (mode) - 1))
5848 base_offset = (offset + 0x100) & ~0x1ff;
5850 /* BLKmode typically uses LDP of X-registers. */
5851 if (mode == BLKmode)
5852 base_offset = (offset + 512) & ~0x3ff;
5854 /* Small negative offsets are supported. */
5855 else if (IN_RANGE (offset, -256, 0))
5856 base_offset = 0;
5857 else if (mode == TImode || mode == TFmode)
5858 base_offset = (offset + 0x100) & ~0x1ff;
5859 /* Use a 12-bit offset scaled by the access size. */
5860 else
5861 base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
5863 if (base_offset != 0)
5865 base = plus_constant (Pmode, base, base_offset);
5866 base = force_operand (base, NULL_RTX);
5867 return plus_constant (Pmode, base, offset - base_offset);
5871 return x;
5874 /* Return the reload icode required for a constant pool in mode. */
5875 static enum insn_code
5876 aarch64_constant_pool_reload_icode (machine_mode mode)
5878 switch (mode)
5880 case E_SFmode:
5881 return CODE_FOR_aarch64_reload_movcpsfdi;
5883 case E_DFmode:
5884 return CODE_FOR_aarch64_reload_movcpdfdi;
5886 case E_TFmode:
5887 return CODE_FOR_aarch64_reload_movcptfdi;
5889 case E_V8QImode:
5890 return CODE_FOR_aarch64_reload_movcpv8qidi;
5892 case E_V16QImode:
5893 return CODE_FOR_aarch64_reload_movcpv16qidi;
5895 case E_V4HImode:
5896 return CODE_FOR_aarch64_reload_movcpv4hidi;
5898 case E_V8HImode:
5899 return CODE_FOR_aarch64_reload_movcpv8hidi;
5901 case E_V2SImode:
5902 return CODE_FOR_aarch64_reload_movcpv2sidi;
5904 case E_V4SImode:
5905 return CODE_FOR_aarch64_reload_movcpv4sidi;
5907 case E_V2DImode:
5908 return CODE_FOR_aarch64_reload_movcpv2didi;
5910 case E_V2DFmode:
5911 return CODE_FOR_aarch64_reload_movcpv2dfdi;
5913 default:
5914 gcc_unreachable ();
5917 gcc_unreachable ();
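/* Implement TARGET_SECONDARY_RELOAD.  Either request a scratch register
   class (FP_REGS for 16-byte memory accesses reloaded into GENERAL_REGS,
   GENERAL_REGS for TImode/TFmode constants destined for FP_REGS), or
   return NO_REGS and set SRI->icode when a dedicated reload pattern is
   needed: for literal-pool references when PC-relative literal loads are
   disabled, and for Q-register to Q-register copies without TARGET_SIMD.  */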
5919 static reg_class_t
5920 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
5921 reg_class_t rclass,
5922 machine_mode mode,
5923 secondary_reload_info *sri)
5926 /* If we have to disable direct literal pool loads and stores because the
5927 function is too big, then we need a scratch register. */
5928 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
5929 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
5930 || targetm.vector_mode_supported_p (GET_MODE (x)))
5931 && !aarch64_pcrelative_literal_loads)
5933 sri->icode = aarch64_constant_pool_reload_icode (mode);
5934 return NO_REGS;
5937 /* Without the TARGET_SIMD instructions we cannot move a Q register
5938 to a Q register directly. We need a scratch. */
5939 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
5940 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
5941 && reg_class_subset_p (rclass, FP_REGS))
5943 if (mode == TFmode)
5944 sri->icode = CODE_FOR_aarch64_reload_movtf;
5945 else if (mode == TImode)
5946 sri->icode = CODE_FOR_aarch64_reload_movti;
5947 return NO_REGS;
5950 /* A TFmode or TImode memory access should be handled via an FP_REGS
5951 because AArch64 has richer addressing modes for LDR/STR instructions
5952 than LDP/STP instructions. */
5953 if (TARGET_FLOAT && rclass == GENERAL_REGS
5954 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
5955 return FP_REGS;
5957 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
5958 return GENERAL_REGS;
5960 return NO_REGS;
5963 static bool
5964 aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
5966 gcc_assert (from == ARG_POINTER_REGNUM || from == FRAME_POINTER_REGNUM);
5968 /* If we need a frame pointer, ARG_POINTER_REGNUM and FRAME_POINTER_REGNUM
5969 can only eliminate to HARD_FRAME_POINTER_REGNUM. */
5970 if (frame_pointer_needed)
5971 return to == HARD_FRAME_POINTER_REGNUM;
5972 return true;
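/* Implement INITIAL_ELIMINATION_OFFSET.  Return the byte offset to add
   when eliminating FROM (the arg or frame pointer) in favour of TO (the
   hard frame pointer or stack pointer), using the offsets recorded by
   aarch64_layout_frame.  */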
5975 HOST_WIDE_INT
5976 aarch64_initial_elimination_offset (unsigned from, unsigned to)
5978 aarch64_layout_frame ();
5980 if (to == HARD_FRAME_POINTER_REGNUM)
5982 if (from == ARG_POINTER_REGNUM)
5983 return cfun->machine->frame.hard_fp_offset;
5985 if (from == FRAME_POINTER_REGNUM)
5986 return cfun->machine->frame.hard_fp_offset
5987 - cfun->machine->frame.locals_offset;
5990 if (to == STACK_POINTER_REGNUM)
5992 if (from == FRAME_POINTER_REGNUM)
5993 return cfun->machine->frame.frame_size
5994 - cfun->machine->frame.locals_offset;
5997 return cfun->machine->frame.frame_size;
6000 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
6001 previous frame. */
6004 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
6006 if (count != 0)
6007 return const0_rtx;
6008 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
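/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.  The template is two
   PC-relative literal loads (the target function address into IP1 and the
   static chain value into the static chain register) followed by an
   indirect branch through IP1.  The two pointer-sized literal slots are
   emitted as zeros here and filled in by aarch64_trampoline_init.  */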
6012 static void
6013 aarch64_asm_trampoline_template (FILE *f)
6015 if (TARGET_ILP32)
6017 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
6018 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
6020 else
6022 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
6023 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
6025 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
6026 assemble_aligned_integer (4, const0_rtx);
6027 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
6028 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
6031 static void
6032 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
6034 rtx fnaddr, mem, a_tramp;
6035 const int tramp_code_sz = 16;
6037 /* No need to copy the trailing D-words; we fill those in below. */
6038 emit_block_move (m_tramp, assemble_trampoline_template (),
6039 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
6040 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
6041 fnaddr = XEXP (DECL_RTL (fndecl), 0);
6042 if (GET_MODE (fnaddr) != ptr_mode)
6043 fnaddr = convert_memory_address (ptr_mode, fnaddr);
6044 emit_move_insn (mem, fnaddr);
6046 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
6047 emit_move_insn (mem, chain_value);
6049 /* XXX We should really define a "clear_cache" pattern and use
6050 gen_clear_cache(). */
6051 a_tramp = XEXP (m_tramp, 0);
6052 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
6053 LCT_NORMAL, VOIDmode, a_tramp, ptr_mode,
6054 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
6055 ptr_mode);
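/* Implement TARGET_CLASS_MAX_NREGS.  Return how many registers of class
   REGCLASS are needed to hold a value of mode MODE; for example a 16-byte
   vector mode needs a single V-register, while TImode in GENERAL_REGS
   needs two X-registers.  */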
6058 static unsigned char
6059 aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
6061 switch (regclass)
6063 case CALLER_SAVE_REGS:
6064 case POINTER_REGS:
6065 case GENERAL_REGS:
6066 case ALL_REGS:
6067 case POINTER_AND_FP_REGS:
6068 case FP_REGS:
6069 case FP_LO_REGS:
6070 return
6071 aarch64_vector_mode_p (mode)
6072 ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
6073 : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6074 case STACK_REG:
6075 return 1;
6077 case NO_REGS:
6078 return 0;
6080 default:
6081 break;
6083 gcc_unreachable ();
6086 static reg_class_t
6087 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
6089 if (regclass == POINTER_REGS)
6090 return GENERAL_REGS;
6092 if (regclass == STACK_REG)
6094 if (REG_P (x)
6095 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
6096 return regclass;
6098 return NO_REGS;
6101 /* Register elimination can result in a request for
6102 SP+constant->FP_REGS. We cannot support such operations, which
6103 use SP as the source and an FP_REG as the destination, so reject
6104 them outright. */
6105 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
6107 rtx lhs = XEXP (x, 0);
6109 /* Look through a possible SUBREG introduced by ILP32. */
6110 if (GET_CODE (lhs) == SUBREG)
6111 lhs = SUBREG_REG (lhs);
6113 gcc_assert (REG_P (lhs));
6114 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
6115 POINTER_REGS));
6116 return NO_REGS;
6119 return regclass;
6122 void
6123 aarch64_asm_output_labelref (FILE* f, const char *name)
6125 asm_fprintf (f, "%U%s", name);
6128 static void
6129 aarch64_elf_asm_constructor (rtx symbol, int priority)
6131 if (priority == DEFAULT_INIT_PRIORITY)
6132 default_ctor_section_asm_out_constructor (symbol, priority);
6133 else
6135 section *s;
6136 /* Although priority is known to be in the range [0, 65535], and so
6137 18 bytes would be enough, the compiler might not know that. To avoid
6138 a -Wformat-truncation false positive, use a larger size. */
6139 char buf[23];
6140 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
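	  /* The "%.5u" zero-pads the priority, e.g. priority 65 gives
	     ".init_array.00065", so that lexicographic section ordering
	     matches numeric priority order.  */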
6141 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
6142 switch_to_section (s);
6143 assemble_align (POINTER_SIZE);
6144 assemble_aligned_integer (POINTER_BYTES, symbol);
6148 static void
6149 aarch64_elf_asm_destructor (rtx symbol, int priority)
6151 if (priority == DEFAULT_INIT_PRIORITY)
6152 default_dtor_section_asm_out_destructor (symbol, priority);
6153 else
6155 section *s;
6156 /* Although priority is known to be in the range [0, 65535], and so
6157 18 bytes would be enough, the compiler might not know that. To avoid
6158 a -Wformat-truncation false positive, use a larger size. */
6159 char buf[23];
6160 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
6161 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
6162 switch_to_section (s);
6163 assemble_align (POINTER_SIZE);
6164 assemble_aligned_integer (POINTER_BYTES, symbol);
6168 const char*
6169 aarch64_output_casesi (rtx *operands)
6171 char buf[100];
6172 char label[100];
6173 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
6174 int index;
6175 static const char *const patterns[4][2] =
6178 "ldrb\t%w3, [%0,%w1,uxtw]",
6179 "add\t%3, %4, %w3, sxtb #2"
6182 "ldrh\t%w3, [%0,%w1,uxtw #1]",
6183 "add\t%3, %4, %w3, sxth #2"
6186 "ldr\t%w3, [%0,%w1,uxtw #2]",
6187 "add\t%3, %4, %w3, sxtw #2"
6189 /* We assume that DImode is only generated when not optimizing and
6190 that we don't really need 64-bit address offsets. That would
6191 imply an object file with 8GB of code in a single function! */
6193 "ldr\t%w3, [%0,%w1,uxtw #2]",
6194 "add\t%3, %4, %w3, sxtw #2"
6198 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
6200 scalar_int_mode mode = as_a <scalar_int_mode> (GET_MODE (diff_vec));
6201 index = exact_log2 (GET_MODE_SIZE (mode));
6203 gcc_assert (index >= 0 && index <= 3);
6205 /* Need to implement table size reduction, by changing the code below. */
6206 output_asm_insn (patterns[index][0], operands);
6207 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
6208 snprintf (buf, sizeof (buf),
6209 "adr\t%%4, %s", targetm.strip_name_encoding (label));
6210 output_asm_insn (buf, operands);
6211 output_asm_insn (patterns[index][1], operands);
6212 output_asm_insn ("br\t%3", operands);
6213 assemble_label (asm_out_file, label);
6214 return "";
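/* The emitted sequence is: a table load (LDRB/LDRH/LDR depending on the
   entry size), an ADR that materialises the address of the Lrtx label
   emitted just after the branch, an ADD that scales the sign-extended
   entry by 4 and adds it to that base, and finally a BR through the
   result.  */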
6218 /* Return size in bits of an arithmetic operand which is shifted/scaled and
6219 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
6220 operator. */
6223 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
6225 if (shift >= 0 && shift <= 3)
6227 int size;
6228 for (size = 8; size <= 32; size *= 2)
6230 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
6231 if (mask == bits << shift)
6232 return size;
6235 return 0;
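/* For example, aarch64_uxt_size (1, 0x1fe) returns 8, because 0x1fe is
   0xff shifted left by one and therefore matches a UXTB operand with a
   shift of 1; a mask that is not 0xff, 0xffff or 0xffffffff shifted by
   0..3 yields 0.  */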
6238 /* Constant pools are per function only when PC relative
6239 literal loads are true or we are in the large memory
6240 model. */
6242 static inline bool
6243 aarch64_can_use_per_function_literal_pools_p (void)
6245 return (aarch64_pcrelative_literal_loads
6246 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
6249 static bool
6250 aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
6252 /* FIXME: In an ideal world this would work similarly
6253 to the logic in aarch64_select_rtx_section, but this
6254 breaks bootstrap in gccgo. For now we work around
6255 this by returning false here. */
6256 return false;
6259 /* Select appropriate section for constants depending
6260 on where we place literal pools. */
6262 static section *
6263 aarch64_select_rtx_section (machine_mode mode,
6264 rtx x,
6265 unsigned HOST_WIDE_INT align)
6267 if (aarch64_can_use_per_function_literal_pools_p ())
6268 return function_section (current_function_decl);
6270 return default_elf_select_rtx_section (mode, x, align);
6273 /* Implement ASM_OUTPUT_POOL_EPILOGUE. */
6274 void
6275 aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
6276 HOST_WIDE_INT offset)
6278 /* When using per-function literal pools, we must ensure that any code
6279 section is aligned to the minimal instruction length, lest we get
6280 errors from the assembler about "unaligned instructions". */
6281 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
6282 ASM_OUTPUT_ALIGN (f, 2);
6285 /* Costs. */
6287 /* Helper function for rtx cost calculation. Strip a shift expression
6288 from X. Returns the inner operand if successful, or the original
6289 expression on failure. */
6290 static rtx
6291 aarch64_strip_shift (rtx x)
6293 rtx op = x;
6295 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
6296 we can convert both to ROR during final output. */
6297 if ((GET_CODE (op) == ASHIFT
6298 || GET_CODE (op) == ASHIFTRT
6299 || GET_CODE (op) == LSHIFTRT
6300 || GET_CODE (op) == ROTATERT
6301 || GET_CODE (op) == ROTATE)
6302 && CONST_INT_P (XEXP (op, 1)))
6303 return XEXP (op, 0);
6305 if (GET_CODE (op) == MULT
6306 && CONST_INT_P (XEXP (op, 1))
6307 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
6308 return XEXP (op, 0);
6310 return x;
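/* For instance, (ashift (reg) (const_int 3)) and its multiplication form
   (mult (reg) (const_int 8)) both strip to (reg); a shift by a register
   amount is returned unchanged.  */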
6313 /* Helper function for rtx cost calculation. Strip an extend
6314 expression from X. Returns the inner operand if successful, or the
6315 original expression on failure. We deal with a number of possible
6316 canonicalization variations here. If STRIP_SHIFT is true, then
6317 we can strip off a shift also. */
6318 static rtx
6319 aarch64_strip_extend (rtx x, bool strip_shift)
6321 scalar_int_mode mode;
6322 rtx op = x;
6324 if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
6325 return op;
6327 /* Zero and sign extraction of a widened value. */
6328 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
6329 && XEXP (op, 2) == const0_rtx
6330 && GET_CODE (XEXP (op, 0)) == MULT
6331 && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
6332 XEXP (op, 1)))
6333 return XEXP (XEXP (op, 0), 0);
6335 /* It can also be represented (for zero-extend) as an AND with an
6336 immediate. */
6337 if (GET_CODE (op) == AND
6338 && GET_CODE (XEXP (op, 0)) == MULT
6339 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
6340 && CONST_INT_P (XEXP (op, 1))
6341 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
6342 INTVAL (XEXP (op, 1))) != 0)
6343 return XEXP (XEXP (op, 0), 0);
6345 /* Now handle extended register, as this may also have an optional
6346 left shift by 1..4. */
6347 if (strip_shift
6348 && GET_CODE (op) == ASHIFT
6349 && CONST_INT_P (XEXP (op, 1))
6350 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
6351 op = XEXP (op, 0);
6353 if (GET_CODE (op) == ZERO_EXTEND
6354 || GET_CODE (op) == SIGN_EXTEND)
6355 op = XEXP (op, 0);
6357 if (op != x)
6358 return op;
6360 return x;
6363 /* Return true iff CODE is a shift supported in combination
6364 with arithmetic instructions. */
6366 static bool
6367 aarch64_shift_p (enum rtx_code code)
6369 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
6373 /* Return true iff X is a cheap shift without a sign extend. */
6375 static bool
6376 aarch64_cheap_mult_shift_p (rtx x)
6378 rtx op0, op1;
6380 op0 = XEXP (x, 0);
6381 op1 = XEXP (x, 1);
6383 if (!(aarch64_tune_params.extra_tuning_flags
6384 & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
6385 return false;
6387 if (GET_CODE (op0) == SIGN_EXTEND)
6388 return false;
6390 if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
6391 && UINTVAL (op1) <= 4)
6392 return true;
6394 if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
6395 return false;
6397 HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
6399 if (l2 > 0 && l2 <= 4)
6400 return true;
6402 return false;
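/* For example, with AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND set,
   (ashift (reg) (const_int 3)) is considered cheap, while
   (mult (reg) (const_int 64)) is not, since the implied shift amount of 6
   is outside the 1..4 range.  */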
6405 /* Helper function for rtx cost calculation. Calculate the cost of
6406 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
6407 Return the calculated cost of the expression, recursing manually in to
6408 operands where needed. */
6410 static int
6411 aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
6413 rtx op0, op1;
6414 const struct cpu_cost_table *extra_cost
6415 = aarch64_tune_params.insn_extra_cost;
6416 int cost = 0;
6417 bool compound_p = (outer == PLUS || outer == MINUS);
6418 machine_mode mode = GET_MODE (x);
6420 gcc_checking_assert (code == MULT);
6422 op0 = XEXP (x, 0);
6423 op1 = XEXP (x, 1);
6425 if (VECTOR_MODE_P (mode))
6426 mode = GET_MODE_INNER (mode);
6428 /* Integer multiply/fma. */
6429 if (GET_MODE_CLASS (mode) == MODE_INT)
6431 /* The multiply will be canonicalized as a shift, cost it as such. */
6432 if (aarch64_shift_p (GET_CODE (x))
6433 || (CONST_INT_P (op1)
6434 && exact_log2 (INTVAL (op1)) > 0))
6436 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
6437 || GET_CODE (op0) == SIGN_EXTEND;
6438 if (speed)
6440 if (compound_p)
6442 /* If the shift is considered cheap,
6443 then don't add any cost. */
6444 if (aarch64_cheap_mult_shift_p (x))
6446 else if (REG_P (op1))
6447 /* ARITH + shift-by-register. */
6448 cost += extra_cost->alu.arith_shift_reg;
6449 else if (is_extend)
6450 /* ARITH + extended register. We don't have a cost field
6451 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
6452 cost += extra_cost->alu.extend_arith;
6453 else
6454 /* ARITH + shift-by-immediate. */
6455 cost += extra_cost->alu.arith_shift;
6457 else
6458 /* LSL (immediate). */
6459 cost += extra_cost->alu.shift;
6462 /* Strip extends as we will have costed them in the case above. */
6463 if (is_extend)
6464 op0 = aarch64_strip_extend (op0, true);
6466 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
6468 return cost;
6471 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
6472 compound and let the below cases handle it. After all, MNEG is a
6473 special-case alias of MSUB. */
6474 if (GET_CODE (op0) == NEG)
6476 op0 = XEXP (op0, 0);
6477 compound_p = true;
6480 /* Integer multiplies or FMAs have zero/sign extending variants. */
6481 if ((GET_CODE (op0) == ZERO_EXTEND
6482 && GET_CODE (op1) == ZERO_EXTEND)
6483 || (GET_CODE (op0) == SIGN_EXTEND
6484 && GET_CODE (op1) == SIGN_EXTEND))
6486 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
6487 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
6489 if (speed)
6491 if (compound_p)
6492 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
6493 cost += extra_cost->mult[0].extend_add;
6494 else
6495 /* MUL/SMULL/UMULL. */
6496 cost += extra_cost->mult[0].extend;
6499 return cost;
6502 /* This is either an integer multiply or a MADD. In both cases
6503 we want to recurse and cost the operands. */
6504 cost += rtx_cost (op0, mode, MULT, 0, speed);
6505 cost += rtx_cost (op1, mode, MULT, 1, speed);
6507 if (speed)
6509 if (compound_p)
6510 /* MADD/MSUB. */
6511 cost += extra_cost->mult[mode == DImode].add;
6512 else
6513 /* MUL. */
6514 cost += extra_cost->mult[mode == DImode].simple;
6517 return cost;
6519 else
6521 if (speed)
6523 /* Floating-point FMA/FMUL can also support negations of the
6524 operands, unless the rounding mode is upward or downward in
6525 which case FNMUL is different from FMUL with operand negation. */
6526 bool neg0 = GET_CODE (op0) == NEG;
6527 bool neg1 = GET_CODE (op1) == NEG;
6528 if (compound_p || !flag_rounding_math || (neg0 && neg1))
6530 if (neg0)
6531 op0 = XEXP (op0, 0);
6532 if (neg1)
6533 op1 = XEXP (op1, 0);
6536 if (compound_p)
6537 /* FMADD/FNMADD/FNMSUB/FMSUB. */
6538 cost += extra_cost->fp[mode == DFmode].fma;
6539 else
6540 /* FMUL/FNMUL. */
6541 cost += extra_cost->fp[mode == DFmode].mult;
6544 cost += rtx_cost (op0, mode, MULT, 0, speed);
6545 cost += rtx_cost (op1, mode, MULT, 1, speed);
6546 return cost;
6550 static int
6551 aarch64_address_cost (rtx x,
6552 machine_mode mode,
6553 addr_space_t as ATTRIBUTE_UNUSED,
6554 bool speed)
6556 enum rtx_code c = GET_CODE (x);
6557 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
6558 struct aarch64_address_info info;
6559 int cost = 0;
6560 info.shift = 0;
6562 if (!aarch64_classify_address (&info, x, mode, c, false))
6564 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
6566 /* This is a CONST or SYMBOL ref which will be split
6567 in a different way depending on the code model in use.
6568 Cost it through the generic infrastructure. */
6569 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
6570 /* Divide through by the cost of one instruction to
6571 bring it to the same units as the address costs. */
6572 cost_symbol_ref /= COSTS_N_INSNS (1);
6573 /* The cost is then the cost of preparing the address,
6574 followed by an immediate (possibly 0) offset. */
6575 return cost_symbol_ref + addr_cost->imm_offset;
6577 else
6579 /* This is most likely a jump table from a case
6580 statement. */
6581 return addr_cost->register_offset;
6585 switch (info.type)
6587 case ADDRESS_LO_SUM:
6588 case ADDRESS_SYMBOLIC:
6589 case ADDRESS_REG_IMM:
6590 cost += addr_cost->imm_offset;
6591 break;
6593 case ADDRESS_REG_WB:
6594 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
6595 cost += addr_cost->pre_modify;
6596 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
6597 cost += addr_cost->post_modify;
6598 else
6599 gcc_unreachable ();
6601 break;
6603 case ADDRESS_REG_REG:
6604 cost += addr_cost->register_offset;
6605 break;
6607 case ADDRESS_REG_SXTW:
6608 cost += addr_cost->register_sextend;
6609 break;
6611 case ADDRESS_REG_UXTW:
6612 cost += addr_cost->register_zextend;
6613 break;
6615 default:
6616 gcc_unreachable ();
6620 if (info.shift > 0)
6622 /* For the sake of calculating the cost of the shifted register
6623 component, we can treat same sized modes in the same way. */
6624 switch (GET_MODE_BITSIZE (mode))
6626 case 16:
6627 cost += addr_cost->addr_scale_costs.hi;
6628 break;
6630 case 32:
6631 cost += addr_cost->addr_scale_costs.si;
6632 break;
6634 case 64:
6635 cost += addr_cost->addr_scale_costs.di;
6636 break;
6638 /* We can't tell, or this is a 128-bit vector. */
6639 default:
6640 cost += addr_cost->addr_scale_costs.ti;
6641 break;
6645 return cost;
6648 /* Return the cost of a branch. If SPEED_P is true then the compiler is
6649 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
6650 to be taken. */
6653 aarch64_branch_cost (bool speed_p, bool predictable_p)
6655 /* When optimizing for speed, use the cost of unpredictable branches. */
6656 const struct cpu_branch_cost *branch_costs =
6657 aarch64_tune_params.branch_costs;
6659 if (!speed_p || predictable_p)
6660 return branch_costs->predictable;
6661 else
6662 return branch_costs->unpredictable;
6665 /* Return true if the RTX X in mode MODE is a zero or sign extract
6666 usable in an ADD or SUB (extended register) instruction. */
6667 static bool
6668 aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
6670 /* Catch add with a sign extract.
6671 This is add_<optab><mode>_multp2. */
6672 if (GET_CODE (x) == SIGN_EXTRACT
6673 || GET_CODE (x) == ZERO_EXTRACT)
6675 rtx op0 = XEXP (x, 0);
6676 rtx op1 = XEXP (x, 1);
6677 rtx op2 = XEXP (x, 2);
6679 if (GET_CODE (op0) == MULT
6680 && CONST_INT_P (op1)
6681 && op2 == const0_rtx
6682 && CONST_INT_P (XEXP (op0, 1))
6683 && aarch64_is_extend_from_extract (mode,
6684 XEXP (op0, 1),
6685 op1))
6687 return true;
6690 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
6691 No shift. */
6692 else if (GET_CODE (x) == SIGN_EXTEND
6693 || GET_CODE (x) == ZERO_EXTEND)
6694 return REG_P (XEXP (x, 0));
6696 return false;
6699 static bool
6700 aarch64_frint_unspec_p (unsigned int u)
6702 switch (u)
6704 case UNSPEC_FRINTZ:
6705 case UNSPEC_FRINTP:
6706 case UNSPEC_FRINTM:
6707 case UNSPEC_FRINTA:
6708 case UNSPEC_FRINTN:
6709 case UNSPEC_FRINTX:
6710 case UNSPEC_FRINTI:
6711 return true;
6713 default:
6714 return false;
6718 /* Return true iff X is an rtx that will match an extr instruction
6719 i.e. as described in the *extr<mode>5_insn family of patterns.
6720 OP0 and OP1 will be set to the operands of the shifts involved
6721 on success and will be NULL_RTX otherwise. */
6723 static bool
6724 aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
6726 rtx op0, op1;
6727 scalar_int_mode mode;
6728 if (!is_a <scalar_int_mode> (GET_MODE (x), &mode))
6729 return false;
6731 *res_op0 = NULL_RTX;
6732 *res_op1 = NULL_RTX;
6734 if (GET_CODE (x) != IOR)
6735 return false;
6737 op0 = XEXP (x, 0);
6738 op1 = XEXP (x, 1);
6740 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
6741 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
6743 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
6744 if (GET_CODE (op1) == ASHIFT)
6745 std::swap (op0, op1);
6747 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
6748 return false;
6750 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
6751 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
6753 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
6754 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
6756 *res_op0 = XEXP (op0, 0);
6757 *res_op1 = XEXP (op1, 0);
6758 return true;
6762 return false;
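/* For example, in DImode
     (ior (ashift (reg A) (const_int 16)) (lshiftrt (reg B) (const_int 48)))
   matches, setting *RES_OP0 to A and *RES_OP1 to B, because the two shift
   amounts sum to the mode bitsize of 64.  */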
6765 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
6766 storing it in *COST. Result is true if the total cost of the operation
6767 has now been calculated. */
6768 static bool
6769 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
6771 rtx inner;
6772 rtx comparator;
6773 enum rtx_code cmpcode;
6775 if (COMPARISON_P (op0))
6777 inner = XEXP (op0, 0);
6778 comparator = XEXP (op0, 1);
6779 cmpcode = GET_CODE (op0);
6781 else
6783 inner = op0;
6784 comparator = const0_rtx;
6785 cmpcode = NE;
6788 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
6790 /* Conditional branch. */
6791 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
6792 return true;
6793 else
6795 if (cmpcode == NE || cmpcode == EQ)
6797 if (comparator == const0_rtx)
6799 /* TBZ/TBNZ/CBZ/CBNZ. */
6800 if (GET_CODE (inner) == ZERO_EXTRACT)
6801 /* TBZ/TBNZ. */
6802 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
6803 ZERO_EXTRACT, 0, speed);
6804 else
6805 /* CBZ/CBNZ. */
6806 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
6808 return true;
6811 else if (cmpcode == LT || cmpcode == GE)
6813 /* TBZ/TBNZ. */
6814 if (comparator == const0_rtx)
6815 return true;
6819 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
6821 /* CCMP. */
6822 if (GET_CODE (op1) == COMPARE)
6824 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
6825 if (XEXP (op1, 1) == const0_rtx)
6826 *cost += 1;
6827 if (speed)
6829 machine_mode mode = GET_MODE (XEXP (op1, 0));
6830 const struct cpu_cost_table *extra_cost
6831 = aarch64_tune_params.insn_extra_cost;
6833 if (GET_MODE_CLASS (mode) == MODE_INT)
6834 *cost += extra_cost->alu.arith;
6835 else
6836 *cost += extra_cost->fp[mode == DFmode].compare;
6838 return true;
6841 /* It's a conditional operation based on the status flags,
6842 so it must be some flavor of CSEL. */
6844 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
6845 if (GET_CODE (op1) == NEG
6846 || GET_CODE (op1) == NOT
6847 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
6848 op1 = XEXP (op1, 0);
6849 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
6851 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
6852 op1 = XEXP (op1, 0);
6853 op2 = XEXP (op2, 0);
6856 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
6857 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
6858 return true;
6861 /* We don't know what this is, cost all operands. */
6862 return false;
6865 /* Check whether X is a bitfield operation of the form shift + extend that
6866 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
6867 operand to which the bitfield operation is applied. Otherwise return
6868 NULL_RTX. */
6870 static rtx
6871 aarch64_extend_bitfield_pattern_p (rtx x)
6873 rtx_code outer_code = GET_CODE (x);
6874 machine_mode outer_mode = GET_MODE (x);
6876 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
6877 && outer_mode != SImode && outer_mode != DImode)
6878 return NULL_RTX;
6880 rtx inner = XEXP (x, 0);
6881 rtx_code inner_code = GET_CODE (inner);
6882 machine_mode inner_mode = GET_MODE (inner);
6883 rtx op = NULL_RTX;
6885 switch (inner_code)
6887 case ASHIFT:
6888 if (CONST_INT_P (XEXP (inner, 1))
6889 && (inner_mode == QImode || inner_mode == HImode))
6890 op = XEXP (inner, 0);
6891 break;
6892 case LSHIFTRT:
6893 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
6894 && (inner_mode == QImode || inner_mode == HImode))
6895 op = XEXP (inner, 0);
6896 break;
6897 case ASHIFTRT:
6898 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
6899 && (inner_mode == QImode || inner_mode == HImode))
6900 op = XEXP (inner, 0);
6901 break;
6902 default:
6903 break;
6906 return op;
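/* For instance, (zero_extend:SI (lshiftrt:HI (reg:HI R) (const_int N)))
   returns R, since the shift plus zero-extension maps onto a single UBFX;
   the sign_extend/ashiftrt combination is accepted in the same way for
   SBFX.  */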
6909 /* Return true if the mask and a shift amount from an RTX of the form
6910 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
6911 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
6913 bool
6914 aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
6915 rtx shft_amnt)
6917 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
6918 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
6919 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
6920 && (INTVAL (mask) & ((1 << INTVAL (shft_amnt)) - 1)) == 0;
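/* For example, in SImode a shift amount of 8 with mask 0xff00 is accepted:
   (0xff00 >> 8) + 1 is a power of two and no mask bits lie below the shift
   amount, so (X << 8) & 0xff00 can become a single UBFIZ.  */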
6923 /* Calculate the cost of calculating X, storing it in *COST. Result
6924 is true if the total cost of the operation has now been calculated. */
6925 static bool
6926 aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
6927 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
6929 rtx op0, op1, op2;
6930 const struct cpu_cost_table *extra_cost
6931 = aarch64_tune_params.insn_extra_cost;
6932 int code = GET_CODE (x);
6933 scalar_int_mode int_mode;
6935 /* By default, assume that everything has equivalent cost to the
6936 cheapest instruction. Any additional costs are applied as a delta
6937 above this default. */
6938 *cost = COSTS_N_INSNS (1);
6940 switch (code)
6942 case SET:
6943 /* The cost depends entirely on the operands to SET. */
6944 *cost = 0;
6945 op0 = SET_DEST (x);
6946 op1 = SET_SRC (x);
6948 switch (GET_CODE (op0))
6950 case MEM:
6951 if (speed)
6953 rtx address = XEXP (op0, 0);
6954 if (VECTOR_MODE_P (mode))
6955 *cost += extra_cost->ldst.storev;
6956 else if (GET_MODE_CLASS (mode) == MODE_INT)
6957 *cost += extra_cost->ldst.store;
6958 else if (mode == SFmode)
6959 *cost += extra_cost->ldst.storef;
6960 else if (mode == DFmode)
6961 *cost += extra_cost->ldst.stored;
6963 *cost +=
6964 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6965 0, speed));
6968 *cost += rtx_cost (op1, mode, SET, 1, speed);
6969 return true;
6971 case SUBREG:
6972 if (! REG_P (SUBREG_REG (op0)))
6973 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
6975 /* Fall through. */
6976 case REG:
6977 /* The cost is one per vector-register copied. */
6978 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
6980 int nregs = aarch64_hard_regno_nregs (V0_REGNUM, GET_MODE (op0));
6981 *cost = COSTS_N_INSNS (nregs);
6983 /* const0_rtx is in general free, but we will use an
6984 instruction to set a register to 0. */
6985 else if (REG_P (op1) || op1 == const0_rtx)
6987 /* The cost is 1 per register copied. */
6988 int nregs = aarch64_hard_regno_nregs (R0_REGNUM, GET_MODE (op0));
6989 *cost = COSTS_N_INSNS (nregs);
6991 else
6992 /* Cost is just the cost of the RHS of the set. */
6993 *cost += rtx_cost (op1, mode, SET, 1, speed);
6994 return true;
6996 case ZERO_EXTRACT:
6997 case SIGN_EXTRACT:
6998 /* Bit-field insertion. Strip any redundant widening of
6999 the RHS to meet the width of the target. */
7000 if (GET_CODE (op1) == SUBREG)
7001 op1 = SUBREG_REG (op1);
7002 if ((GET_CODE (op1) == ZERO_EXTEND
7003 || GET_CODE (op1) == SIGN_EXTEND)
7004 && CONST_INT_P (XEXP (op0, 1))
7005 && is_a <scalar_int_mode> (GET_MODE (XEXP (op1, 0)), &int_mode)
7006 && GET_MODE_BITSIZE (int_mode) >= INTVAL (XEXP (op0, 1)))
7007 op1 = XEXP (op1, 0);
7009 if (CONST_INT_P (op1))
7011 /* MOV immediate is assumed to always be cheap. */
7012 *cost = COSTS_N_INSNS (1);
7014 else
7016 /* BFM. */
7017 if (speed)
7018 *cost += extra_cost->alu.bfi;
7019 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
7022 return true;
7024 default:
7025 /* We can't make sense of this, assume default cost. */
7026 *cost = COSTS_N_INSNS (1);
7027 return false;
7029 return false;
7031 case CONST_INT:
7032 /* If an instruction can incorporate a constant within the
7033 instruction, the instruction's expression avoids calling
7034 rtx_cost() on the constant. If rtx_cost() is called on a
7035 constant, then it is usually because the constant must be
7036 moved into a register by one or more instructions.
7038 The exception is constant 0, which can be expressed
7039 as XZR/WZR and is therefore free; the one complication is that
7040 (set (reg) (const0_rtx)) does require an instruction and must be
7041 costed as a move. However, we can catch that when we cost the SET, so
7042 we don't need to consider it here. */
7043 if (x == const0_rtx)
7044 *cost = 0;
7045 else
7047 /* To an approximation, building any other constant is
7048 proportionally expensive to the number of instructions
7049 required to build that constant. This is true whether we
7050 are compiling for SPEED or otherwise. */
7051 if (!is_a <scalar_int_mode> (mode, &int_mode))
7052 int_mode = word_mode;
7053 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
7054 (NULL_RTX, x, false, int_mode));
7056 return true;
7058 case CONST_DOUBLE:
7060 /* First determine number of instructions to do the move
7061 as an integer constant. */
7062 if (!aarch64_float_const_representable_p (x)
7063 && !aarch64_can_const_movi_rtx_p (x, mode)
7064 && aarch64_float_const_rtx_p (x))
7066 unsigned HOST_WIDE_INT ival;
7067 bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
7068 gcc_assert (succeed);
7070 scalar_int_mode imode = (mode == HFmode
7071 ? SImode
7072 : int_mode_for_mode (mode).require ());
7073 int ncost = aarch64_internal_mov_immediate
7074 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
7075 *cost += COSTS_N_INSNS (ncost);
7076 return true;
7079 if (speed)
7081 /* mov[df,sf]_aarch64. */
7082 if (aarch64_float_const_representable_p (x))
7083 /* FMOV (scalar immediate). */
7084 *cost += extra_cost->fp[mode == DFmode].fpconst;
7085 else if (!aarch64_float_const_zero_rtx_p (x))
7087 /* This will be a load from memory. */
7088 if (mode == DFmode)
7089 *cost += extra_cost->ldst.loadd;
7090 else
7091 *cost += extra_cost->ldst.loadf;
7093 else
7094 /* Otherwise this is +0.0. We get this using MOVI d0, #0
7095 or MOV v0.s[0], wzr - neither of which are modeled by the
7096 cost tables. Just use the default cost. */
7101 return true;
7103 case MEM:
7104 if (speed)
7106 /* For loads we want the base cost of a load, plus an
7107 approximation for the additional cost of the addressing
7108 mode. */
7109 rtx address = XEXP (x, 0);
7110 if (VECTOR_MODE_P (mode))
7111 *cost += extra_cost->ldst.loadv;
7112 else if (GET_MODE_CLASS (mode) == MODE_INT)
7113 *cost += extra_cost->ldst.load;
7114 else if (mode == SFmode)
7115 *cost += extra_cost->ldst.loadf;
7116 else if (mode == DFmode)
7117 *cost += extra_cost->ldst.loadd;
7119 *cost +=
7120 COSTS_N_INSNS (aarch64_address_cost (address, mode,
7121 0, speed));
7124 return true;
7126 case NEG:
7127 op0 = XEXP (x, 0);
7129 if (VECTOR_MODE_P (mode))
7131 if (speed)
7133 /* FNEG. */
7134 *cost += extra_cost->vect.alu;
7136 return false;
7139 if (GET_MODE_CLASS (mode) == MODE_INT)
7141 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
7142 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
7144 /* CSETM. */
7145 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
7146 return true;
7149 /* Cost this as SUB wzr, X. */
7150 op0 = CONST0_RTX (mode);
7151 op1 = XEXP (x, 0);
7152 goto cost_minus;
7155 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7157 /* Support (neg(fma...)) as a single instruction only if
7158 sign of zeros is unimportant. This matches the decision
7159 making in aarch64.md. */
7160 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
7162 /* FNMADD. */
7163 *cost = rtx_cost (op0, mode, NEG, 0, speed);
7164 return true;
7166 if (GET_CODE (op0) == MULT)
7168 /* FNMUL. */
7169 *cost = rtx_cost (op0, mode, NEG, 0, speed);
7170 return true;
7172 if (speed)
7173 /* FNEG. */
7174 *cost += extra_cost->fp[mode == DFmode].neg;
7175 return false;
7178 return false;
7180 case CLRSB:
7181 case CLZ:
7182 if (speed)
7184 if (VECTOR_MODE_P (mode))
7185 *cost += extra_cost->vect.alu;
7186 else
7187 *cost += extra_cost->alu.clz;
7190 return false;
7192 case COMPARE:
7193 op0 = XEXP (x, 0);
7194 op1 = XEXP (x, 1);
7196 if (op1 == const0_rtx
7197 && GET_CODE (op0) == AND)
7199 x = op0;
7200 mode = GET_MODE (op0);
7201 goto cost_logic;
7204 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
7206 /* TODO: A write to the CC flags possibly costs extra, this
7207 needs encoding in the cost tables. */
7209 mode = GET_MODE (op0);
7210 /* ANDS. */
7211 if (GET_CODE (op0) == AND)
7213 x = op0;
7214 goto cost_logic;
7217 if (GET_CODE (op0) == PLUS)
7219 /* ADDS (and CMN alias). */
7220 x = op0;
7221 goto cost_plus;
7224 if (GET_CODE (op0) == MINUS)
7226 /* SUBS. */
7227 x = op0;
7228 goto cost_minus;
7231 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
7232 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
7233 && CONST_INT_P (XEXP (op0, 2)))
7235 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
7236 Handle it here directly rather than going to cost_logic
7237 since we know the immediate generated for the TST is valid
7238 so we can avoid creating an intermediate rtx for it only
7239 for costing purposes. */
7240 if (speed)
7241 *cost += extra_cost->alu.logical;
7243 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
7244 ZERO_EXTRACT, 0, speed);
7245 return true;
7248 if (GET_CODE (op1) == NEG)
7250 /* CMN. */
7251 if (speed)
7252 *cost += extra_cost->alu.arith;
7254 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
7255 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
7256 return true;
7259 /* CMP.
7261 Compare can freely swap the order of operands, and
7262 canonicalization puts the more complex operation first.
7263 But the integer MINUS logic expects the shift/extend
7264 operation in op1. */
7265 if (! (REG_P (op0)
7266 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
7268 op0 = XEXP (x, 1);
7269 op1 = XEXP (x, 0);
7271 goto cost_minus;
7274 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7276 /* FCMP. */
7277 if (speed)
7278 *cost += extra_cost->fp[mode == DFmode].compare;
7280 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
7282 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
7283 /* FCMP supports constant 0.0 for no extra cost. */
7284 return true;
7286 return false;
7289 if (VECTOR_MODE_P (mode))
7291 /* Vector compare. */
7292 if (speed)
7293 *cost += extra_cost->vect.alu;
7295 if (aarch64_float_const_zero_rtx_p (op1))
7297 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
7298 cost. */
7299 return true;
7301 return false;
7303 return false;
7305 case MINUS:
7307 op0 = XEXP (x, 0);
7308 op1 = XEXP (x, 1);
7310 cost_minus:
7311 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
7313 /* Detect valid immediates. */
7314 if ((GET_MODE_CLASS (mode) == MODE_INT
7315 || (GET_MODE_CLASS (mode) == MODE_CC
7316 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
7317 && CONST_INT_P (op1)
7318 && aarch64_uimm12_shift (INTVAL (op1)))
7320 if (speed)
7321 /* SUB(S) (immediate). */
7322 *cost += extra_cost->alu.arith;
7323 return true;
7326 /* Look for SUB (extended register). */
7327 if (is_a <scalar_int_mode> (mode, &int_mode)
7328 && aarch64_rtx_arith_op_extract_p (op1, int_mode))
7330 if (speed)
7331 *cost += extra_cost->alu.extend_arith;
7333 op1 = aarch64_strip_extend (op1, true);
7334 *cost += rtx_cost (op1, VOIDmode,
7335 (enum rtx_code) GET_CODE (op1), 0, speed);
7336 return true;
7339 rtx new_op1 = aarch64_strip_extend (op1, false);
7341 /* Cost this as an FMA-alike operation. */
7342 if ((GET_CODE (new_op1) == MULT
7343 || aarch64_shift_p (GET_CODE (new_op1)))
7344 && code != COMPARE)
7346 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
7347 (enum rtx_code) code,
7348 speed);
7349 return true;
7352 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
7354 if (speed)
7356 if (VECTOR_MODE_P (mode))
7358 /* Vector SUB. */
7359 *cost += extra_cost->vect.alu;
7361 else if (GET_MODE_CLASS (mode) == MODE_INT)
7363 /* SUB(S). */
7364 *cost += extra_cost->alu.arith;
7366 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7368 /* FSUB. */
7369 *cost += extra_cost->fp[mode == DFmode].addsub;
7372 return true;
7375 case PLUS:
7377 rtx new_op0;
7379 op0 = XEXP (x, 0);
7380 op1 = XEXP (x, 1);
7382 cost_plus:
7383 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
7384 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
7386 /* CSINC. */
7387 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
7388 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
7389 return true;
7392 if (GET_MODE_CLASS (mode) == MODE_INT
7393 && CONST_INT_P (op1)
7394 && aarch64_uimm12_shift (INTVAL (op1)))
7396 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
7398 if (speed)
7399 /* ADD (immediate). */
7400 *cost += extra_cost->alu.arith;
7401 return true;
7404 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
7406 /* Look for ADD (extended register). */
7407 if (is_a <scalar_int_mode> (mode, &int_mode)
7408 && aarch64_rtx_arith_op_extract_p (op0, int_mode))
7410 if (speed)
7411 *cost += extra_cost->alu.extend_arith;
7413 op0 = aarch64_strip_extend (op0, true);
7414 *cost += rtx_cost (op0, VOIDmode,
7415 (enum rtx_code) GET_CODE (op0), 0, speed);
7416 return true;
7419 /* Strip any extend, leave shifts behind as we will
7420 cost them through mult_cost. */
7421 new_op0 = aarch64_strip_extend (op0, false);
7423 if (GET_CODE (new_op0) == MULT
7424 || aarch64_shift_p (GET_CODE (new_op0)))
7426 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
7427 speed);
7428 return true;
7431 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
7433 if (speed)
7435 if (VECTOR_MODE_P (mode))
7437 /* Vector ADD. */
7438 *cost += extra_cost->vect.alu;
7440 else if (GET_MODE_CLASS (mode) == MODE_INT)
7442 /* ADD. */
7443 *cost += extra_cost->alu.arith;
7445 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7447 /* FADD. */
7448 *cost += extra_cost->fp[mode == DFmode].addsub;
7451 return true;
7454 case BSWAP:
7455 *cost = COSTS_N_INSNS (1);
7457 if (speed)
7459 if (VECTOR_MODE_P (mode))
7460 *cost += extra_cost->vect.alu;
7461 else
7462 *cost += extra_cost->alu.rev;
7464 return false;
7466 case IOR:
7467 if (aarch_rev16_p (x))
7469 *cost = COSTS_N_INSNS (1);
7471 if (speed)
7473 if (VECTOR_MODE_P (mode))
7474 *cost += extra_cost->vect.alu;
7475 else
7476 *cost += extra_cost->alu.rev;
7478 return true;
7481 if (aarch64_extr_rtx_p (x, &op0, &op1))
7483 *cost += rtx_cost (op0, mode, IOR, 0, speed);
7484 *cost += rtx_cost (op1, mode, IOR, 1, speed);
7485 if (speed)
7486 *cost += extra_cost->alu.shift;
7488 return true;
7490 /* Fall through. */
7491 case XOR:
7492 case AND:
7493 cost_logic:
7494 op0 = XEXP (x, 0);
7495 op1 = XEXP (x, 1);
7497 if (VECTOR_MODE_P (mode))
7499 if (speed)
7500 *cost += extra_cost->vect.alu;
7501 return true;
7504 if (code == AND
7505 && GET_CODE (op0) == MULT
7506 && CONST_INT_P (XEXP (op0, 1))
7507 && CONST_INT_P (op1)
7508 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
7509 INTVAL (op1)) != 0)
7511 /* This is a UBFM/SBFM. */
7512 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
7513 if (speed)
7514 *cost += extra_cost->alu.bfx;
7515 return true;
7518 if (is_int_mode (mode, &int_mode))
7520 if (CONST_INT_P (op1))
7522 /* We have a mask + shift version of a UBFIZ
7523 i.e. the *andim_ashift<mode>_bfiz pattern. */
7524 if (GET_CODE (op0) == ASHIFT
7525 && aarch64_mask_and_shift_for_ubfiz_p (int_mode, op1,
7526 XEXP (op0, 1)))
7528 *cost += rtx_cost (XEXP (op0, 0), int_mode,
7529 (enum rtx_code) code, 0, speed);
7530 if (speed)
7531 *cost += extra_cost->alu.bfx;
7533 return true;
7535 else if (aarch64_bitmask_imm (INTVAL (op1), int_mode))
7537 /* We possibly get the immediate for free, this is not
7538 modelled. */
7539 *cost += rtx_cost (op0, int_mode,
7540 (enum rtx_code) code, 0, speed);
7541 if (speed)
7542 *cost += extra_cost->alu.logical;
7544 return true;
7547 else
7549 rtx new_op0 = op0;
7551 /* Handle ORN, EON, or BIC. */
7552 if (GET_CODE (op0) == NOT)
7553 op0 = XEXP (op0, 0);
7555 new_op0 = aarch64_strip_shift (op0);
7557 /* If we had a shift on op0 then this is a logical-shift-
7558 by-register/immediate operation. Otherwise, this is just
7559 a logical operation. */
7560 if (speed)
7562 if (new_op0 != op0)
7564 /* Shift by immediate. */
7565 if (CONST_INT_P (XEXP (op0, 1)))
7566 *cost += extra_cost->alu.log_shift;
7567 else
7568 *cost += extra_cost->alu.log_shift_reg;
7570 else
7571 *cost += extra_cost->alu.logical;
7574 /* In both cases we want to cost both operands. */
7575 *cost += rtx_cost (new_op0, int_mode, (enum rtx_code) code,
7576 0, speed);
7577 *cost += rtx_cost (op1, int_mode, (enum rtx_code) code,
7578 1, speed);
7580 return true;
7583 return false;
7585 case NOT:
7586 x = XEXP (x, 0);
7587 op0 = aarch64_strip_shift (x);
7589 if (VECTOR_MODE_P (mode))
7591 /* Vector NOT. */
7592 *cost += extra_cost->vect.alu;
7593 return false;
7596 /* MVN-shifted-reg. */
7597 if (op0 != x)
7599 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
7601 if (speed)
7602 *cost += extra_cost->alu.log_shift;
7604 return true;
7606 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
7607 Handle the second form here taking care that 'a' in the above can
7608 be a shift. */
7609 else if (GET_CODE (op0) == XOR)
7611 rtx newop0 = XEXP (op0, 0);
7612 rtx newop1 = XEXP (op0, 1);
7613 rtx op0_stripped = aarch64_strip_shift (newop0);
7615 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
7616 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
7618 if (speed)
7620 if (op0_stripped != newop0)
7621 *cost += extra_cost->alu.log_shift;
7622 else
7623 *cost += extra_cost->alu.logical;
7626 return true;
7628 /* MVN. */
7629 if (speed)
7630 *cost += extra_cost->alu.logical;
7632 return false;
7634 case ZERO_EXTEND:
7636 op0 = XEXP (x, 0);
7637 /* If a value is written in SI mode, then zero extended to DI
7638 mode, the operation will in general be free as a write to
7639 a 'w' register implicitly zeroes the upper bits of an 'x'
7640 register. However, if this is
7642 (set (reg) (zero_extend (reg)))
7644 we must cost the explicit register move. */
7645 if (mode == DImode
7646 && GET_MODE (op0) == SImode
7647 && outer == SET)
7649 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
7651 /* If OP_COST is non-zero, then the cost of the zero extend
7652 is effectively the cost of the inner operation. Otherwise
7653 we have a MOV instruction and we take the cost from the MOV
7654 itself. This is true independently of whether we are
7655 optimizing for space or time. */
7656 if (op_cost)
7657 *cost = op_cost;
7659 return true;
7661 else if (MEM_P (op0))
7663 /* All loads can zero extend to any size for free. */
7664 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
7665 return true;
7668 op0 = aarch64_extend_bitfield_pattern_p (x);
7669 if (op0)
7671 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
7672 if (speed)
7673 *cost += extra_cost->alu.bfx;
7674 return true;
7677 if (speed)
7679 if (VECTOR_MODE_P (mode))
7681 /* UMOV. */
7682 *cost += extra_cost->vect.alu;
7684 else
7686 /* We generate an AND instead of UXTB/UXTH. */
7687 *cost += extra_cost->alu.logical;
7690 return false;
7692 case SIGN_EXTEND:
7693 if (MEM_P (XEXP (x, 0)))
7695 /* LDRSH. */
7696 if (speed)
7698 rtx address = XEXP (XEXP (x, 0), 0);
7699 *cost += extra_cost->ldst.load_sign_extend;
7701 *cost +=
7702 COSTS_N_INSNS (aarch64_address_cost (address, mode,
7703 0, speed));
7705 return true;
7708 op0 = aarch64_extend_bitfield_pattern_p (x);
7709 if (op0)
7711 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
7712 if (speed)
7713 *cost += extra_cost->alu.bfx;
7714 return true;
7717 if (speed)
7719 if (VECTOR_MODE_P (mode))
7720 *cost += extra_cost->vect.alu;
7721 else
7722 *cost += extra_cost->alu.extend;
7724 return false;
7726 case ASHIFT:
7727 op0 = XEXP (x, 0);
7728 op1 = XEXP (x, 1);
7730 if (CONST_INT_P (op1))
7732 if (speed)
7734 if (VECTOR_MODE_P (mode))
7736 /* Vector shift (immediate). */
7737 *cost += extra_cost->vect.alu;
7739 else
7741 /* LSL (immediate), UBMF, UBFIZ and friends. These are all
7742 aliases. */
7743 *cost += extra_cost->alu.shift;
7747 /* We can incorporate zero/sign extend for free. */
7748 if (GET_CODE (op0) == ZERO_EXTEND
7749 || GET_CODE (op0) == SIGN_EXTEND)
7750 op0 = XEXP (op0, 0);
7752 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
7753 return true;
7755 else
7757 if (VECTOR_MODE_P (mode))
7759 if (speed)
7760 /* Vector shift (register). */
7761 *cost += extra_cost->vect.alu;
7763 else
7765 if (speed)
7766 /* LSLV. */
7767 *cost += extra_cost->alu.shift_reg;
7769 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
7770 && CONST_INT_P (XEXP (op1, 1))
7771 && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
7773 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
7774 /* We already demanded XEXP (op1, 0) to be REG_P, so
7775 don't recurse into it. */
7776 return true;
7779 return false; /* All arguments need to be in registers. */
7782 case ROTATE:
7783 case ROTATERT:
7784 case LSHIFTRT:
7785 case ASHIFTRT:
7786 op0 = XEXP (x, 0);
7787 op1 = XEXP (x, 1);
7789 if (CONST_INT_P (op1))
7791 /* ASR (immediate) and friends. */
7792 if (speed)
7794 if (VECTOR_MODE_P (mode))
7795 *cost += extra_cost->vect.alu;
7796 else
7797 *cost += extra_cost->alu.shift;
7800 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
7801 return true;
7803 else
7805 if (VECTOR_MODE_P (mode))
7807 if (speed)
7808 /* Vector shift (register). */
7809 *cost += extra_cost->vect.alu;
7811 else
7813 if (speed)
7814 /* ASR (register) and friends. */
7815 *cost += extra_cost->alu.shift_reg;
7817 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
7818 && CONST_INT_P (XEXP (op1, 1))
7819 && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
7821 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
7822 /* We already demanded XEXP (op1, 0) to be REG_P, so
7823 don't recurse into it. */
7824 return true;
7827 return false; /* All arguments need to be in registers. */
7830 case SYMBOL_REF:
7832 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
7833 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
7835 /* LDR. */
7836 if (speed)
7837 *cost += extra_cost->ldst.load;
7839 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
7840 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
7842 /* ADRP, followed by ADD. */
7843 *cost += COSTS_N_INSNS (1);
7844 if (speed)
7845 *cost += 2 * extra_cost->alu.arith;
7847 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
7848 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
7850 /* ADR. */
7851 if (speed)
7852 *cost += extra_cost->alu.arith;
7855 if (flag_pic)
7857 /* One extra load instruction, after accessing the GOT. */
7858 *cost += COSTS_N_INSNS (1);
7859 if (speed)
7860 *cost += extra_cost->ldst.load;
7862 return true;
7864 case HIGH:
7865 case LO_SUM:
7866 /* ADRP/ADD (immediate). */
7867 if (speed)
7868 *cost += extra_cost->alu.arith;
7869 return true;
7871 case ZERO_EXTRACT:
7872 case SIGN_EXTRACT:
7873 /* UBFX/SBFX. */
7874 if (speed)
7876 if (VECTOR_MODE_P (mode))
7877 *cost += extra_cost->vect.alu;
7878 else
7879 *cost += extra_cost->alu.bfx;
7882 /* We can trust that the immediates used will be correct (there
7883 are no by-register forms), so we need only cost op0. */
7884 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
7885 return true;
7887 case MULT:
7888 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
7889 /* aarch64_rtx_mult_cost always handles recursion to its
7890 operands. */
7891 return true;
7893 case MOD:
7894 /* We can expand signed mod by power of 2 using a NEGS, two parallel
7895 ANDs and a CSNEG. Assume here that a CSNEG costs the same as
7896 an unconditional negate. This case should only ever be reached through
7897 the set_smod_pow2_cheap check in expmed.c. */
7898 if (CONST_INT_P (XEXP (x, 1))
7899 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
7900 && (mode == SImode || mode == DImode))
7902 /* We expand to 4 instructions. Reset the baseline. */
7903 *cost = COSTS_N_INSNS (4);
7905 if (speed)
7906 *cost += 2 * extra_cost->alu.logical
7907 + 2 * extra_cost->alu.arith;
7909 return true;
7912 /* Fall-through. */
7913 case UMOD:
7914 if (speed)
7917 /* Slightly prefer UMOD over SMOD. */
7917 if (VECTOR_MODE_P (mode))
7918 *cost += extra_cost->vect.alu;
7919 else if (GET_MODE_CLASS (mode) == MODE_INT)
7920 *cost += (extra_cost->mult[mode == DImode].add
7921 + extra_cost->mult[mode == DImode].idiv
7922 + (code == MOD ? 1 : 0));
7924 return false; /* All arguments need to be in registers. */
7926 case DIV:
7927 case UDIV:
7928 case SQRT:
7929 if (speed)
7931 if (VECTOR_MODE_P (mode))
7932 *cost += extra_cost->vect.alu;
7933 else if (GET_MODE_CLASS (mode) == MODE_INT)
7934 /* There is no integer SQRT, so only DIV and UDIV can get
7935 here. */
7936 *cost += (extra_cost->mult[mode == DImode].idiv
7937 /* Slightly prefer UDIV over SDIV. */
7938 + (code == DIV ? 1 : 0));
7939 else
7940 *cost += extra_cost->fp[mode == DFmode].div;
7942 return false; /* All arguments need to be in registers. */
7944 case IF_THEN_ELSE:
7945 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
7946 XEXP (x, 2), cost, speed);
7948 case EQ:
7949 case NE:
7950 case GT:
7951 case GTU:
7952 case LT:
7953 case LTU:
7954 case GE:
7955 case GEU:
7956 case LE:
7957 case LEU:
7959 return false; /* All arguments must be in registers. */
7961 case FMA:
7962 op0 = XEXP (x, 0);
7963 op1 = XEXP (x, 1);
7964 op2 = XEXP (x, 2);
7966 if (speed)
7968 if (VECTOR_MODE_P (mode))
7969 *cost += extra_cost->vect.alu;
7970 else
7971 *cost += extra_cost->fp[mode == DFmode].fma;
7974 /* FMSUB, FNMADD, and FNMSUB are free. */
7975 if (GET_CODE (op0) == NEG)
7976 op0 = XEXP (op0, 0);
7978 if (GET_CODE (op2) == NEG)
7979 op2 = XEXP (op2, 0);
7981 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
7982 and the by-element operand as operand 0. */
7983 if (GET_CODE (op1) == NEG)
7984 op1 = XEXP (op1, 0);
7986 /* Catch vector-by-element operations. The by-element operand can
7987 either be (vec_duplicate (vec_select (x))) or just
7988 (vec_select (x)), depending on whether we are multiplying by
7989 a vector or a scalar.
7991 Canonicalization is not very good in these cases: FMA4 will put the
7992 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
7993 if (GET_CODE (op0) == VEC_DUPLICATE)
7994 op0 = XEXP (op0, 0);
7995 else if (GET_CODE (op1) == VEC_DUPLICATE)
7996 op1 = XEXP (op1, 0);
7998 if (GET_CODE (op0) == VEC_SELECT)
7999 op0 = XEXP (op0, 0);
8000 else if (GET_CODE (op1) == VEC_SELECT)
8001 op1 = XEXP (op1, 0);
8003 /* If the remaining parameters are not registers,
8004 get the cost to put them into registers. */
8005 *cost += rtx_cost (op0, mode, FMA, 0, speed);
8006 *cost += rtx_cost (op1, mode, FMA, 1, speed);
8007 *cost += rtx_cost (op2, mode, FMA, 2, speed);
8008 return true;
8010 case FLOAT:
8011 case UNSIGNED_FLOAT:
8012 if (speed)
8013 *cost += extra_cost->fp[mode == DFmode].fromint;
8014 return false;
8016 case FLOAT_EXTEND:
8017 if (speed)
8019 if (VECTOR_MODE_P (mode))
8021 /* Vector widening conversion. */
8022 *cost += extra_cost->vect.alu;
8024 else
8025 *cost += extra_cost->fp[mode == DFmode].widen;
8027 return false;
8029 case FLOAT_TRUNCATE:
8030 if (speed)
8032 if (VECTOR_MODE_P (mode))
8034 /* Vector narrowing conversion. */
8035 *cost += extra_cost->vect.alu;
8037 else
8038 *cost += extra_cost->fp[mode == DFmode].narrow;
8040 return false;
8042 case FIX:
8043 case UNSIGNED_FIX:
8044 x = XEXP (x, 0);
8045 /* Strip the rounding part. They will all be implemented
8046 by the fcvt* family of instructions anyway. */
8047 if (GET_CODE (x) == UNSPEC)
8049 unsigned int uns_code = XINT (x, 1);
8051 if (uns_code == UNSPEC_FRINTA
8052 || uns_code == UNSPEC_FRINTM
8053 || uns_code == UNSPEC_FRINTN
8054 || uns_code == UNSPEC_FRINTP
8055 || uns_code == UNSPEC_FRINTZ)
8056 x = XVECEXP (x, 0, 0);
8059 if (speed)
8061 if (VECTOR_MODE_P (mode))
8062 *cost += extra_cost->vect.alu;
8063 else
8064 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
8067 /* We can combine an fmul by a power of 2 followed by an fcvt into a single
8068 fixed-point fcvt. */
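	  /* For example, (int) (x * 16.0f) can usually become a single fcvtzs
	     with four fractional bits rather than an fmul followed by an
	     fcvt (an illustrative case for the scalar SF mode).  */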
8069 if (GET_CODE (x) == MULT
8070 && ((VECTOR_MODE_P (mode)
8071 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
8072 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
8074 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
8075 0, speed);
8076 return true;
8079 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
8080 return true;
8082 case ABS:
8083 if (VECTOR_MODE_P (mode))
8085 /* ABS (vector). */
8086 if (speed)
8087 *cost += extra_cost->vect.alu;
8089 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8091 op0 = XEXP (x, 0);
8093 /* FABD, which is analogous to FADD. */
8094 if (GET_CODE (op0) == MINUS)
8096 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
8097 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
8098 if (speed)
8099 *cost += extra_cost->fp[mode == DFmode].addsub;
8101 return true;
8103 /* Simple FABS is analogous to FNEG. */
8104 if (speed)
8105 *cost += extra_cost->fp[mode == DFmode].neg;
8107 else
8109 /* Integer ABS will either be split to
8110 two arithmetic instructions, or will be an ABS
8111 (scalar), which we don't model. */
8112 *cost = COSTS_N_INSNS (2);
8113 if (speed)
8114 *cost += 2 * extra_cost->alu.arith;
8116 return false;
8118 case SMAX:
8119 case SMIN:
8120 if (speed)
8122 if (VECTOR_MODE_P (mode))
8123 *cost += extra_cost->vect.alu;
8124 else
8126 /* FMAXNM/FMINNM/FMAX/FMIN.
8127 TODO: This may not be accurate for all implementations, but
8128 we do not model this in the cost tables. */
8129 *cost += extra_cost->fp[mode == DFmode].addsub;
8132 return false;
8134 case UNSPEC:
8135 /* The floating point round to integer frint* instructions. */
8136 if (aarch64_frint_unspec_p (XINT (x, 1)))
8138 if (speed)
8139 *cost += extra_cost->fp[mode == DFmode].roundint;
8141 return false;
8144 if (XINT (x, 1) == UNSPEC_RBIT)
8146 if (speed)
8147 *cost += extra_cost->alu.rev;
8149 return false;
8151 break;
8153 case TRUNCATE:
8155 /* Decompose <su>muldi3_highpart. */
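	/* In C terms this is roughly
	   (uint64_t) (((unsigned __int128) a * b) >> 64)
	   (or the signed equivalent), which maps onto a single UMULH/SMULH.  */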
8156 if (/* (truncate:DI */
8157 mode == DImode
8158 /* (lshiftrt:TI */
8159 && GET_MODE (XEXP (x, 0)) == TImode
8160 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8161 /* (mult:TI */
8162 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8163 /* (ANY_EXTEND:TI (reg:DI))
8164 (ANY_EXTEND:TI (reg:DI))) */
8165 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8166 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
8167 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
8168 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
8169 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
8170 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
8171 /* (const_int 64) */
8172 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8173 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
8175 /* UMULH/SMULH. */
8176 if (speed)
8177 *cost += extra_cost->mult[mode == DImode].extend;
8178 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
8179 mode, MULT, 0, speed);
8180 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
8181 mode, MULT, 1, speed);
8182 return true;
8185 /* Fall through. */
8186 default:
8187 break;
8190 if (dump_file
8191 && flag_aarch64_verbose_cost)
8192 fprintf (dump_file,
8193 "\nFailed to cost RTX. Assuming default cost.\n");
8195 return true;
8198 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
8199 calculated for X. This cost is stored in *COST. Returns true
8200 if the total cost of X was calculated. */
8201 static bool
8202 aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
8203 int param, int *cost, bool speed)
8205 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
8207 if (dump_file
8208 && flag_aarch64_verbose_cost)
8210 print_rtl_single (dump_file, x);
8211 fprintf (dump_file, "\n%s cost: %d (%s)\n",
8212 speed ? "Hot" : "Cold",
8213 *cost, result ? "final" : "partial");
8216 return result;
8219 static int
8220 aarch64_register_move_cost (machine_mode mode,
8221 reg_class_t from_i, reg_class_t to_i)
8223 enum reg_class from = (enum reg_class) from_i;
8224 enum reg_class to = (enum reg_class) to_i;
8225 const struct cpu_regmove_cost *regmove_cost
8226 = aarch64_tune_params.regmove_cost;
8228 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
8229 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
8230 to = GENERAL_REGS;
8232 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
8233 from = GENERAL_REGS;
8235 /* Moving between GPR and stack cost is the same as GP2GP. */
8236 if ((from == GENERAL_REGS && to == STACK_REG)
8237 || (to == GENERAL_REGS && from == STACK_REG))
8238 return regmove_cost->GP2GP;
8240 /* To/From the stack register, we move via the gprs. */
8241 if (to == STACK_REG || from == STACK_REG)
8242 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
8243 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
8245 if (GET_MODE_SIZE (mode) == 16)
8247 /* 128-bit operations on general registers require 2 instructions. */
8248 if (from == GENERAL_REGS && to == GENERAL_REGS)
8249 return regmove_cost->GP2GP * 2;
8250 else if (from == GENERAL_REGS)
8251 return regmove_cost->GP2FP * 2;
8252 else if (to == GENERAL_REGS)
8253 return regmove_cost->FP2GP * 2;
8255 /* When AdvSIMD instructions are disabled it is not possible to move
8256 a 128-bit value directly between Q registers. This is handled in
8257 secondary reload. A general register is used as a scratch to move
8258 the upper DI value and the lower DI value is moved directly,
8259 hence the cost is the sum of three moves. */
8260 if (! TARGET_SIMD)
8261 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
8263 return regmove_cost->FP2FP;
8266 if (from == GENERAL_REGS && to == GENERAL_REGS)
8267 return regmove_cost->GP2GP;
8268 else if (from == GENERAL_REGS)
8269 return regmove_cost->GP2FP;
8270 else if (to == GENERAL_REGS)
8271 return regmove_cost->FP2GP;
8273 return regmove_cost->FP2FP;
8276 static int
8277 aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
8278 reg_class_t rclass ATTRIBUTE_UNUSED,
8279 bool in ATTRIBUTE_UNUSED)
8281 return aarch64_tune_params.memmov_cost;
8284 /* Return true if it is safe and beneficial to use the approximate rsqrt optabs
8285 to optimize 1.0/sqrt. */
8287 static bool
8288 use_rsqrt_p (machine_mode mode)
8290 return (!flag_trapping_math
8291 && flag_unsafe_math_optimizations
8292 && ((aarch64_tune_params.approx_modes->recip_sqrt
8293 & AARCH64_APPROX_MODE (mode))
8294 || flag_mrecip_low_precision_sqrt));
8297 /* Function to decide when to use the approximate reciprocal square root
8298 builtin. */
8300 static tree
8301 aarch64_builtin_reciprocal (tree fndecl)
8303 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
8305 if (!use_rsqrt_p (mode))
8306 return NULL_TREE;
8307 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
8310 typedef rtx (*rsqrte_type) (rtx, rtx);
8312 /* Select reciprocal square root initial estimate insn depending on machine
8313 mode. */
8315 static rsqrte_type
8316 get_rsqrte_type (machine_mode mode)
8318 switch (mode)
8320 case E_DFmode: return gen_aarch64_rsqrtedf;
8321 case E_SFmode: return gen_aarch64_rsqrtesf;
8322 case E_V2DFmode: return gen_aarch64_rsqrtev2df;
8323 case E_V2SFmode: return gen_aarch64_rsqrtev2sf;
8324 case E_V4SFmode: return gen_aarch64_rsqrtev4sf;
8325 default: gcc_unreachable ();
8329 typedef rtx (*rsqrts_type) (rtx, rtx, rtx);
8331 /* Select reciprocal square root series step insn depending on machine mode. */
8333 static rsqrts_type
8334 get_rsqrts_type (machine_mode mode)
8336 switch (mode)
8338 case E_DFmode: return gen_aarch64_rsqrtsdf;
8339 case E_SFmode: return gen_aarch64_rsqrtssf;
8340 case E_V2DFmode: return gen_aarch64_rsqrtsv2df;
8341 case E_V2SFmode: return gen_aarch64_rsqrtsv2sf;
8342 case E_V4SFmode: return gen_aarch64_rsqrtsv4sf;
8343 default: gcc_unreachable ();
8347 /* Emit instruction sequence to compute either the approximate square root
8348 or its approximate reciprocal, depending on the flag RECP, and return
8349 whether the sequence was emitted or not. */
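/* The sequence below is a Newton-Raphson refinement of the FRSQRTE estimate:
   each FRSQRTS step computes (3 - d * x * x) / 2, so multiplying the current
   estimate by it moves x towards 1/sqrt (d), roughly doubling the number of
   correct bits per iteration.  */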
8351 bool
8352 aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
8354 machine_mode mode = GET_MODE (dst);
8356 if (GET_MODE_INNER (mode) == HFmode)
8358 gcc_assert (!recp);
8359 return false;
8362 if (!recp)
8364 if (!(flag_mlow_precision_sqrt
8365 || (aarch64_tune_params.approx_modes->sqrt
8366 & AARCH64_APPROX_MODE (mode))))
8367 return false;
8369 if (flag_finite_math_only
8370 || flag_trapping_math
8371 || !flag_unsafe_math_optimizations
8372 || optimize_function_for_size_p (cfun))
8373 return false;
8375 else
8376 /* Caller assumes we cannot fail. */
8377 gcc_assert (use_rsqrt_p (mode));
8379 machine_mode mmsk = mode_for_int_vector (mode).require ();
8380 rtx xmsk = gen_reg_rtx (mmsk);
8381 if (!recp)
8382 /* When calculating the approximate square root, compare the
8383 argument with 0.0 and create a mask. */
8384 emit_insn (gen_rtx_SET (xmsk,
8385 gen_rtx_NEG (mmsk,
8386 gen_rtx_EQ (mmsk, src,
8387 CONST0_RTX (mode)))));
8389 /* Estimate the approximate reciprocal square root. */
8390 rtx xdst = gen_reg_rtx (mode);
8391 emit_insn ((*get_rsqrte_type (mode)) (xdst, src));
8393 /* Iterate over the series twice for SF and thrice for DF. */
8394 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
8396 /* Optionally iterate over the series once less for faster performance
8397 while sacrificing the accuracy. */
8398 if ((recp && flag_mrecip_low_precision_sqrt)
8399 || (!recp && flag_mlow_precision_sqrt))
8400 iterations--;
8402 /* Iterate over the series to calculate the approximate reciprocal square
8403 root. */
8404 rtx x1 = gen_reg_rtx (mode);
8405 while (iterations--)
8407 rtx x2 = gen_reg_rtx (mode);
8408 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
8410 emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2));
8412 if (iterations > 0)
8413 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
8416 if (!recp)
8418 /* Qualify the approximate reciprocal square root when the argument is
8419 0.0 by squashing the intermediary result to 0.0. */
8420 rtx xtmp = gen_reg_rtx (mmsk);
8421 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
8422 gen_rtx_SUBREG (mmsk, xdst, 0)));
8423 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
8425 /* Calculate the approximate square root. */
8426 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
8429 /* Finalize the approximation. */
8430 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
8432 return true;
8435 typedef rtx (*recpe_type) (rtx, rtx);
8437 /* Select reciprocal initial estimate insn depending on machine mode. */
8439 static recpe_type
8440 get_recpe_type (machine_mode mode)
8442 switch (mode)
8444 case E_SFmode: return (gen_aarch64_frecpesf);
8445 case E_V2SFmode: return (gen_aarch64_frecpev2sf);
8446 case E_V4SFmode: return (gen_aarch64_frecpev4sf);
8447 case E_DFmode: return (gen_aarch64_frecpedf);
8448 case E_V2DFmode: return (gen_aarch64_frecpev2df);
8449 default: gcc_unreachable ();
8453 typedef rtx (*recps_type) (rtx, rtx, rtx);
8455 /* Select reciprocal series step insn depending on machine mode. */
8457 static recps_type
8458 get_recps_type (machine_mode mode)
8460 switch (mode)
8462 case E_SFmode: return (gen_aarch64_frecpssf);
8463 case E_V2SFmode: return (gen_aarch64_frecpsv2sf);
8464 case E_V4SFmode: return (gen_aarch64_frecpsv4sf);
8465 case E_DFmode: return (gen_aarch64_frecpsdf);
8466 case E_V2DFmode: return (gen_aarch64_frecpsv2df);
8467 default: gcc_unreachable ();
8471 /* Emit the instruction sequence to compute the approximation for the division
8472 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
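/* The reciprocal below is a Newton-Raphson refinement of the FRECPE estimate:
   each FRECPS step computes (2 - d * x), so x * frecps (x, d) converges
   towards 1/d, and the quotient is then obtained as NUM * (1/DEN).  */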
8474 bool
8475 aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
8477 machine_mode mode = GET_MODE (quo);
8479 if (GET_MODE_INNER (mode) == HFmode)
8480 return false;
8482 bool use_approx_division_p = (flag_mlow_precision_div
8483 || (aarch64_tune_params.approx_modes->division
8484 & AARCH64_APPROX_MODE (mode)));
8486 if (!flag_finite_math_only
8487 || flag_trapping_math
8488 || !flag_unsafe_math_optimizations
8489 || optimize_function_for_size_p (cfun)
8490 || !use_approx_division_p)
8491 return false;
8493 if (!TARGET_SIMD && VECTOR_MODE_P (mode))
8494 return false;
8496 /* Estimate the approximate reciprocal. */
8497 rtx xrcp = gen_reg_rtx (mode);
8498 emit_insn ((*get_recpe_type (mode)) (xrcp, den));
8500 /* Iterate over the series twice for SF and thrice for DF. */
8501 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
8503 /* Optionally iterate over the series once less for faster performance,
8504 while sacrificing the accuracy. */
8505 if (flag_mlow_precision_div)
8506 iterations--;
8508 /* Iterate over the series to calculate the approximate reciprocal. */
8509 rtx xtmp = gen_reg_rtx (mode);
8510 while (iterations--)
8512 emit_insn ((*get_recps_type (mode)) (xtmp, xrcp, den));
8514 if (iterations > 0)
8515 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
8518 if (num != CONST1_RTX (mode))
8520 /* As the approximate reciprocal of DEN is already calculated, only
8521 calculate the approximate division when NUM is not 1.0. */
8522 rtx xnum = force_reg (mode, num);
8523 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
8526 /* Finalize the approximation. */
8527 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
8528 return true;
8531 /* Return the number of instructions that can be issued per cycle. */
8532 static int
8533 aarch64_sched_issue_rate (void)
8535 return aarch64_tune_params.issue_rate;
8538 static int
8539 aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
8541 int issue_rate = aarch64_sched_issue_rate ();
8543 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
8547 /* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
8548 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
8549 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
8551 static int
8552 aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
8553 int ready_index)
8555 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
8559 /* Vectorizer cost model target hooks. */
8561 /* Implement targetm.vectorize.builtin_vectorization_cost. */
8562 static int
8563 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
8564 tree vectype,
8565 int misalign ATTRIBUTE_UNUSED)
8567 unsigned elements;
8568 const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
8569 bool fp = false;
8571 if (vectype != NULL)
8572 fp = FLOAT_TYPE_P (vectype);
8574 switch (type_of_cost)
8576 case scalar_stmt:
8577 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
8579 case scalar_load:
8580 return costs->scalar_load_cost;
8582 case scalar_store:
8583 return costs->scalar_store_cost;
8585 case vector_stmt:
8586 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8588 case vector_load:
8589 return costs->vec_align_load_cost;
8591 case vector_store:
8592 return costs->vec_store_cost;
8594 case vec_to_scalar:
8595 return costs->vec_to_scalar_cost;
8597 case scalar_to_vec:
8598 return costs->scalar_to_vec_cost;
8600 case unaligned_load:
8601 case vector_gather_load:
8602 return costs->vec_unalign_load_cost;
8604 case unaligned_store:
8605 case vector_scatter_store:
8606 return costs->vec_unalign_store_cost;
8608 case cond_branch_taken:
8609 return costs->cond_taken_branch_cost;
8611 case cond_branch_not_taken:
8612 return costs->cond_not_taken_branch_cost;
8614 case vec_perm:
8615 return costs->vec_permute_cost;
8617 case vec_promote_demote:
8618 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8620 case vec_construct:
8621 elements = TYPE_VECTOR_SUBPARTS (vectype);
8622 return elements / 2 + 1;
8624 default:
8625 gcc_unreachable ();
8629 /* Implement targetm.vectorize.add_stmt_cost. */
8630 static unsigned
8631 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
8632 struct _stmt_vec_info *stmt_info, int misalign,
8633 enum vect_cost_model_location where)
8635 unsigned *cost = (unsigned *) data;
8636 unsigned retval = 0;
8638 if (flag_vect_cost_model)
8640 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
8641 int stmt_cost =
8642 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
8644 /* Statements in an inner loop relative to the loop being
8645 vectorized are weighted more heavily. The value here is
8646 arbitrary and could potentially be improved with analysis. */
8647 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
8648 count *= 50; /* FIXME */
8650 retval = (unsigned) (count * stmt_cost);
8651 cost[where] += retval;
8654 return retval;
8657 static void initialize_aarch64_code_model (struct gcc_options *);
8659 /* Parse the TO_PARSE string and put the architecture struct that it
8660 selects into RES and the architectural features into ISA_FLAGS.
8661 Return an aarch64_parse_opt_result describing the parse result.
8662 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
8664 static enum aarch64_parse_opt_result
8665 aarch64_parse_arch (const char *to_parse, const struct processor **res,
8666 unsigned long *isa_flags)
8668 char *ext;
8669 const struct processor *arch;
8670 char *str = (char *) alloca (strlen (to_parse) + 1);
8671 size_t len;
8673 strcpy (str, to_parse);
8675 ext = strchr (str, '+');
8677 if (ext != NULL)
8678 len = ext - str;
8679 else
8680 len = strlen (str);
8682 if (len == 0)
8683 return AARCH64_PARSE_MISSING_ARG;
8686 /* Loop through the list of supported ARCHes to find a match. */
8687 for (arch = all_architectures; arch->name != NULL; arch++)
8689 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
8691 unsigned long isa_temp = arch->flags;
8693 if (ext != NULL)
8695 /* TO_PARSE string contains at least one extension. */
8696 enum aarch64_parse_opt_result ext_res
8697 = aarch64_parse_extension (ext, &isa_temp);
8699 if (ext_res != AARCH64_PARSE_OK)
8700 return ext_res;
8702 /* Extension parsing was successful. Confirm the result
8703 arch and ISA flags. */
8704 *res = arch;
8705 *isa_flags = isa_temp;
8706 return AARCH64_PARSE_OK;
8710 /* ARCH name not found in list. */
8711 return AARCH64_PARSE_INVALID_ARG;
8714 /* Parse the TO_PARSE string and put the result tuning in RES and the
8715 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
8716 describing the parse result. If there is an error parsing, RES and
8717 ISA_FLAGS are left unchanged. */
8719 static enum aarch64_parse_opt_result
8720 aarch64_parse_cpu (const char *to_parse, const struct processor **res,
8721 unsigned long *isa_flags)
8723 char *ext;
8724 const struct processor *cpu;
8725 char *str = (char *) alloca (strlen (to_parse) + 1);
8726 size_t len;
8728 strcpy (str, to_parse);
8730 ext = strchr (str, '+');
8732 if (ext != NULL)
8733 len = ext - str;
8734 else
8735 len = strlen (str);
8737 if (len == 0)
8738 return AARCH64_PARSE_MISSING_ARG;
8741 /* Loop through the list of supported CPUs to find a match. */
8742 for (cpu = all_cores; cpu->name != NULL; cpu++)
8744 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
8746 unsigned long isa_temp = cpu->flags;
8749 if (ext != NULL)
8751 /* TO_PARSE string contains at least one extension. */
8752 enum aarch64_parse_opt_result ext_res
8753 = aarch64_parse_extension (ext, &isa_temp);
8755 if (ext_res != AARCH64_PARSE_OK)
8756 return ext_res;
8758 /* Extension parsing was successfull. Confirm the result
8759 cpu and ISA flags. */
8760 *res = cpu;
8761 *isa_flags = isa_temp;
8762 return AARCH64_PARSE_OK;
8766 /* CPU name not found in list. */
8767 return AARCH64_PARSE_INVALID_ARG;
8770 /* Parse the TO_PARSE string and put the cpu it selects into RES.
8771 Return an aarch64_parse_opt_result describing the parse result.
8772 If the parsing fails the RES does not change. */
8774 static enum aarch64_parse_opt_result
8775 aarch64_parse_tune (const char *to_parse, const struct processor **res)
8777 const struct processor *cpu;
8778 char *str = (char *) alloca (strlen (to_parse) + 1);
8780 strcpy (str, to_parse);
8782 /* Loop through the list of supported CPUs to find a match. */
8783 for (cpu = all_cores; cpu->name != NULL; cpu++)
8785 if (strcmp (cpu->name, str) == 0)
8787 *res = cpu;
8788 return AARCH64_PARSE_OK;
8792 /* CPU name not found in list. */
8793 return AARCH64_PARSE_INVALID_ARG;
8796 /* Parse TOKEN, which has length LENGTH to see if it is an option
8797 described in FLAG. If it is, return the index bit for that fusion type.
8798 If not, error (printing OPTION_NAME) and return zero. */
8800 static unsigned int
8801 aarch64_parse_one_option_token (const char *token,
8802 size_t length,
8803 const struct aarch64_flag_desc *flag,
8804 const char *option_name)
8806 for (; flag->name != NULL; flag++)
8808 if (length == strlen (flag->name)
8809 && !strncmp (flag->name, token, length))
8810 return flag->flag;
8813 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
8814 return 0;
8817 /* Parse OPTION which is a comma-separated list of flags to enable.
8818 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
8819 default state we inherit from the CPU tuning structures. OPTION_NAME
8820 gives the top-level option we are parsing in the -moverride string,
8821 for use in error messages. */
8823 static unsigned int
8824 aarch64_parse_boolean_options (const char *option,
8825 const struct aarch64_flag_desc *flags,
8826 unsigned int initial_state,
8827 const char *option_name)
8829 const char separator = '.';
8830 const char* specs = option;
8831 const char* ntoken = option;
8832 unsigned int found_flags = initial_state;
8834 while ((ntoken = strchr (specs, separator)))
8836 size_t token_length = ntoken - specs;
8837 unsigned token_ops = aarch64_parse_one_option_token (specs,
8838 token_length,
8839 flags,
8840 option_name);
8841 /* If we find "none" (or, for simplicity's sake, an error) anywhere
8842 in the token stream, reset the supported operations. So:
8844 adrp+add.cmp+branch.none.adrp+add
8846 would have the result of turning on only adrp+add fusion. */
8847 if (!token_ops)
8848 found_flags = 0;
8850 found_flags |= token_ops;
8851 specs = ++ntoken;
8854 /* We ended with a comma, print something. */
8855 if (!(*specs))
8857 error ("%s string ill-formed\n", option_name);
8858 return 0;
8861 /* We still have one more token to parse. */
8862 size_t token_length = strlen (specs);
8863 unsigned token_ops = aarch64_parse_one_option_token (specs,
8864 token_length,
8865 flags,
8866 option_name);
8867 if (!token_ops)
8868 found_flags = 0;
8870 found_flags |= token_ops;
8871 return found_flags;
8874 /* Support for overriding instruction fusion. */
8876 static void
8877 aarch64_parse_fuse_string (const char *fuse_string,
8878 struct tune_params *tune)
8880 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
8881 aarch64_fusible_pairs,
8882 tune->fusible_ops,
8883 "fuse=");
8886 /* Support for overriding other tuning flags. */
8888 static void
8889 aarch64_parse_tune_string (const char *tune_string,
8890 struct tune_params *tune)
8892 tune->extra_tuning_flags
8893 = aarch64_parse_boolean_options (tune_string,
8894 aarch64_tuning_flags,
8895 tune->extra_tuning_flags,
8896 "tune=");
8899 /* Parse TOKEN, which has length LENGTH to see if it is a tuning option
8900 we understand. If it is, extract the option string and handoff to
8901 the appropriate function. */
8903 void
8904 aarch64_parse_one_override_token (const char* token,
8905 size_t length,
8906 struct tune_params *tune)
8908 const struct aarch64_tuning_override_function *fn
8909 = aarch64_tuning_override_functions;
8911 const char *option_part = strchr (token, '=');
8912 if (!option_part)
8914 error ("tuning string missing in option (%s)", token);
8915 return;
8918 /* Get the length of the option name. */
8919 length = option_part - token;
8920 /* Skip the '=' to get to the option string. */
8921 option_part++;
8923 for (; fn->name != NULL; fn++)
8925 if (!strncmp (fn->name, token, length))
8927 fn->parse_override (option_part, tune);
8928 return;
8932 error ("unknown tuning option (%s)",token);
8933 return;
8936 /* A checking mechanism for the implementation of the tls size. */
8938 static void
8939 initialize_aarch64_tls_size (struct gcc_options *opts)
8941 if (aarch64_tls_size == 0)
8942 aarch64_tls_size = 24;
8944 switch (opts->x_aarch64_cmodel_var)
8946 case AARCH64_CMODEL_TINY:
8947 /* Both the default and maximum TLS size allowed under tiny is 1M which
8948 needs two instructions to address, so we clamp the size to 24. */
8949 if (aarch64_tls_size > 24)
8950 aarch64_tls_size = 24;
8951 break;
8952 case AARCH64_CMODEL_SMALL:
8953 /* The maximum TLS size allowed under small is 4G. */
8954 if (aarch64_tls_size > 32)
8955 aarch64_tls_size = 32;
8956 break;
8957 case AARCH64_CMODEL_LARGE:
8958 /* The maximum TLS size allowed under large is 16E.
8959 FIXME: 16E should be 64bit, we only support 48bit offset now. */
8960 if (aarch64_tls_size > 48)
8961 aarch64_tls_size = 48;
8962 break;
8963 default:
8964 gcc_unreachable ();
8967 return;
8970 /* Parse STRING looking for options in the format:
8971 string :: option:string
8972 option :: name=substring
8973 name :: {a-z}
8974 substring :: defined by option. */
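/* For example, "-moverride=fuse=adrp+add.cmp+branch" enables only those two
   fusion pairs; each name=value pair is dispatched through the handlers in
   aarch64_tuning_override_functions.  */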
8976 static void
8977 aarch64_parse_override_string (const char* input_string,
8978 struct tune_params* tune)
8980 const char separator = ':';
8981 size_t string_length = strlen (input_string) + 1;
8982 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
8983 char *string = string_root;
8984 strncpy (string, input_string, string_length);
8985 string[string_length - 1] = '\0';
8987 char* ntoken = string;
8989 while ((ntoken = strchr (string, separator)))
8991 size_t token_length = ntoken - string;
8992 /* Make this substring look like a string. */
8993 *ntoken = '\0';
8994 aarch64_parse_one_override_token (string, token_length, tune);
8995 string = ++ntoken;
8998 /* One last option to parse. */
8999 aarch64_parse_one_override_token (string, strlen (string), tune);
9000 free (string_root);
9004 static void
9005 aarch64_override_options_after_change_1 (struct gcc_options *opts)
9007 /* PR 70044: We have to be careful about being called multiple times for the
9008 same function. This means all changes should be repeatable. */
9010 /* If the frame pointer is enabled, set it to a special value that behaves
9011 similar to frame pointer omission. If we don't do this all leaf functions
9012 will get a frame pointer even if flag_omit_leaf_frame_pointer is set.
9013 If flag_omit_frame_pointer has this special value, we must force the
9014 frame pointer if not in a leaf function. We also need to force it in a
9015 leaf function if flag_omit_frame_pointer is not set or if LR is used. */
9016 if (opts->x_flag_omit_frame_pointer == 0)
9017 opts->x_flag_omit_frame_pointer = 2;
9019 /* If not optimizing for size, set the default
9020 alignment to what the target wants. */
9021 if (!opts->x_optimize_size)
9023 if (opts->x_align_loops <= 0)
9024 opts->x_align_loops = aarch64_tune_params.loop_align;
9025 if (opts->x_align_jumps <= 0)
9026 opts->x_align_jumps = aarch64_tune_params.jump_align;
9027 if (opts->x_align_functions <= 0)
9028 opts->x_align_functions = aarch64_tune_params.function_align;
9031 /* We default to no pc-relative literal loads. */
9033 aarch64_pcrelative_literal_loads = false;
9035 /* If -mpc-relative-literal-loads is set on the command line, this
9036 implies that the user asked for PC relative literal loads. */
9037 if (opts->x_pcrelative_literal_loads == 1)
9038 aarch64_pcrelative_literal_loads = true;
9040 /* In the tiny memory model it makes no sense to disallow PC relative
9041 literal pool loads. */
9042 if (aarch64_cmodel == AARCH64_CMODEL_TINY
9043 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
9044 aarch64_pcrelative_literal_loads = true;
9046 /* When enabling the lower precision Newton series for the square root, also
9047 enable it for the reciprocal square root, since the latter is an
9048 intermediary step for the former. */
9049 if (flag_mlow_precision_sqrt)
9050 flag_mrecip_low_precision_sqrt = true;
9053 /* 'Unpack' up the internal tuning structs and update the options
9054 in OPTS. The caller must have set up selected_tune and selected_arch
9055 as all the other target-specific codegen decisions are
9056 derived from them. */
9058 void
9059 aarch64_override_options_internal (struct gcc_options *opts)
9061 aarch64_tune_flags = selected_tune->flags;
9062 aarch64_tune = selected_tune->sched_core;
9063 /* Make a copy of the tuning parameters attached to the core, which
9064 we may later overwrite. */
9065 aarch64_tune_params = *(selected_tune->tune);
9066 aarch64_architecture_version = selected_arch->architecture_version;
9068 if (opts->x_aarch64_override_tune_string)
9069 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
9070 &aarch64_tune_params);
9072 /* This target defaults to strict volatile bitfields. */
9073 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
9074 opts->x_flag_strict_volatile_bitfields = 1;
9076 initialize_aarch64_code_model (opts);
9077 initialize_aarch64_tls_size (opts);
9079 int queue_depth = 0;
9080 switch (aarch64_tune_params.autoprefetcher_model)
9082 case tune_params::AUTOPREFETCHER_OFF:
9083 queue_depth = -1;
9084 break;
9085 case tune_params::AUTOPREFETCHER_WEAK:
9086 queue_depth = 0;
9087 break;
9088 case tune_params::AUTOPREFETCHER_STRONG:
9089 queue_depth = max_insn_queue_index + 1;
9090 break;
9091 default:
9092 gcc_unreachable ();
9095 /* We don't mind passing in global_options_set here as we don't use
9096 the *options_set structs anyway. */
9097 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
9098 queue_depth,
9099 opts->x_param_values,
9100 global_options_set.x_param_values);
9102 /* Set up parameters to be used in prefetching algorithm. Do not
9103 override the defaults unless we are tuning for a core we have
9104 researched values for. */
9105 if (aarch64_tune_params.prefetch->num_slots > 0)
9106 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
9107 aarch64_tune_params.prefetch->num_slots,
9108 opts->x_param_values,
9109 global_options_set.x_param_values);
9110 if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
9111 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
9112 aarch64_tune_params.prefetch->l1_cache_size,
9113 opts->x_param_values,
9114 global_options_set.x_param_values);
9115 if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
9116 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
9117 aarch64_tune_params.prefetch->l1_cache_line_size,
9118 opts->x_param_values,
9119 global_options_set.x_param_values);
9120 if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
9121 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
9122 aarch64_tune_params.prefetch->l2_cache_size,
9123 opts->x_param_values,
9124 global_options_set.x_param_values);
9126 /* Use the alternative scheduling-pressure algorithm by default. */
9127 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
9128 opts->x_param_values,
9129 global_options_set.x_param_values);
9131 /* Enable sw prefetching at specified optimization level for
9132 CPUS that have prefetch. Lower optimization level threshold by 1
9133 when profiling is enabled. */
9134 if (opts->x_flag_prefetch_loop_arrays < 0
9135 && !opts->x_optimize_size
9136 && aarch64_tune_params.prefetch->default_opt_level >= 0
9137 && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
9138 opts->x_flag_prefetch_loop_arrays = 1;
9140 aarch64_override_options_after_change_1 (opts);
9143 /* Print a hint with a suggestion for a core or architecture name that
9144 most closely resembles what the user passed in STR. ARCH is true if
9145 the user is asking for an architecture name. ARCH is false if the user
9146 is asking for a core name. */
9148 static void
9149 aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
9151 auto_vec<const char *> candidates;
9152 const struct processor *entry = arch ? all_architectures : all_cores;
9153 for (; entry->name != NULL; entry++)
9154 candidates.safe_push (entry->name);
9155 char *s;
9156 const char *hint = candidates_list_and_hint (str, s, candidates);
9157 if (hint)
9158 inform (input_location, "valid arguments are: %s;"
9159 " did you mean %qs?", s, hint);
9160 XDELETEVEC (s);
9163 /* Print a hint with a suggestion for a core name that most closely resembles
9164 what the user passed in STR. */
9166 inline static void
9167 aarch64_print_hint_for_core (const char *str)
9169 aarch64_print_hint_for_core_or_arch (str, false);
9172 /* Print a hint with a suggestion for an architecture name that most closely
9173 resembles what the user passed in STR. */
9175 inline static void
9176 aarch64_print_hint_for_arch (const char *str)
9178 aarch64_print_hint_for_core_or_arch (str, true);
9181 /* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
9182 specified in STR and throw errors if appropriate. Put the results if
9183 they are valid in RES and ISA_FLAGS. Return whether the option is
9184 valid. */
9186 static bool
9187 aarch64_validate_mcpu (const char *str, const struct processor **res,
9188 unsigned long *isa_flags)
9190 enum aarch64_parse_opt_result parse_res
9191 = aarch64_parse_cpu (str, res, isa_flags);
9193 if (parse_res == AARCH64_PARSE_OK)
9194 return true;
9196 switch (parse_res)
9198 case AARCH64_PARSE_MISSING_ARG:
9199 error ("missing cpu name in %<-mcpu=%s%>", str);
9200 break;
9201 case AARCH64_PARSE_INVALID_ARG:
9202 error ("unknown value %qs for -mcpu", str);
9203 aarch64_print_hint_for_core (str);
9204 break;
9205 case AARCH64_PARSE_INVALID_FEATURE:
9206 error ("invalid feature modifier in %<-mcpu=%s%>", str);
9207 break;
9208 default:
9209 gcc_unreachable ();
9212 return false;
9215 /* Validate a command-line -march option. Parse the arch and extensions
9216 (if any) specified in STR and throw errors if appropriate. Put the
9217 results, if they are valid, in RES and ISA_FLAGS. Return whether the
9218 option is valid. */
9220 static bool
9221 aarch64_validate_march (const char *str, const struct processor **res,
9222 unsigned long *isa_flags)
9224 enum aarch64_parse_opt_result parse_res
9225 = aarch64_parse_arch (str, res, isa_flags);
9227 if (parse_res == AARCH64_PARSE_OK)
9228 return true;
9230 switch (parse_res)
9232 case AARCH64_PARSE_MISSING_ARG:
9233 error ("missing arch name in %<-march=%s%>", str);
9234 break;
9235 case AARCH64_PARSE_INVALID_ARG:
9236 error ("unknown value %qs for -march", str);
9237 aarch64_print_hint_for_arch (str);
9238 break;
9239 case AARCH64_PARSE_INVALID_FEATURE:
9240 error ("invalid feature modifier in %<-march=%s%>", str);
9241 break;
9242 default:
9243 gcc_unreachable ();
9246 return false;
9249 /* Validate a command-line -mtune option. Parse the cpu
9250 specified in STR and throw errors if appropriate. Put the
9251 result, if it is valid, in RES. Return whether the option is
9252 valid. */
9254 static bool
9255 aarch64_validate_mtune (const char *str, const struct processor **res)
9257 enum aarch64_parse_opt_result parse_res
9258 = aarch64_parse_tune (str, res);
9260 if (parse_res == AARCH64_PARSE_OK)
9261 return true;
9263 switch (parse_res)
9265 case AARCH64_PARSE_MISSING_ARG:
9266 error ("missing cpu name in %<-mtune=%s%>", str);
9267 break;
9268 case AARCH64_PARSE_INVALID_ARG:
9269 error ("unknown value %qs for -mtune", str);
9270 aarch64_print_hint_for_core (str);
9271 break;
9272 default:
9273 gcc_unreachable ();
9275 return false;
9278 /* Return the CPU corresponding to the enum CPU.
9279 If it doesn't specify a cpu, return the default. */
9281 static const struct processor *
9282 aarch64_get_tune_cpu (enum aarch64_processor cpu)
9284 if (cpu != aarch64_none)
9285 return &all_cores[cpu];
9287 /* The & 0x3f is to extract the bottom 6 bits that encode the
9288 default cpu as selected by the --with-cpu GCC configure option
9289 in config.gcc.
9290 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
9291 flags mechanism should be reworked to make it more sane. */
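  /* The remaining upper bits of TARGET_CPU_DEFAULT hold the default ISA
     flags; they are recovered with ">> 6" in aarch64_override_options.  */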
9292 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
9295 /* Return the architecture corresponding to the enum ARCH.
9296 If it doesn't specify a valid architecture, return the default. */
9298 static const struct processor *
9299 aarch64_get_arch (enum aarch64_arch arch)
9301 if (arch != aarch64_no_arch)
9302 return &all_architectures[arch];
9304 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
9306 return &all_architectures[cpu->arch];
9309 /* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
9310 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
9311 tuning structs. In particular it must set selected_tune and
9312 aarch64_isa_flags that define the available ISA features and tuning
9313 decisions. It must also set selected_arch as this will be used to
9314 output the .arch asm tags for each function. */
9316 static void
9317 aarch64_override_options (void)
9319 unsigned long cpu_isa = 0;
9320 unsigned long arch_isa = 0;
9321 aarch64_isa_flags = 0;
9323 bool valid_cpu = true;
9324 bool valid_tune = true;
9325 bool valid_arch = true;
9327 selected_cpu = NULL;
9328 selected_arch = NULL;
9329 selected_tune = NULL;
9331 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
9332 If either of -march or -mtune is given, they override their
9333 respective component of -mcpu. */
9334 if (aarch64_cpu_string)
9335 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
9336 &cpu_isa);
9338 if (aarch64_arch_string)
9339 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
9340 &arch_isa);
9342 if (aarch64_tune_string)
9343 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
9345 /* If the user did not specify a processor, choose the default
9346 one for them. This will be the CPU set during configuration using
9347 --with-cpu, otherwise it is "generic". */
9348 if (!selected_cpu)
9350 if (selected_arch)
9352 selected_cpu = &all_cores[selected_arch->ident];
9353 aarch64_isa_flags = arch_isa;
9354 explicit_arch = selected_arch->arch;
9356 else
9358 /* Get default configure-time CPU. */
9359 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
9360 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
9363 if (selected_tune)
9364 explicit_tune_core = selected_tune->ident;
9366 /* If both -mcpu and -march are specified check that they are architecturally
9367 compatible, warn if they're not and prefer the -march ISA flags. */
9368 else if (selected_arch)
9370 if (selected_arch->arch != selected_cpu->arch)
9372 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
9373 all_architectures[selected_cpu->arch].name,
9374 selected_arch->name);
9376 aarch64_isa_flags = arch_isa;
9377 explicit_arch = selected_arch->arch;
9378 explicit_tune_core = selected_tune ? selected_tune->ident
9379 : selected_cpu->ident;
9381 else
9383 /* -mcpu but no -march. */
9384 aarch64_isa_flags = cpu_isa;
9385 explicit_tune_core = selected_tune ? selected_tune->ident
9386 : selected_cpu->ident;
9387 gcc_assert (selected_cpu);
9388 selected_arch = &all_architectures[selected_cpu->arch];
9389 explicit_arch = selected_arch->arch;
9392 /* Set the arch as well as we will need it when outputing
9393 the .arch directive in assembly. */
9394 if (!selected_arch)
9396 gcc_assert (selected_cpu);
9397 selected_arch = &all_architectures[selected_cpu->arch];
9400 if (!selected_tune)
9401 selected_tune = selected_cpu;
9403 #ifndef HAVE_AS_MABI_OPTION
9404 /* The compiler may have been configured with 2.23.* binutils, which does
9405 not have support for ILP32. */
9406 if (TARGET_ILP32)
9407 error ("Assembler does not support -mabi=ilp32");
9408 #endif
9410 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
9411 sorry ("Return address signing is only supported for -mabi=lp64");
9413 /* Make sure we properly set up the explicit options. */
9414 if ((aarch64_cpu_string && valid_cpu)
9415 || (aarch64_tune_string && valid_tune))
9416 gcc_assert (explicit_tune_core != aarch64_none);
9418 if ((aarch64_cpu_string && valid_cpu)
9419 || (aarch64_arch_string && valid_arch))
9420 gcc_assert (explicit_arch != aarch64_no_arch);
9422 aarch64_override_options_internal (&global_options);
9424 /* Save these options as the default ones in case we push and pop them later
9425 while processing functions with potential target attributes. */
9426 target_option_default_node = target_option_current_node
9427 = build_target_option_node (&global_options);
9430 /* Implement targetm.override_options_after_change. */
9432 static void
9433 aarch64_override_options_after_change (void)
9435 aarch64_override_options_after_change_1 (&global_options);
9438 static struct machine_function *
9439 aarch64_init_machine_status (void)
9441 struct machine_function *machine;
9442 machine = ggc_cleared_alloc<machine_function> ();
9443 return machine;
9446 void
9447 aarch64_init_expanders (void)
9449 init_machine_status = aarch64_init_machine_status;
9452 /* A checking mechanism for the implementation of the various code models. */
9453 static void
9454 initialize_aarch64_code_model (struct gcc_options *opts)
9456 if (opts->x_flag_pic)
9458 switch (opts->x_aarch64_cmodel_var)
9460 case AARCH64_CMODEL_TINY:
9461 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
9462 break;
9463 case AARCH64_CMODEL_SMALL:
9464 #ifdef HAVE_AS_SMALL_PIC_RELOCS
9465 aarch64_cmodel = (flag_pic == 2
9466 ? AARCH64_CMODEL_SMALL_PIC
9467 : AARCH64_CMODEL_SMALL_SPIC);
9468 #else
9469 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
9470 #endif
9471 break;
9472 case AARCH64_CMODEL_LARGE:
9473 sorry ("code model %qs with -f%s", "large",
9474 opts->x_flag_pic > 1 ? "PIC" : "pic");
9475 break;
9476 default:
9477 gcc_unreachable ();
9480 else
9481 aarch64_cmodel = opts->x_aarch64_cmodel_var;
9484 /* Implement TARGET_OPTION_SAVE. */
9486 static void
9487 aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
9489 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
9492 /* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
9493 using the information saved in PTR. */
9495 static void
9496 aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
9498 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
9499 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
9500 opts->x_explicit_arch = ptr->x_explicit_arch;
9501 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
9502 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
9504 aarch64_override_options_internal (opts);
9507 /* Implement TARGET_OPTION_PRINT. */
9509 static void
9510 aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
9512 const struct processor *cpu
9513 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
9514 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
9515 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
9516 std::string extension
9517 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
9519 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
9520 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
9521 arch->name, extension.c_str ());
9524 static GTY(()) tree aarch64_previous_fndecl;
9526 void
9527 aarch64_reset_previous_fndecl (void)
9529 aarch64_previous_fndecl = NULL;
9532 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
9533 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
9534 make sure optab availability predicates are recomputed when necessary. */
9536 void
9537 aarch64_save_restore_target_globals (tree new_tree)
9539 if (TREE_TARGET_GLOBALS (new_tree))
9540 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
9541 else if (new_tree == target_option_default_node)
9542 restore_target_globals (&default_target_globals);
9543 else
9544 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
9547 /* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
9548 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
9549 of the function, if such exists. This function may be called multiple
9550 times on a single function so use aarch64_previous_fndecl to avoid
9551 setting up identical state. */
9553 static void
9554 aarch64_set_current_function (tree fndecl)
9556 if (!fndecl || fndecl == aarch64_previous_fndecl)
9557 return;
9559 tree old_tree = (aarch64_previous_fndecl
9560 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
9561 : NULL_TREE);
9563 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
9565 /* If current function has no attributes but the previous one did,
9566 use the default node. */
9567 if (!new_tree && old_tree)
9568 new_tree = target_option_default_node;
9570 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
9571 the default have been handled by aarch64_save_restore_target_globals from
9572 aarch64_pragma_target_parse. */
9573 if (old_tree == new_tree)
9574 return;
9576 aarch64_previous_fndecl = fndecl;
9578 /* First set the target options. */
9579 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
9581 aarch64_save_restore_target_globals (new_tree);
9584 /* Enum describing the various ways we can handle attributes.
9585 In many cases we can reuse the generic option handling machinery. */
9587 enum aarch64_attr_opt_type
9589 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
9590 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
9591 aarch64_attr_enum, /* Attribute sets an enum variable. */
9592 aarch64_attr_custom /* Attribute requires a custom handling function. */
9595 /* All the information needed to handle a target attribute.
9596 NAME is the name of the attribute.
9597 ATTR_TYPE specifies the type of behavior of the attribute as described
9598 in the definition of enum aarch64_attr_opt_type.
9599 ALLOW_NEG is true if the attribute supports a "no-" form.
9600 HANDLER is the function that takes the attribute string as an argument
9601 It is needed only when the ATTR_TYPE is aarch64_attr_custom.
9602 OPT_NUM is the enum specifying the option that the attribute modifies.
9603 This is needed for attributes that mirror the behavior of a command-line
9604 option, that is, it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
9605 aarch64_attr_enum. */
9607 struct aarch64_attribute_info
9609 const char *name;
9610 enum aarch64_attr_opt_type attr_type;
9611 bool allow_neg;
9612 bool (*handler) (const char *);
9613 enum opt_code opt_num;
9616 /* Handle the ARCH_STR argument to the arch= target attribute. */
9618 static bool
9619 aarch64_handle_attr_arch (const char *str)
9621 const struct processor *tmp_arch = NULL;
9622 enum aarch64_parse_opt_result parse_res
9623 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags);
9625 if (parse_res == AARCH64_PARSE_OK)
9627 gcc_assert (tmp_arch);
9628 selected_arch = tmp_arch;
9629 explicit_arch = selected_arch->arch;
9630 return true;
9633 switch (parse_res)
9635 case AARCH64_PARSE_MISSING_ARG:
9636 error ("missing name in %<target(\"arch=\")%> pragma or attribute");
9637 break;
9638 case AARCH64_PARSE_INVALID_ARG:
9639 error ("invalid name (\"%s\") in %<target(\"arch=\")%> pragma or attribute", str);
9640 aarch64_print_hint_for_arch (str);
9641 break;
9642 case AARCH64_PARSE_INVALID_FEATURE:
9643 error ("invalid value (\"%s\") in %<target()%> pragma or attribute", str);
9644 break;
9645 default:
9646 gcc_unreachable ();
9649 return false;
9652 /* Handle the argument CPU_STR to the cpu= target attribute. */
9654 static bool
9655 aarch64_handle_attr_cpu (const char *str)
9657 const struct processor *tmp_cpu = NULL;
9658 enum aarch64_parse_opt_result parse_res
9659 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags);
9661 if (parse_res == AARCH64_PARSE_OK)
9663 gcc_assert (tmp_cpu);
9664 selected_tune = tmp_cpu;
9665 explicit_tune_core = selected_tune->ident;
9667 selected_arch = &all_architectures[tmp_cpu->arch];
9668 explicit_arch = selected_arch->arch;
9669 return true;
9672 switch (parse_res)
9674 case AARCH64_PARSE_MISSING_ARG:
9675 error ("missing name in %<target(\"cpu=\")%> pragma or attribute");
9676 break;
9677 case AARCH64_PARSE_INVALID_ARG:
9678 error ("invalid name (\"%s\") in %<target(\"cpu=\")%> pragma or attribute", str);
9679 aarch64_print_hint_for_core (str);
9680 break;
9681 case AARCH64_PARSE_INVALID_FEATURE:
9682 error ("invalid value (\"%s\") in %<target()%> pragma or attribute", str);
9683 break;
9684 default:
9685 gcc_unreachable ();
9688 return false;
9691 /* Handle the argument STR to the tune= target attribute. */
9693 static bool
9694 aarch64_handle_attr_tune (const char *str)
9696 const struct processor *tmp_tune = NULL;
9697 enum aarch64_parse_opt_result parse_res
9698 = aarch64_parse_tune (str, &tmp_tune);
9700 if (parse_res == AARCH64_PARSE_OK)
9702 gcc_assert (tmp_tune);
9703 selected_tune = tmp_tune;
9704 explicit_tune_core = selected_tune->ident;
9705 return true;
9708 switch (parse_res)
9710 case AARCH64_PARSE_INVALID_ARG:
9711 error ("invalid name (\"%s\") in %<target(\"tune=\")%> pragma or attribute", str);
9712 aarch64_print_hint_for_core (str);
9713 break;
9714 default:
9715 gcc_unreachable ();
9718 return false;
9721 /* Parse an architecture extensions target attribute string specified in STR.
9722 For example "+fp+nosimd". Show any errors if needed. Return TRUE
9723 if successful. Update aarch64_isa_flags to reflect the ISA features
9724 modified. */
9726 static bool
9727 aarch64_handle_attr_isa_flags (char *str)
9729 enum aarch64_parse_opt_result parse_res;
9730 unsigned long isa_flags = aarch64_isa_flags;
9732 /* We allow "+nothing" in the beginning to clear out all architectural
9733 features if the user wants to handpick specific features. */
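  /* For example, "+nothing+crc" clears the feature set and then enables only
     the extensions explicitly named after it (here, CRC).  */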
9734 if (strncmp ("+nothing", str, 8) == 0)
9736 isa_flags = 0;
9737 str += 8;
9740 parse_res = aarch64_parse_extension (str, &isa_flags);
9742 if (parse_res == AARCH64_PARSE_OK)
9744 aarch64_isa_flags = isa_flags;
9745 return true;
9748 switch (parse_res)
9750 case AARCH64_PARSE_MISSING_ARG:
9751 error ("missing value in %<target()%> pragma or attribute");
9752 break;
9754 case AARCH64_PARSE_INVALID_FEATURE:
9755 error ("invalid value (\"%s\") in %<target()%> pragma or attribute", str);
9756 break;
9758 default:
9759 gcc_unreachable ();
9762 return false;
9765 /* The target attributes that we support. On top of these we also support just
9766 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
9767 handled explicitly in aarch64_process_one_target_attr. */
9769 static const struct aarch64_attribute_info aarch64_attributes[] =
9771 { "general-regs-only", aarch64_attr_mask, false, NULL,
9772 OPT_mgeneral_regs_only },
9773 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
9774 OPT_mfix_cortex_a53_835769 },
9775 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
9776 OPT_mfix_cortex_a53_843419 },
9777 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
9778 { "strict-align", aarch64_attr_mask, false, NULL, OPT_mstrict_align },
9779 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
9780 OPT_momit_leaf_frame_pointer },
9781 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
9782 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
9783 OPT_march_ },
9784 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
9785 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
9786 OPT_mtune_ },
9787 { "sign-return-address", aarch64_attr_enum, false, NULL,
9788 OPT_msign_return_address_ },
9789 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
9792 /* Parse ARG_STR which contains the definition of one target attribute.
9793 Show appropriate errors if any or return true if the attribute is valid. */
9795 static bool
9796 aarch64_process_one_target_attr (char *arg_str)
9798 bool invert = false;
9800 size_t len = strlen (arg_str);
9802 if (len == 0)
9804 error ("malformed %<target()%> pragma or attribute");
9805 return false;
9808 char *str_to_check = (char *) alloca (len + 1);
9809 strcpy (str_to_check, arg_str);
9811 /* Skip leading whitespace. */
9812 while (*str_to_check == ' ' || *str_to_check == '\t')
9813 str_to_check++;
9815 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
9816 It is easier to detect and handle it explicitly here rather than going
9817 through the machinery for the rest of the target attributes in this
9818 function. */
9819 if (*str_to_check == '+')
9820 return aarch64_handle_attr_isa_flags (str_to_check);
9822 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
9824 invert = true;
9825 str_to_check += 3;
9827 char *arg = strchr (str_to_check, '=');
9829 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
9830 and point ARG to "foo". */
9831 if (arg)
9833 *arg = '\0';
9834 arg++;
9836 const struct aarch64_attribute_info *p_attr;
9837 bool found = false;
9838 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
9840 /* If the names don't match up, or the user has given an argument
9841 to an attribute that doesn't accept one, or didn't give an argument
9842 to an attribute that expects one, fail to match. */
9843 if (strcmp (str_to_check, p_attr->name) != 0)
9844 continue;
9846 found = true;
9847 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
9848 || p_attr->attr_type == aarch64_attr_enum;
9850 if (attr_need_arg_p ^ (arg != NULL))
9852 error ("pragma or attribute %<target(\"%s\")%> does not accept an argument", str_to_check);
9853 return false;
9856 /* If the name matches but the attribute does not allow "no-" versions
9857 then we can't match. */
9858 if (invert && !p_attr->allow_neg)
9860 error ("pragma or attribute %<target(\"%s\")%> does not allow a negated form", str_to_check);
9861 return false;
9864 switch (p_attr->attr_type)
9866 /* Has a custom handler registered.
9867 For example, cpu=, arch=, tune=. */
9868 case aarch64_attr_custom:
9869 gcc_assert (p_attr->handler);
9870 if (!p_attr->handler (arg))
9871 return false;
9872 break;
9874 /* Either set or unset a boolean option. */
9875 case aarch64_attr_bool:
9877 struct cl_decoded_option decoded;
9879 generate_option (p_attr->opt_num, NULL, !invert,
9880 CL_TARGET, &decoded);
9881 aarch64_handle_option (&global_options, &global_options_set,
9882 &decoded, input_location);
9883 break;
9885 /* Set or unset a bit in the target_flags. aarch64_handle_option
9886 should know what mask to apply given the option number. */
9887 case aarch64_attr_mask:
9889 struct cl_decoded_option decoded;
9890 /* We only need to specify the option number.
9891 aarch64_handle_option will know which mask to apply. */
9892 decoded.opt_index = p_attr->opt_num;
9893 decoded.value = !invert;
9894 aarch64_handle_option (&global_options, &global_options_set,
9895 &decoded, input_location);
9896 break;
9898 /* Use the option setting machinery to set an option to an enum. */
9899 case aarch64_attr_enum:
9901 gcc_assert (arg);
9902 bool valid;
9903 int value;
9904 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
9905 &value, CL_TARGET);
9906 if (valid)
9908 set_option (&global_options, NULL, p_attr->opt_num, value,
9909 NULL, DK_UNSPECIFIED, input_location,
9910 global_dc);
9912 else
9914 error ("pragma or attribute %<target(\"%s=%s\")%> is not valid", str_to_check, arg);
9916 break;
9918 default:
9919 gcc_unreachable ();
9923 /* If we reached here we either have found an attribute and validated
9924 it or didn't match any. If we matched an attribute but its arguments
9925 were malformed we will have returned false already. */
9926 return found;
9929 /* Count how many times the character C appears in
9930 NULL-terminated string STR. */
9932 static unsigned int
9933 num_occurences_in_str (char c, char *str)
9935 unsigned int res = 0;
9936 while (*str != '\0')
9938 if (*str == c)
9939 res++;
9941 str++;
9944 return res;
9947 /* Parse the tree in ARGS that contains the target attribute information
9948 and update the global target options space. */
9950 bool
9951 aarch64_process_target_attr (tree args)
9953 if (TREE_CODE (args) == TREE_LIST)
9957 tree head = TREE_VALUE (args);
9958 if (head)
9960 if (!aarch64_process_target_attr (head))
9961 return false;
9963 args = TREE_CHAIN (args);
9964 } while (args);
9966 return true;
9969 if (TREE_CODE (args) != STRING_CST)
9971 error ("attribute %<target%> argument not a string");
9972 return false;
9975 size_t len = strlen (TREE_STRING_POINTER (args));
9976 char *str_to_check = (char *) alloca (len + 1);
9977 strcpy (str_to_check, TREE_STRING_POINTER (args));
9979 if (len == 0)
9981 error ("malformed %<target()%> pragma or attribute");
9982 return false;
9985 /* Used to catch empty spaces between commas i.e.
9986 attribute ((target ("attr1,,attr2"))). */
9987 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
9989 /* Handle multiple target attributes separated by ','. */
9990 char *token = strtok (str_to_check, ",");
9992 unsigned int num_attrs = 0;
9993 while (token)
9995 num_attrs++;
9996 if (!aarch64_process_one_target_attr (token))
9998 error ("pragma or attribute %<target(\"%s\")%> is not valid", token);
9999 return false;
10002 token = strtok (NULL, ",");
10005 if (num_attrs != num_commas + 1)
10007 error ("malformed %<target(\"%s\")%> pragma or attribute", TREE_STRING_POINTER (args));
10008 return false;
10011 return true;
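/* For illustration (a sketch, not required by the code above): the kind of
   source-level strings this parser is handed.  Each comma-separated token
   goes through aarch64_process_one_target_attr, and the accepted names are
   those listed in the aarch64_attributes table, e.g.

     __attribute__ ((target ("arch=armv8-a+crc")))
     int with_crc (void) { return 1; }

     __attribute__ ((target ("no-strict-align,tune=cortex-a53")))
     int relaxed (void) { return 2; }

   A leading '+' (e.g. "+crc") is treated purely as ISA-flag toggles by
   aarch64_handle_attr_isa_flags.  */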
10014 /* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
10015 process attribute ((target ("..."))). */
10017 static bool
10018 aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
10020 struct cl_target_option cur_target;
10021 bool ret;
10022 tree old_optimize;
10023 tree new_target, new_optimize;
10024 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
10026 /* If what we're processing is the current pragma string then the
10027 target option node is already stored in target_option_current_node
10028 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
10029 having to re-parse the string. This is especially useful to keep
10030 arm_neon.h compile times down since that header contains a lot
10031 of intrinsics enclosed in pragmas. */
10032 if (!existing_target && args == current_target_pragma)
10034 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
10035 return true;
10037 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
10039 old_optimize = build_optimization_node (&global_options);
10040 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
10042 /* If the function changed the optimization levels as well as setting
10043 target options, start with the optimizations specified. */
10044 if (func_optimize && func_optimize != old_optimize)
10045 cl_optimization_restore (&global_options,
10046 TREE_OPTIMIZATION (func_optimize));
10048 /* Save the current target options to restore at the end. */
10049 cl_target_option_save (&cur_target, &global_options);
10051 /* If fndecl already has some target attributes applied to it, unpack
10052 them so that we add this attribute on top of them, rather than
10053 overwriting them. */
10054 if (existing_target)
10056 struct cl_target_option *existing_options
10057 = TREE_TARGET_OPTION (existing_target);
10059 if (existing_options)
10060 cl_target_option_restore (&global_options, existing_options);
10062 else
10063 cl_target_option_restore (&global_options,
10064 TREE_TARGET_OPTION (target_option_current_node));
10066 ret = aarch64_process_target_attr (args);
10068 /* Set up any additional state. */
10069 if (ret)
10071 aarch64_override_options_internal (&global_options);
10072 /* Initialize SIMD builtins if we haven't already.
10073 Set current_target_pragma to NULL for the duration so that
10074 the builtin initialization code doesn't try to tag the functions
10075 being built with the attributes specified by any current pragma, thus
10076 going into an infinite recursion. */
10077 if (TARGET_SIMD)
10079 tree saved_current_target_pragma = current_target_pragma;
10080 current_target_pragma = NULL;
10081 aarch64_init_simd_builtins ();
10082 current_target_pragma = saved_current_target_pragma;
10084 new_target = build_target_option_node (&global_options);
10086 else
10087 new_target = NULL;
10089 new_optimize = build_optimization_node (&global_options);
10091 if (fndecl && ret)
10093 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
10095 if (old_optimize != new_optimize)
10096 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
10099 cl_target_option_restore (&global_options, &cur_target);
10101 if (old_optimize != new_optimize)
10102 cl_optimization_restore (&global_options,
10103 TREE_OPTIMIZATION (old_optimize));
10104 return ret;
10107 /* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
10108 tri-bool options (yes, no, don't care) and the default value is
10109 DEF, determine whether to reject inlining. */
10111 static bool
10112 aarch64_tribools_ok_for_inlining_p (int caller, int callee,
10113 int dont_care, int def)
10115 /* If the callee doesn't care, always allow inlining. */
10116 if (callee == dont_care)
10117 return true;
10119 /* If the caller doesn't care, always allow inlining. */
10120 if (caller == dont_care)
10121 return true;
10123 /* Otherwise, allow inlining if either the callee and caller values
10124 agree, or if the callee is using the default value. */
10125 return (callee == caller || callee == def);
10128 /* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
10129 to inline CALLEE into CALLER based on target-specific info.
10130 Make sure that the caller and callee have compatible architectural
10131 features. Then go through the other possible target attributes
10132 and see if they can block inlining. Try not to reject always_inline
10133 callees unless they are incompatible architecturally. */
10135 static bool
10136 aarch64_can_inline_p (tree caller, tree callee)
10138 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
10139 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
10141 /* If callee has no option attributes, then it is ok to inline. */
10142 if (!callee_tree)
10143 return true;
10145 struct cl_target_option *caller_opts
10146 = TREE_TARGET_OPTION (caller_tree ? caller_tree
10147 : target_option_default_node);
10149 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
10152 /* Callee's ISA flags should be a subset of the caller's. */
10153 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
10154 != callee_opts->x_aarch64_isa_flags)
10155 return false;
10157 /* Allow a non-strict-aligned function to be inlined into a
10158 strict-aligned one. */
10159 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
10160 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
10161 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
10162 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
10163 return false;
10165 bool always_inline = lookup_attribute ("always_inline",
10166 DECL_ATTRIBUTES (callee));
10168 /* If the architectural features match up and the callee is always_inline
10169 then the other attributes don't matter. */
10170 if (always_inline)
10171 return true;
10173 if (caller_opts->x_aarch64_cmodel_var
10174 != callee_opts->x_aarch64_cmodel_var)
10175 return false;
10177 if (caller_opts->x_aarch64_tls_dialect
10178 != callee_opts->x_aarch64_tls_dialect)
10179 return false;
10181 /* Honour explicit requests to workaround errata. */
10182 if (!aarch64_tribools_ok_for_inlining_p (
10183 caller_opts->x_aarch64_fix_a53_err835769,
10184 callee_opts->x_aarch64_fix_a53_err835769,
10185 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
10186 return false;
10188 if (!aarch64_tribools_ok_for_inlining_p (
10189 caller_opts->x_aarch64_fix_a53_err843419,
10190 callee_opts->x_aarch64_fix_a53_err843419,
10191 2, TARGET_FIX_ERR_A53_843419))
10192 return false;
10194 /* If the user explicitly specified -momit-leaf-frame-pointer for the
10195 caller and callee and they don't match up, reject inlining. */
10196 if (!aarch64_tribools_ok_for_inlining_p (
10197 caller_opts->x_flag_omit_leaf_frame_pointer,
10198 callee_opts->x_flag_omit_leaf_frame_pointer,
10199 2, 1))
10200 return false;
10202 /* If the callee has specific tuning overrides, respect them. */
10203 if (callee_opts->x_aarch64_override_tune_string != NULL
10204 && caller_opts->x_aarch64_override_tune_string == NULL)
10205 return false;
10207 /* If the user specified tuning override strings for the
10208 caller and callee and they don't match up, reject inlining.
10209 We just do a string compare here, we don't analyze the meaning
10210 of the string, as it would be too costly for little gain. */
10211 if (callee_opts->x_aarch64_override_tune_string
10212 && caller_opts->x_aarch64_override_tune_string
10213 && (strcmp (callee_opts->x_aarch64_override_tune_string,
10214 caller_opts->x_aarch64_override_tune_string) != 0))
10215 return false;
10217 return true;
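/* For illustration (a sketch, not taken from the sources): assuming the
   command-line default does not enable CRC, the hook above permits the
   first call below to be inlined but refuses the second, because the
   callee's ISA flags must be a subset of the caller's.

     __attribute__ ((target ("arch=armv8-a+crc")))
     static inline int callee (int x) { return x + 1; }

     __attribute__ ((target ("arch=armv8-a+crc")))
     int caller_crc (int x) { return callee (x); }    // inlinable

     int caller_plain (int x) { return callee (x); }  // inlining rejected  */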
10220 /* Return true if SYMBOL_REF X binds locally. */
10222 static bool
10223 aarch64_symbol_binds_local_p (const_rtx x)
10225 return (SYMBOL_REF_DECL (x)
10226 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
10227 : SYMBOL_REF_LOCAL_P (x));
10230 /* Return true if SYMBOL_REF X is thread local */
10231 static bool
10232 aarch64_tls_symbol_p (rtx x)
10234 if (! TARGET_HAVE_TLS)
10235 return false;
10237 if (GET_CODE (x) != SYMBOL_REF)
10238 return false;
10240 return SYMBOL_REF_TLS_MODEL (x) != 0;
10243 /* Classify a TLS symbol into one of the TLS kinds. */
10244 enum aarch64_symbol_type
10245 aarch64_classify_tls_symbol (rtx x)
10247 enum tls_model tls_kind = tls_symbolic_operand_type (x);
10249 switch (tls_kind)
10251 case TLS_MODEL_GLOBAL_DYNAMIC:
10252 case TLS_MODEL_LOCAL_DYNAMIC:
10253 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
10255 case TLS_MODEL_INITIAL_EXEC:
10256 switch (aarch64_cmodel)
10258 case AARCH64_CMODEL_TINY:
10259 case AARCH64_CMODEL_TINY_PIC:
10260 return SYMBOL_TINY_TLSIE;
10261 default:
10262 return SYMBOL_SMALL_TLSIE;
10265 case TLS_MODEL_LOCAL_EXEC:
10266 if (aarch64_tls_size == 12)
10267 return SYMBOL_TLSLE12;
10268 else if (aarch64_tls_size == 24)
10269 return SYMBOL_TLSLE24;
10270 else if (aarch64_tls_size == 32)
10271 return SYMBOL_TLSLE32;
10272 else if (aarch64_tls_size == 48)
10273 return SYMBOL_TLSLE48;
10274 else
10275 gcc_unreachable ();
10277 case TLS_MODEL_EMULATED:
10278 case TLS_MODEL_NONE:
10279 return SYMBOL_FORCE_TO_MEM;
10281 default:
10282 gcc_unreachable ();
10286 /* Return the method that should be used to access SYMBOL_REF or
10287 LABEL_REF X. */
10289 enum aarch64_symbol_type
10290 aarch64_classify_symbol (rtx x, rtx offset)
10292 if (GET_CODE (x) == LABEL_REF)
10294 switch (aarch64_cmodel)
10296 case AARCH64_CMODEL_LARGE:
10297 return SYMBOL_FORCE_TO_MEM;
10299 case AARCH64_CMODEL_TINY_PIC:
10300 case AARCH64_CMODEL_TINY:
10301 return SYMBOL_TINY_ABSOLUTE;
10303 case AARCH64_CMODEL_SMALL_SPIC:
10304 case AARCH64_CMODEL_SMALL_PIC:
10305 case AARCH64_CMODEL_SMALL:
10306 return SYMBOL_SMALL_ABSOLUTE;
10308 default:
10309 gcc_unreachable ();
10313 if (GET_CODE (x) == SYMBOL_REF)
10315 if (aarch64_tls_symbol_p (x))
10316 return aarch64_classify_tls_symbol (x);
10318 switch (aarch64_cmodel)
10320 case AARCH64_CMODEL_TINY:
10321 /* When we retrieve symbol + offset address, we have to make sure
10322 the offset does not cause overflow of the final address. But
10323 we have no way of knowing the address of symbol at compile time
10324 so we can't accurately say if the distance between the PC and
10325 symbol + offset is outside the addressable range of +/-1M in the
10326 TINY code model. So we rely on images not being greater than
10327 1M, cap the offset at 1M, and anything beyond 1M will have to
10328 be loaded using an alternative mechanism. Furthermore if the
10329 symbol is a weak reference to something that isn't known to
10330 resolve to a symbol in this module, then force to memory. */
10331 if ((SYMBOL_REF_WEAK (x)
10332 && !aarch64_symbol_binds_local_p (x))
10333 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
10334 return SYMBOL_FORCE_TO_MEM;
10335 return SYMBOL_TINY_ABSOLUTE;
10337 case AARCH64_CMODEL_SMALL:
10338 /* Same reasoning as the tiny code model, but the offset cap here is
10339 4G. */
10340 if ((SYMBOL_REF_WEAK (x)
10341 && !aarch64_symbol_binds_local_p (x))
10342 || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
10343 HOST_WIDE_INT_C (4294967264)))
10344 return SYMBOL_FORCE_TO_MEM;
10345 return SYMBOL_SMALL_ABSOLUTE;
10347 case AARCH64_CMODEL_TINY_PIC:
10348 if (!aarch64_symbol_binds_local_p (x))
10349 return SYMBOL_TINY_GOT;
10350 return SYMBOL_TINY_ABSOLUTE;
10352 case AARCH64_CMODEL_SMALL_SPIC:
10353 case AARCH64_CMODEL_SMALL_PIC:
10354 if (!aarch64_symbol_binds_local_p (x))
10355 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
10356 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
10357 return SYMBOL_SMALL_ABSOLUTE;
10359 case AARCH64_CMODEL_LARGE:
10360 /* This is alright even in PIC code as the constant
10361 pool reference is always PC relative and within
10362 the same translation unit. */
10363 if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
10364 return SYMBOL_SMALL_ABSOLUTE;
10365 else
10366 return SYMBOL_FORCE_TO_MEM;
10368 default:
10369 gcc_unreachable ();
10373 /* By default push everything into the constant pool. */
10374 return SYMBOL_FORCE_TO_MEM;
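/* For illustration (a sketch of typical output, not taken from the
   sources): a global "int x" classified as SYMBOL_SMALL_ABSOLUTE is
   addressed with an ADRP/ADD pair, while SYMBOL_SMALL_GOT_4G adds a GOT
   load; SYMBOL_FORCE_TO_MEM instead places the address in the literal
   pool and loads it from there.

     adrp    x0, x                  // SYMBOL_SMALL_ABSOLUTE
     add     x0, x0, :lo12:x

     adrp    x0, :got:x             // SYMBOL_SMALL_GOT_4G
     ldr     x0, [x0, :got_lo12:x]  */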
10377 bool
10378 aarch64_constant_address_p (rtx x)
10380 return (CONSTANT_P (x) && memory_address_p (DImode, x));
10383 bool
10384 aarch64_legitimate_pic_operand_p (rtx x)
10386 if (GET_CODE (x) == SYMBOL_REF
10387 || (GET_CODE (x) == CONST
10388 && GET_CODE (XEXP (x, 0)) == PLUS
10389 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
10390 return false;
10392 return true;
10395 /* Implement TARGET_LEGITIMATE_CONSTANT_P hook. Return true for constants
10396 that should be rematerialized rather than spilled. */
10398 static bool
10399 aarch64_legitimate_constant_p (machine_mode mode, rtx x)
10401 /* Support CSE and rematerialization of common constants. */
10402 if (CONST_INT_P (x) || CONST_DOUBLE_P (x) || GET_CODE (x) == CONST_VECTOR)
10403 return true;
10405 /* Do not allow vector struct mode constants. We could support
10406 0 and -1 easily, but they need support in aarch64-simd.md. */
10407 if (aarch64_vect_struct_mode_p (mode))
10408 return false;
10410 /* Do not allow wide int constants - this requires support in movti. */
10411 if (CONST_WIDE_INT_P (x))
10412 return false;
10414 /* Do not allow const (plus (anchor_symbol, const_int)). */
10415 if (GET_CODE (x) == CONST)
10417 rtx offset;
10419 split_const (x, &x, &offset);
10421 if (SYMBOL_REF_P (x) && SYMBOL_REF_ANCHOR_P (x))
10422 return false;
10425 if (GET_CODE (x) == HIGH)
10426 x = XEXP (x, 0);
10428 /* Treat symbols as constants. Avoid TLS symbols as they are complex,
10429 so spilling them is better than rematerialization. */
10430 if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
10431 return true;
10433 /* Label references are always constant. */
10434 if (GET_CODE (x) == LABEL_REF)
10435 return true;
10437 return false;
10441 aarch64_load_tp (rtx target)
10443 if (!target
10444 || GET_MODE (target) != Pmode
10445 || !register_operand (target, Pmode))
10446 target = gen_reg_rtx (Pmode);
10448 /* Can return in any reg. */
10449 emit_insn (gen_aarch64_load_tp_hard (target));
10450 return target;
10453 /* On AAPCS systems, this is the "struct __va_list". */
10454 static GTY(()) tree va_list_type;
10456 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
10457 Return the type to use as __builtin_va_list.
10459 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
10461 struct __va_list
10463 void *__stack;
10464 void *__gr_top;
10465 void *__vr_top;
10466 int __gr_offs;
10467 int __vr_offs;
10468 }; */
10470 static tree
10471 aarch64_build_builtin_va_list (void)
10473 tree va_list_name;
10474 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10476 /* Create the type. */
10477 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
10478 /* Give it the required name. */
10479 va_list_name = build_decl (BUILTINS_LOCATION,
10480 TYPE_DECL,
10481 get_identifier ("__va_list"),
10482 va_list_type);
10483 DECL_ARTIFICIAL (va_list_name) = 1;
10484 TYPE_NAME (va_list_type) = va_list_name;
10485 TYPE_STUB_DECL (va_list_type) = va_list_name;
10487 /* Create the fields. */
10488 f_stack = build_decl (BUILTINS_LOCATION,
10489 FIELD_DECL, get_identifier ("__stack"),
10490 ptr_type_node);
10491 f_grtop = build_decl (BUILTINS_LOCATION,
10492 FIELD_DECL, get_identifier ("__gr_top"),
10493 ptr_type_node);
10494 f_vrtop = build_decl (BUILTINS_LOCATION,
10495 FIELD_DECL, get_identifier ("__vr_top"),
10496 ptr_type_node);
10497 f_groff = build_decl (BUILTINS_LOCATION,
10498 FIELD_DECL, get_identifier ("__gr_offs"),
10499 integer_type_node);
10500 f_vroff = build_decl (BUILTINS_LOCATION,
10501 FIELD_DECL, get_identifier ("__vr_offs"),
10502 integer_type_node);
10504 /* Tell tree-stdarg pass about our internal offset fields.
10505 NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
10506 purposes, to identify whether the code updates the va_list internal
10507 offset fields in an irregular way. */
10508 va_list_gpr_counter_field = f_groff;
10509 va_list_fpr_counter_field = f_vroff;
10511 DECL_ARTIFICIAL (f_stack) = 1;
10512 DECL_ARTIFICIAL (f_grtop) = 1;
10513 DECL_ARTIFICIAL (f_vrtop) = 1;
10514 DECL_ARTIFICIAL (f_groff) = 1;
10515 DECL_ARTIFICIAL (f_vroff) = 1;
10517 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
10518 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
10519 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
10520 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
10521 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
10523 TYPE_FIELDS (va_list_type) = f_stack;
10524 DECL_CHAIN (f_stack) = f_grtop;
10525 DECL_CHAIN (f_grtop) = f_vrtop;
10526 DECL_CHAIN (f_vrtop) = f_groff;
10527 DECL_CHAIN (f_groff) = f_vroff;
10529 /* Compute its layout. */
10530 layout_type (va_list_type);
10532 return va_list_type;
10535 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
10536 static void
10537 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
10539 const CUMULATIVE_ARGS *cum;
10540 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10541 tree stack, grtop, vrtop, groff, vroff;
10542 tree t;
10543 int gr_save_area_size = cfun->va_list_gpr_size;
10544 int vr_save_area_size = cfun->va_list_fpr_size;
10545 int vr_offset;
10547 cum = &crtl->args.info;
10548 if (cfun->va_list_gpr_size)
10549 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
10550 cfun->va_list_gpr_size);
10551 if (cfun->va_list_fpr_size)
10552 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
10553 * UNITS_PER_VREG, cfun->va_list_fpr_size);
10555 if (!TARGET_FLOAT)
10557 gcc_assert (cum->aapcs_nvrn == 0);
10558 vr_save_area_size = 0;
10561 f_stack = TYPE_FIELDS (va_list_type_node);
10562 f_grtop = DECL_CHAIN (f_stack);
10563 f_vrtop = DECL_CHAIN (f_grtop);
10564 f_groff = DECL_CHAIN (f_vrtop);
10565 f_vroff = DECL_CHAIN (f_groff);
10567 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
10568 NULL_TREE);
10569 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
10570 NULL_TREE);
10571 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
10572 NULL_TREE);
10573 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
10574 NULL_TREE);
10575 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
10576 NULL_TREE);
10578 /* Emit code to initialize STACK, which points to the next varargs stack
10579 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
10580 by named arguments. STACK is 8-byte aligned. */
10581 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
10582 if (cum->aapcs_stack_size > 0)
10583 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
10584 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
10585 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10587 /* Emit code to initialize GRTOP, the top of the GR save area.
10588 virtual_incoming_args_rtx should have been 16 byte aligned. */
10589 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
10590 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
10591 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10593 /* Emit code to initialize VRTOP, the top of the VR save area.
10594 This address is gr_save_area_bytes below GRTOP, rounded
10595 down to the next 16-byte boundary. */
10596 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
10597 vr_offset = ROUND_UP (gr_save_area_size,
10598 STACK_BOUNDARY / BITS_PER_UNIT);
10600 if (vr_offset)
10601 t = fold_build_pointer_plus_hwi (t, -vr_offset);
10602 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
10603 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10605 /* Emit code to initialize GROFF, the offset from GRTOP of the
10606 next GPR argument. */
10607 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
10608 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
10609 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10611 /* Likewise emit code to initialize VROFF, the offset from VRTOP
10612 of the next VR argument. */
10613 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
10614 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
10615 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
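/* Worked example (an illustration, not taken from the sources): for
   "void f (int fmt, ...)" with floating point enabled and the full save
   areas (i.e. tree-stdarg has not shrunk them), one named GP argument
   leaves x1-x7 and v0-v7 to be saved, so va_start roughly initializes

     __stack   = incoming-args pointer      (no named stack arguments)
     __gr_top  = incoming-args pointer
     __vr_top  = __gr_top - 64              (56 bytes rounded up to 16)
     __gr_offs = -56                        (7 * 8 bytes of x1-x7)
     __vr_offs = -128                       (8 * 16 bytes of v0-v7).  */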
10618 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
10620 static tree
10621 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
10622 gimple_seq *post_p ATTRIBUTE_UNUSED)
10624 tree addr;
10625 bool indirect_p;
10626 bool is_ha; /* is HFA or HVA. */
10627 bool dw_align; /* double-word align. */
10628 machine_mode ag_mode = VOIDmode;
10629 int nregs;
10630 machine_mode mode;
10632 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
10633 tree stack, f_top, f_off, off, arg, roundup, on_stack;
10634 HOST_WIDE_INT size, rsize, adjust, align;
10635 tree t, u, cond1, cond2;
10637 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10638 if (indirect_p)
10639 type = build_pointer_type (type);
10641 mode = TYPE_MODE (type);
10643 f_stack = TYPE_FIELDS (va_list_type_node);
10644 f_grtop = DECL_CHAIN (f_stack);
10645 f_vrtop = DECL_CHAIN (f_grtop);
10646 f_groff = DECL_CHAIN (f_vrtop);
10647 f_vroff = DECL_CHAIN (f_groff);
10649 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
10650 f_stack, NULL_TREE);
10651 size = int_size_in_bytes (type);
10652 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
10654 dw_align = false;
10655 adjust = 0;
10656 if (aarch64_vfp_is_call_or_return_candidate (mode,
10657 type,
10658 &ag_mode,
10659 &nregs,
10660 &is_ha))
10662 /* TYPE passed in fp/simd registers. */
10663 if (!TARGET_FLOAT)
10664 aarch64_err_no_fpadvsimd (mode, "varargs");
10666 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
10667 unshare_expr (valist), f_vrtop, NULL_TREE);
10668 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
10669 unshare_expr (valist), f_vroff, NULL_TREE);
10671 rsize = nregs * UNITS_PER_VREG;
10673 if (is_ha)
10675 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
10676 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
10678 else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
10679 && size < UNITS_PER_VREG)
10681 adjust = UNITS_PER_VREG - size;
10684 else
10686 /* TYPE passed in general registers. */
10687 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
10688 unshare_expr (valist), f_grtop, NULL_TREE);
10689 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
10690 unshare_expr (valist), f_groff, NULL_TREE);
10691 rsize = ROUND_UP (size, UNITS_PER_WORD);
10692 nregs = rsize / UNITS_PER_WORD;
10694 if (align > 8)
10695 dw_align = true;
10697 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
10698 && size < UNITS_PER_WORD)
10700 adjust = UNITS_PER_WORD - size;
10704 /* Get a local temporary for the field value. */
10705 off = get_initialized_tmp_var (f_off, pre_p, NULL);
10707 /* Emit code to branch if off >= 0. */
10708 t = build2 (GE_EXPR, boolean_type_node, off,
10709 build_int_cst (TREE_TYPE (off), 0));
10710 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
10712 if (dw_align)
10714 /* Emit: offs = (offs + 15) & -16. */
10715 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
10716 build_int_cst (TREE_TYPE (off), 15));
10717 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
10718 build_int_cst (TREE_TYPE (off), -16));
10719 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
10721 else
10722 roundup = NULL;
10724 /* Update ap.__[g|v]r_offs */
10725 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
10726 build_int_cst (TREE_TYPE (off), rsize));
10727 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
10729 /* String up. */
10730 if (roundup)
10731 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
10733 /* [cond2] if (ap.__[g|v]r_offs > 0) */
10734 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
10735 build_int_cst (TREE_TYPE (f_off), 0));
10736 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
10738 /* String up: make sure the assignment happens before the use. */
10739 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
10740 COND_EXPR_ELSE (cond1) = t;
10742 /* Prepare the trees handling the argument that is passed on the stack;
10743 the top-level node is stored in ON_STACK. */
10744 arg = get_initialized_tmp_var (stack, pre_p, NULL);
10745 if (align > 8)
10747 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
10748 t = fold_convert (intDI_type_node, arg);
10749 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
10750 build_int_cst (TREE_TYPE (t), 15));
10751 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10752 build_int_cst (TREE_TYPE (t), -16));
10753 t = fold_convert (TREE_TYPE (arg), t);
10754 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
10756 else
10757 roundup = NULL;
10758 /* Advance ap.__stack */
10759 t = fold_convert (intDI_type_node, arg);
10760 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
10761 build_int_cst (TREE_TYPE (t), size + 7));
10762 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10763 build_int_cst (TREE_TYPE (t), -8));
10764 t = fold_convert (TREE_TYPE (arg), t);
10765 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
10766 /* String up roundup and advance. */
10767 if (roundup)
10768 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
10769 /* String up with arg */
10770 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
10771 /* Big-endianness related address adjustment. */
10772 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
10773 && size < UNITS_PER_WORD)
10775 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
10776 size_int (UNITS_PER_WORD - size));
10777 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
10780 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
10781 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
10783 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
10784 t = off;
10785 if (adjust)
10786 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
10787 build_int_cst (TREE_TYPE (off), adjust));
10789 t = fold_convert (sizetype, t);
10790 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
10792 if (is_ha)
10794 /* type ha; // treat as "struct {ftype field[n];}"
10795 ... [computing offs]
10796 for (i = 0; i <nregs; ++i, offs += 16)
10797 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
10798 return ha; */
10799 int i;
10800 tree tmp_ha, field_t, field_ptr_t;
10802 /* Declare a local variable. */
10803 tmp_ha = create_tmp_var_raw (type, "ha");
10804 gimple_add_tmp_var (tmp_ha);
10806 /* Establish the base type. */
10807 switch (ag_mode)
10809 case E_SFmode:
10810 field_t = float_type_node;
10811 field_ptr_t = float_ptr_type_node;
10812 break;
10813 case E_DFmode:
10814 field_t = double_type_node;
10815 field_ptr_t = double_ptr_type_node;
10816 break;
10817 case E_TFmode:
10818 field_t = long_double_type_node;
10819 field_ptr_t = long_double_ptr_type_node;
10820 break;
10821 case E_HFmode:
10822 field_t = aarch64_fp16_type_node;
10823 field_ptr_t = aarch64_fp16_ptr_type_node;
10824 break;
10825 case E_V2SImode:
10826 case E_V4SImode:
10828 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
10829 field_t = build_vector_type_for_mode (innertype, ag_mode);
10830 field_ptr_t = build_pointer_type (field_t);
10832 break;
10833 default:
10834 gcc_assert (0);
10837 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
10838 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
10839 addr = t;
10840 t = fold_convert (field_ptr_t, addr);
10841 t = build2 (MODIFY_EXPR, field_t,
10842 build1 (INDIRECT_REF, field_t, tmp_ha),
10843 build1 (INDIRECT_REF, field_t, t));
10845 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
10846 for (i = 1; i < nregs; ++i)
10848 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
10849 u = fold_convert (field_ptr_t, addr);
10850 u = build2 (MODIFY_EXPR, field_t,
10851 build2 (MEM_REF, field_t, tmp_ha,
10852 build_int_cst (field_ptr_t,
10853 (i *
10854 int_size_in_bytes (field_t)))),
10855 build1 (INDIRECT_REF, field_t, u));
10856 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
10859 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
10860 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
10863 COND_EXPR_ELSE (cond2) = t;
10864 addr = fold_convert (build_pointer_type (type), cond1);
10865 addr = build_va_arg_indirect_ref (addr);
10867 if (indirect_p)
10868 addr = build_va_arg_indirect_ref (addr);
10870 return addr;
10873 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
10875 static void
10876 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
10877 tree type, int *pretend_size ATTRIBUTE_UNUSED,
10878 int no_rtl)
10880 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10881 CUMULATIVE_ARGS local_cum;
10882 int gr_saved = cfun->va_list_gpr_size;
10883 int vr_saved = cfun->va_list_fpr_size;
10885 /* The caller has advanced CUM up to, but not beyond, the last named
10886 argument. Advance a local copy of CUM past the last "real" named
10887 argument, to find out how many registers are left over. */
10888 local_cum = *cum;
10889 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
10891 /* Find out how many registers we need to save.
10892 Honor tree-stdarg analysis results. */
10893 if (cfun->va_list_gpr_size)
10894 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
10895 cfun->va_list_gpr_size / UNITS_PER_WORD);
10896 if (cfun->va_list_fpr_size)
10897 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
10898 cfun->va_list_fpr_size / UNITS_PER_VREG);
10900 if (!TARGET_FLOAT)
10902 gcc_assert (local_cum.aapcs_nvrn == 0);
10903 vr_saved = 0;
10906 if (!no_rtl)
10908 if (gr_saved > 0)
10910 rtx ptr, mem;
10912 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
10913 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
10914 - gr_saved * UNITS_PER_WORD);
10915 mem = gen_frame_mem (BLKmode, ptr);
10916 set_mem_alias_set (mem, get_varargs_alias_set ());
10918 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
10919 mem, gr_saved);
10921 if (vr_saved > 0)
10923 /* We can't use move_block_from_reg, because it will use
10924 the wrong mode, storing D regs only. */
10925 machine_mode mode = TImode;
10926 int off, i, vr_start;
10928 /* Set OFF to the offset from virtual_incoming_args_rtx of
10929 the first vector register. The VR save area lies below
10930 the GR one, and is aligned to 16 bytes. */
10931 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
10932 STACK_BOUNDARY / BITS_PER_UNIT);
10933 off -= vr_saved * UNITS_PER_VREG;
10935 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
10936 for (i = 0; i < vr_saved; ++i)
10938 rtx ptr, mem;
10940 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
10941 mem = gen_frame_mem (mode, ptr);
10942 set_mem_alias_set (mem, get_varargs_alias_set ());
10943 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
10944 off += UNITS_PER_VREG;
10949 /* We don't save the size into *PRETEND_SIZE because we want to avoid
10950 any complication of having crtl->args.pretend_args_size changed. */
10951 cfun->machine->frame.saved_varargs_size
10952 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
10953 STACK_BOUNDARY / BITS_PER_UNIT)
10954 + vr_saved * UNITS_PER_VREG);
10957 static void
10958 aarch64_conditional_register_usage (void)
10960 int i;
10961 if (!TARGET_FLOAT)
10963 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
10965 fixed_regs[i] = 1;
10966 call_used_regs[i] = 1;
10971 /* Walk down the type tree of TYPE counting consecutive base elements.
10972 If *MODEP is VOIDmode, then set it to the first valid floating point
10973 type. If a non-floating point type is found, or if a floating point
10974 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
10975 otherwise return the count in the sub-tree. */
10976 static int
10977 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
10979 machine_mode mode;
10980 HOST_WIDE_INT size;
10982 switch (TREE_CODE (type))
10984 case REAL_TYPE:
10985 mode = TYPE_MODE (type);
10986 if (mode != DFmode && mode != SFmode
10987 && mode != TFmode && mode != HFmode)
10988 return -1;
10990 if (*modep == VOIDmode)
10991 *modep = mode;
10993 if (*modep == mode)
10994 return 1;
10996 break;
10998 case COMPLEX_TYPE:
10999 mode = TYPE_MODE (TREE_TYPE (type));
11000 if (mode != DFmode && mode != SFmode
11001 && mode != TFmode && mode != HFmode)
11002 return -1;
11004 if (*modep == VOIDmode)
11005 *modep = mode;
11007 if (*modep == mode)
11008 return 2;
11010 break;
11012 case VECTOR_TYPE:
11013 /* Use V2SImode and V4SImode as representatives of all 64-bit
11014 and 128-bit vector types. */
11015 size = int_size_in_bytes (type);
11016 switch (size)
11018 case 8:
11019 mode = V2SImode;
11020 break;
11021 case 16:
11022 mode = V4SImode;
11023 break;
11024 default:
11025 return -1;
11028 if (*modep == VOIDmode)
11029 *modep = mode;
11031 /* Vector modes are considered to be opaque: two vectors are
11032 equivalent for the purposes of being homogeneous aggregates
11033 if they are the same size. */
11034 if (*modep == mode)
11035 return 1;
11037 break;
11039 case ARRAY_TYPE:
11041 int count;
11042 tree index = TYPE_DOMAIN (type);
11044 /* Can't handle incomplete types nor sizes that are not
11045 fixed. */
11046 if (!COMPLETE_TYPE_P (type)
11047 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11048 return -1;
11050 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
11051 if (count == -1
11052 || !index
11053 || !TYPE_MAX_VALUE (index)
11054 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
11055 || !TYPE_MIN_VALUE (index)
11056 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
11057 || count < 0)
11058 return -1;
11060 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
11061 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
11063 /* There must be no padding. */
11064 if (wi::to_wide (TYPE_SIZE (type))
11065 != count * GET_MODE_BITSIZE (*modep))
11066 return -1;
11068 return count;
11071 case RECORD_TYPE:
11073 int count = 0;
11074 int sub_count;
11075 tree field;
11077 /* Can't handle incomplete types nor sizes that are not
11078 fixed. */
11079 if (!COMPLETE_TYPE_P (type)
11080 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11081 return -1;
11083 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11085 if (TREE_CODE (field) != FIELD_DECL)
11086 continue;
11088 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
11089 if (sub_count < 0)
11090 return -1;
11091 count += sub_count;
11094 /* There must be no padding. */
11095 if (wi::to_wide (TYPE_SIZE (type))
11096 != count * GET_MODE_BITSIZE (*modep))
11097 return -1;
11099 return count;
11102 case UNION_TYPE:
11103 case QUAL_UNION_TYPE:
11105 /* These aren't very interesting except in a degenerate case. */
11106 int count = 0;
11107 int sub_count;
11108 tree field;
11110 /* Can't handle incomplete types nor sizes that are not
11111 fixed. */
11112 if (!COMPLETE_TYPE_P (type)
11113 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11114 return -1;
11116 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11118 if (TREE_CODE (field) != FIELD_DECL)
11119 continue;
11121 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
11122 if (sub_count < 0)
11123 return -1;
11124 count = count > sub_count ? count : sub_count;
11127 /* There must be no padding. */
11128 if (wi::to_wide (TYPE_SIZE (type))
11129 != count * GET_MODE_BITSIZE (*modep))
11130 return -1;
11132 return count;
11135 default:
11136 break;
11139 return -1;
11142 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
11143 type as described in AAPCS64 \S 4.1.2.
11145 See the comment above aarch64_composite_type_p for the notes on MODE. */
11147 static bool
11148 aarch64_short_vector_p (const_tree type,
11149 machine_mode mode)
11151 HOST_WIDE_INT size = -1;
11153 if (type && TREE_CODE (type) == VECTOR_TYPE)
11154 size = int_size_in_bytes (type);
11155 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
11156 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11157 size = GET_MODE_SIZE (mode);
11159 return (size == 8 || size == 16);
11162 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
11163 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
11164 array types. The C99 floating-point complex types are also considered
11165 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
11166 types, which are GCC extensions and out of the scope of AAPCS64, are
11167 treated as composite types here as well.
11169 Note that MODE itself is not sufficient in determining whether a type
11170 is such a composite type or not. This is because
11171 stor-layout.c:compute_record_mode may have already changed the MODE
11172 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
11173 structure with only one field may have its MODE set to the mode of the
11174 field. Also an integer mode whose size matches the size of the
11175 RECORD_TYPE type may be used to substitute the original mode
11176 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
11177 solely relied on. */
11179 static bool
11180 aarch64_composite_type_p (const_tree type,
11181 machine_mode mode)
11183 if (aarch64_short_vector_p (type, mode))
11184 return false;
11186 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
11187 return true;
11189 if (mode == BLKmode
11190 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
11191 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
11192 return true;
11194 return false;
11197 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
11198 shall be passed or returned in simd/fp register(s) (providing these
11199 parameter passing registers are available).
11201 Upon successful return, *COUNT returns the number of needed registers,
11202 *BASE_MODE returns the mode of the individual register and when IS_HA
11203 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
11204 floating-point aggregate or a homogeneous short-vector aggregate. */
11206 static bool
11207 aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
11208 const_tree type,
11209 machine_mode *base_mode,
11210 int *count,
11211 bool *is_ha)
11213 machine_mode new_mode = VOIDmode;
11214 bool composite_p = aarch64_composite_type_p (type, mode);
11216 if (is_ha != NULL) *is_ha = false;
11218 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
11219 || aarch64_short_vector_p (type, mode))
11221 *count = 1;
11222 new_mode = mode;
11224 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
11226 if (is_ha != NULL) *is_ha = true;
11227 *count = 2;
11228 new_mode = GET_MODE_INNER (mode);
11230 else if (type && composite_p)
11232 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
11234 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
11236 if (is_ha != NULL) *is_ha = true;
11237 *count = ag_count;
11239 else
11240 return false;
11242 else
11243 return false;
11245 *base_mode = new_mode;
11246 return true;
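/* For illustration (a sketch, not taken from the sources; int32x4_t is
   the arm_neon.h vector type): aggregates the predicate above does and
   does not treat as HFA/HVA candidates.

     struct hfa { double a, b, c; };    // 3 x DFmode, *is_ha = true
     struct hva { int32x4_t lo, hi; };  // 2 x V4SImode, *is_ha = true
     struct m   { double a; float b; }; // mixed base modes: rejected
     struct big { double a[5]; };       // 5 > HA_MAX_NUM_FLDS: rejected  */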
11249 /* Implement TARGET_STRUCT_VALUE_RTX. */
11251 static rtx
11252 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
11253 int incoming ATTRIBUTE_UNUSED)
11255 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
11258 /* Implements target hook vector_mode_supported_p. */
11259 static bool
11260 aarch64_vector_mode_supported_p (machine_mode mode)
11262 if (TARGET_SIMD
11263 && (mode == V4SImode || mode == V8HImode
11264 || mode == V16QImode || mode == V2DImode
11265 || mode == V2SImode || mode == V4HImode
11266 || mode == V8QImode || mode == V2SFmode
11267 || mode == V4SFmode || mode == V2DFmode
11268 || mode == V4HFmode || mode == V8HFmode
11269 || mode == V1DFmode))
11270 return true;
11272 return false;
11275 /* Return appropriate SIMD container
11276 for MODE within a vector of WIDTH bits. */
11277 static machine_mode
11278 aarch64_simd_container_mode (scalar_mode mode, unsigned width)
11280 gcc_assert (width == 64 || width == 128);
11281 if (TARGET_SIMD)
11283 if (width == 128)
11284 switch (mode)
11286 case E_DFmode:
11287 return V2DFmode;
11288 case E_SFmode:
11289 return V4SFmode;
11290 case E_HFmode:
11291 return V8HFmode;
11292 case E_SImode:
11293 return V4SImode;
11294 case E_HImode:
11295 return V8HImode;
11296 case E_QImode:
11297 return V16QImode;
11298 case E_DImode:
11299 return V2DImode;
11300 default:
11301 break;
11303 else
11304 switch (mode)
11306 case E_SFmode:
11307 return V2SFmode;
11308 case E_HFmode:
11309 return V4HFmode;
11310 case E_SImode:
11311 return V2SImode;
11312 case E_HImode:
11313 return V4HImode;
11314 case E_QImode:
11315 return V8QImode;
11316 default:
11317 break;
11320 return word_mode;
11323 /* Return 128-bit container as the preferred SIMD mode for MODE. */
11324 static machine_mode
11325 aarch64_preferred_simd_mode (scalar_mode mode)
11327 return aarch64_simd_container_mode (mode, 128);
11330 /* Return the bitmask of possible vector sizes for the vectorizer
11331 to iterate over. */
11332 static unsigned int
11333 aarch64_autovectorize_vector_sizes (void)
11335 return (16 | 8);
11338 /* Implement TARGET_MANGLE_TYPE. */
11340 static const char *
11341 aarch64_mangle_type (const_tree type)
11343 /* The AArch64 ABI documents say that "__va_list" has to be
11344 mangled as if it is in the "std" namespace. */
11345 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
11346 return "St9__va_list";
11348 /* Half-precision float. */
11349 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
11350 return "Dh";
11352 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
11353 builtin types. */
11354 if (TYPE_NAME (type) != NULL)
11355 return aarch64_mangle_builtin_type (type);
11357 /* Use the default mangling. */
11358 return NULL;
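/* For illustration (assuming the usual Itanium C++ mangling prefix, not
   taken from the sources): the rules above give

     void f (__builtin_va_list);   // mangled as _Z1fSt9__va_list
     void g (__fp16);              // mangled as _Z1gDh  */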
11361 /* Find the first rtx_insn before insn that will generate an assembly
11362 instruction. */
11364 static rtx_insn *
11365 aarch64_prev_real_insn (rtx_insn *insn)
11367 if (!insn)
11368 return NULL;
11372 insn = prev_real_insn (insn);
11374 while (insn && recog_memoized (insn) < 0);
11376 return insn;
11379 static bool
11380 is_madd_op (enum attr_type t1)
11382 unsigned int i;
11383 /* A number of these may be AArch32 only. */
11384 enum attr_type mlatypes[] = {
11385 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
11386 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
11387 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
11390 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
11392 if (t1 == mlatypes[i])
11393 return true;
11396 return false;
11399 /* Check if there is a register dependency between a load and the insn
11400 for which we hold recog_data. */
11402 static bool
11403 dep_between_memop_and_curr (rtx memop)
11405 rtx load_reg;
11406 int opno;
11408 gcc_assert (GET_CODE (memop) == SET);
11410 if (!REG_P (SET_DEST (memop)))
11411 return false;
11413 load_reg = SET_DEST (memop);
11414 for (opno = 1; opno < recog_data.n_operands; opno++)
11416 rtx operand = recog_data.operand[opno];
11417 if (REG_P (operand)
11418 && reg_overlap_mentioned_p (load_reg, operand))
11419 return true;
11422 return false;
11426 /* When working around the Cortex-A53 erratum 835769,
11427 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
11428 instruction and has a preceding memory instruction such that a NOP
11429 should be inserted between them. */
11431 bool
11432 aarch64_madd_needs_nop (rtx_insn* insn)
11434 enum attr_type attr_type;
11435 rtx_insn *prev;
11436 rtx body;
11438 if (!TARGET_FIX_ERR_A53_835769)
11439 return false;
11441 if (!INSN_P (insn) || recog_memoized (insn) < 0)
11442 return false;
11444 attr_type = get_attr_type (insn);
11445 if (!is_madd_op (attr_type))
11446 return false;
11448 prev = aarch64_prev_real_insn (insn);
11449 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
11450 Restore recog state to INSN to avoid state corruption. */
11451 extract_constrain_insn_cached (insn);
11453 if (!prev || !contains_mem_rtx_p (PATTERN (prev)))
11454 return false;
11456 body = single_set (prev);
11458 /* If the previous insn is a memory op and there is no dependency between
11459 it and the DImode madd, emit a NOP between them. If body is NULL then we
11460 have a complex memory operation, probably a load/store pair.
11461 Be conservative for now and emit a NOP. */
11462 if (GET_MODE (recog_data.operand[0]) == DImode
11463 && (!body || !dep_between_memop_and_curr (body)))
11464 return true;
11466 return false;
11471 /* Implement FINAL_PRESCAN_INSN. */
11473 void
11474 aarch64_final_prescan_insn (rtx_insn *insn)
11476 if (aarch64_madd_needs_nop (insn))
11477 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
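/* For illustration (a sketch of typical output, not taken from the
   sources): with -mfix-cortex-a53-835769 a 64-bit multiply-accumulate
   that directly follows a memory operation is separated from it:

     ldr     x1, [x2]
     nop     // between mem op and mult-accumulate
     madd    x0, x3, x4, x5  */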
11481 /* Return the equivalent letter for size. */
11482 static char
11483 sizetochar (int size)
11485 switch (size)
11487 case 64: return 'd';
11488 case 32: return 's';
11489 case 16: return 'h';
11490 case 8 : return 'b';
11491 default: gcc_unreachable ();
11495 /* Return true iff x is a uniform vector of floating-point
11496 constants, and the constant can be represented in
11497 quarter-precision form. Note, as aarch64_float_const_representable_p
11498 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
11499 static bool
11500 aarch64_vect_float_const_representable_p (rtx x)
11502 rtx elt;
11503 return (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
11504 && const_vec_duplicate_p (x, &elt)
11505 && aarch64_float_const_representable_p (elt));
11508 /* Return true for valid and false for invalid. */
11509 bool
11510 aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
11511 struct simd_immediate_info *info,
11512 enum simd_immediate_check which)
11514 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
11515 matches = 1; \
11516 for (i = 0; i < idx; i += (STRIDE)) \
11517 if (!(TEST)) \
11518 matches = 0; \
11519 if (matches) \
11521 immtype = (CLASS); \
11522 elsize = (ELSIZE); \
11523 eshift = (SHIFT); \
11524 emvn = (NEG); \
11525 break; \
11528 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
11529 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11530 unsigned char bytes[16];
11531 int immtype = -1, matches;
11532 unsigned int invmask = inverse ? 0xff : 0;
11533 int eshift, emvn;
11535 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11537 if (! (aarch64_simd_imm_zero_p (op, mode)
11538 || aarch64_vect_float_const_representable_p (op)))
11539 return false;
11541 if (info)
11543 rtx elt = CONST_VECTOR_ELT (op, 0);
11544 scalar_float_mode elt_mode
11545 = as_a <scalar_float_mode> (GET_MODE (elt));
11547 info->value = elt;
11548 info->element_width = GET_MODE_BITSIZE (elt_mode);
11549 info->mvn = false;
11550 info->shift = 0;
11553 return true;
11556 /* Splat vector constant out into a byte vector. */
11557 for (i = 0; i < n_elts; i++)
11559 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
11560 it must be laid out in the vector register in reverse order. */
11561 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
11562 unsigned HOST_WIDE_INT elpart;
11564 gcc_assert (CONST_INT_P (el));
11565 elpart = INTVAL (el);
11567 for (unsigned int byte = 0; byte < innersize; byte++)
11569 bytes[idx++] = (elpart & 0xff) ^ invmask;
11570 elpart >>= BITS_PER_UNIT;
11575 /* Sanity check. */
11576 gcc_assert (idx == GET_MODE_SIZE (mode));
11580 if (which & AARCH64_CHECK_ORR)
11582 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11583 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
11585 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11586 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
11588 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11589 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
11591 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11592 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
11594 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
11596 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
11599 if (which & AARCH64_CHECK_BIC)
11601 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11602 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
11604 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11605 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
11607 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11608 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
11610 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11611 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
11613 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
11615 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
11618 /* Shifting ones / 8-bit / 64-bit variants only checked
11619 for 'ALL' (MOVI/MVNI). */
11620 if (which == AARCH64_CHECK_MOV)
11622 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11623 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
11625 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11626 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
11628 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11629 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
11631 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11632 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
11634 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
11636 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11637 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
11640 while (0);
11642 if (immtype == -1)
11643 return false;
11645 if (info)
11647 info->element_width = elsize;
11648 info->mvn = emvn != 0;
11649 info->shift = eshift;
11651 unsigned HOST_WIDE_INT imm = 0;
11653 if (immtype >= 12 && immtype <= 15)
11654 info->msl = true;
11656 /* Un-invert bytes of recognized vector, if necessary. */
11657 if (invmask != 0)
11658 for (i = 0; i < idx; i++)
11659 bytes[i] ^= invmask;
11661 if (immtype == 17)
11663 /* FIXME: Broken on 32-bit H_W_I hosts. */
11664 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11666 for (i = 0; i < 8; i++)
11667 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11668 << (i * BITS_PER_UNIT);
11671 info->value = GEN_INT (imm);
11673 else
11675 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11676 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11678 /* Construct 'abcdefgh' because the assembler cannot handle
11679 generic constants. */
11680 if (info->mvn)
11681 imm = ~imm;
11682 imm = (imm >> info->shift) & 0xff;
11683 info->value = GEN_INT (imm);
11687 return true;
11688 #undef CHECK
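/* Worked example (an illustration, not taken from the sources): a
   V4SImode constant whose elements are all 0x0000ff00 splats to the
   byte pattern { 00 ff 00 00 } and matches the second AARCH64_CHECK_ORR
   test above, giving immtype 1, element width 32, shift 8, mvn false,
   and a returned value of 0xff -- i.e. the instruction

     movi    v0.4s, 0xff, lsl 8

   The complemented pattern would instead be caught by one of the
   AARCH64_CHECK_BIC tests and emitted as an MVNI.  */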
11691 /* Check if immediate shift constants are within range. */
11692 bool
11693 aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
11695 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
11696 if (left)
11697 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
11698 else
11699 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
11702 /* Return true if X is a uniform vector where all elements
11703 are either the floating-point constant 0.0 or the
11704 integer constant 0. */
11705 bool
11706 aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
11708 return x == CONST0_RTX (mode);
11712 /* Return the bitmask CONST_INT to select the bits required by a zero extract
11713 operation of width WIDTH at bit position POS. */
11716 aarch64_mask_from_zextract_ops (rtx width, rtx pos)
11718 gcc_assert (CONST_INT_P (width));
11719 gcc_assert (CONST_INT_P (pos));
11721 unsigned HOST_WIDE_INT mask
11722 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
11723 return GEN_INT (mask << UINTVAL (pos));
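/* Worked example (an illustration, not taken from the sources): for
   WIDTH = 8 and POS = 16 this returns ((1 << 8) - 1) << 16 = 0xff0000,
   the mask selecting bits 16..23.  */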
11726 bool
11727 aarch64_mov_operand_p (rtx x, machine_mode mode)
11729 if (GET_CODE (x) == HIGH
11730 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
11731 return true;
11733 if (CONST_INT_P (x))
11734 return true;
11736 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
11737 return true;
11739 return aarch64_classify_symbolic_expression (x)
11740 == SYMBOL_TINY_ABSOLUTE;
11743 /* Return a const_int vector of VAL. */
11745 aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
11747 rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
11748 return gen_const_vec_duplicate (mode, c);
11751 /* Check OP is a legal scalar immediate for the MOVI instruction. */
11753 bool
11754 aarch64_simd_scalar_immediate_valid_for_move (rtx op, scalar_int_mode mode)
11756 machine_mode vmode;
11758 vmode = aarch64_preferred_simd_mode (mode);
11759 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
11760 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
11763 /* Construct and return a PARALLEL RTX vector with elements numbering the
11764 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
11765 the vector - from the perspective of the architecture. This does not
11766 line up with GCC's perspective on lane numbers, so we end up with
11767 different masks depending on our target endian-ness. The diagram
11768 below may help. We must draw the distinction when building masks
11769 which select one half of the vector. An instruction selecting
11770 architectural low-lanes for a big-endian target, must be described using
11771 a mask selecting GCC high-lanes.
11773 Big-Endian Little-Endian
11775 GCC 0 1 2 3 3 2 1 0
11776 | x | x | x | x | | x | x | x | x |
11777 Architecture 3 2 1 0 3 2 1 0
11779 Low Mask: { 2, 3 } { 0, 1 }
11780 High Mask: { 0, 1 } { 2, 3 }
11782 MODE Is the mode of the vector and NUNITS is the number of units in it. */
11785 aarch64_simd_vect_par_cnst_half (machine_mode mode, int nunits, bool high)
11787 rtvec v = rtvec_alloc (nunits / 2);
11788 int high_base = nunits / 2;
11789 int low_base = 0;
11790 int base;
11791 rtx t1;
11792 int i;
11794 if (BYTES_BIG_ENDIAN)
11795 base = high ? low_base : high_base;
11796 else
11797 base = high ? high_base : low_base;
11799 for (i = 0; i < nunits / 2; i++)
11800 RTVEC_ELT (v, i) = GEN_INT (base + i);
11802 t1 = gen_rtx_PARALLEL (mode, v);
11803 return t1;
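/* Worked example (an illustration, not taken from the sources): for
   V4SImode (NUNITS = 4) the returned PARALLEL is

     little-endian:  low = (parallel [0 1]),  high = (parallel [2 3])
     big-endian:     low = (parallel [2 3]),  high = (parallel [0 1])

   matching the Low/High mask table in the comment above.  */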
11806 /* Check OP for validity as a PARALLEL RTX vector with elements
11807 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
11808 from the perspective of the architecture. See the diagram above
11809 aarch64_simd_vect_par_cnst_half for more details. */
11811 bool
11812 aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
11813 bool high)
11815 if (!VECTOR_MODE_P (mode))
11816 return false;
11818 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, GET_MODE_NUNITS (mode),
11819 high);
11820 HOST_WIDE_INT count_op = XVECLEN (op, 0);
11821 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
11822 int i = 0;
11824 if (count_op != count_ideal)
11825 return false;
11827 for (i = 0; i < count_ideal; i++)
11829 rtx elt_op = XVECEXP (op, 0, i);
11830 rtx elt_ideal = XVECEXP (ideal, 0, i);
11832 if (!CONST_INT_P (elt_op)
11833 || INTVAL (elt_ideal) != INTVAL (elt_op))
11834 return false;
11836 return true;
11839 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
11840 HIGH (exclusive). */
11841 void
11842 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
11843 const_tree exp)
11845 HOST_WIDE_INT lane;
11846 gcc_assert (CONST_INT_P (operand));
11847 lane = INTVAL (operand);
11849 if (lane < low || lane >= high)
11851 if (exp)
11852 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
11853 else
11854 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
11858 /* Perform endian correction on lane number N, which indexes a vector
11859 of mode MODE, and return the result as an SImode rtx. */
11862 aarch64_endian_lane_rtx (machine_mode mode, unsigned int n)
11864 return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
11867 /* Return TRUE if OP is a MEM with a valid vector addressing mode (a base register or post-increment address). */
11868 bool
11869 aarch64_simd_mem_operand_p (rtx op)
11871 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
11872 || REG_P (XEXP (op, 0)));
11875 /* Emit a register copy from operand to operand, taking care not to
11876 early-clobber source registers in the process.
11878 COUNT is the number of components into which the copy needs to be
11879 decomposed. */
11880 void
11881 aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode,
11882 unsigned int count)
11884 unsigned int i;
11885 int rdest = REGNO (operands[0]);
11886 int rsrc = REGNO (operands[1]);
11888 if (!reg_overlap_mentioned_p (operands[0], operands[1])
11889 || rdest < rsrc)
11890 for (i = 0; i < count; i++)
11891 emit_move_insn (gen_rtx_REG (mode, rdest + i),
11892 gen_rtx_REG (mode, rsrc + i));
11893 else
11894 for (i = 0; i < count; i++)
11895 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
11896 gen_rtx_REG (mode, rsrc + count - i - 1));
11899 /* Compute and return the length (in bytes) of aarch64_simd_reglist<mode>, where
11900 <mode> is one of the VSTRUCT modes: OI, CI, or XI. */
11902 aarch64_simd_attr_length_rglist (machine_mode mode)
11904 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
11907 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
11908 alignment of a vector to 128 bits. */
11909 static HOST_WIDE_INT
11910 aarch64_simd_vector_alignment (const_tree type)
11912 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
11913 return MIN (align, 128);
11916 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
11917 static bool
11918 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
11920 if (is_packed)
11921 return false;
11923 /* We guarantee alignment for vectors up to 128 bits. */
11924 if (tree_int_cst_compare (TYPE_SIZE (type),
11925 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
11926 return false;
11928 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
11929 return true;
11932 /* Return true if the vector misalignment factor is supported by the
11933 target. */
11934 static bool
11935 aarch64_builtin_support_vector_misalignment (machine_mode mode,
11936 const_tree type, int misalignment,
11937 bool is_packed)
11939 if (TARGET_SIMD && STRICT_ALIGNMENT)
11942 /* Return false if the movmisalign pattern is not supported for this mode. */
11942 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
11943 return false;
11945 /* Misalignment factor is unknown at compile time. */
11946 if (misalignment == -1)
11947 return false;
11949 return default_builtin_support_vector_misalignment (mode, type, misalignment,
11950 is_packed);
11953 /* If VALS is a vector constant that can be loaded into a register
11954 using DUP, generate instructions to do so and return an RTX to
11955 assign to the register. Otherwise return NULL_RTX. */
11956 static rtx
11957 aarch64_simd_dup_constant (rtx vals)
11959 machine_mode mode = GET_MODE (vals);
11960 machine_mode inner_mode = GET_MODE_INNER (mode);
11961 rtx x;
11963 if (!const_vec_duplicate_p (vals, &x))
11964 return NULL_RTX;
11966 /* We can load this constant by using DUP and a constant in a
11967 single general-purpose register. This will be cheaper than a vector
11968 load. */
11969 x = copy_to_mode_reg (inner_mode, x);
11970 return gen_vec_duplicate (mode, x);
11974 /* Generate code to load VALS, which is a PARALLEL containing only
11975 constants (for vec_init) or CONST_VECTOR, efficiently into a
11976 register. Returns an RTX to copy into the register, or NULL_RTX
11977 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
11978 static rtx
11979 aarch64_simd_make_constant (rtx vals)
11981 machine_mode mode = GET_MODE (vals);
11982 rtx const_dup;
11983 rtx const_vec = NULL_RTX;
11984 int n_elts = GET_MODE_NUNITS (mode);
11985 int n_const = 0;
11986 int i;
11988 if (GET_CODE (vals) == CONST_VECTOR)
11989 const_vec = vals;
11990 else if (GET_CODE (vals) == PARALLEL)
11992 /* A CONST_VECTOR must contain only CONST_INTs and
11993 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11994 Only store valid constants in a CONST_VECTOR. */
11995 for (i = 0; i < n_elts; ++i)
11997 rtx x = XVECEXP (vals, 0, i);
11998 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
11999 n_const++;
12001 if (n_const == n_elts)
12002 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12004 else
12005 gcc_unreachable ();
12007 if (const_vec != NULL_RTX
12008 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
12009 /* Load using MOVI/MVNI. */
12010 return const_vec;
12011 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
12012 /* Loaded using DUP. */
12013 return const_dup;
12014 else if (const_vec != NULL_RTX)
12015 /* Load from constant pool. We cannot take advantage of single-cycle
12016 LD1 because we need a PC-relative addressing mode. */
12017 return const_vec;
12018 else
12019 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12020 We cannot construct an initializer. */
12021 return NULL_RTX;
12024 /* Expand a vector initialisation sequence, such that TARGET is
12025 initialised to contain VALS. */
12027 void
12028 aarch64_expand_vector_init (rtx target, rtx vals)
12030 machine_mode mode = GET_MODE (target);
12031 scalar_mode inner_mode = GET_MODE_INNER (mode);
12032 /* The number of vector elements. */
12033 int n_elts = GET_MODE_NUNITS (mode);
12034 /* The number of vector elements which are not constant. */
12035 int n_var = 0;
12036 rtx any_const = NULL_RTX;
12037 /* The first element of vals. */
12038 rtx v0 = XVECEXP (vals, 0, 0);
12039 bool all_same = true;
12041 /* Count the number of variable elements to initialise. */
12042 for (int i = 0; i < n_elts; ++i)
12044 rtx x = XVECEXP (vals, 0, i);
12045 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
12046 ++n_var;
12047 else
12048 any_const = x;
12050 all_same &= rtx_equal_p (x, v0);
12053 /* No variable elements; hand off to aarch64_simd_make_constant, which knows
12054 how best to handle this. */
12055 if (n_var == 0)
12057 rtx constant = aarch64_simd_make_constant (vals);
12058 if (constant != NULL_RTX)
12060 emit_move_insn (target, constant);
12061 return;
12065 /* Splat a single non-constant element if we can. */
12066 if (all_same)
12068 rtx x = copy_to_mode_reg (inner_mode, v0);
12069 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
12070 return;
12073 enum insn_code icode = optab_handler (vec_set_optab, mode);
12074 gcc_assert (icode != CODE_FOR_nothing);
12076 /* If there are only variable elements, try to optimize
12077 the insertion using dup for the most common element
12078 followed by insertions. */
12080 /* The algorithm will fill matches[*][0] with the earliest matching element,
12081 and matches[X][1] with the count of duplicate elements (if X is the
12082 earliest element which has duplicates). */
12084 if (n_var == n_elts && n_elts <= 16)
12086 int matches[16][2] = {0};
12087 for (int i = 0; i < n_elts; i++)
12089 for (int j = 0; j <= i; j++)
12091 if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
12093 matches[i][0] = j;
12094 matches[j][1]++;
12095 break;
12099 int maxelement = 0;
12100 int maxv = 0;
12101 for (int i = 0; i < n_elts; i++)
12102 if (matches[i][1] > maxv)
12104 maxelement = i;
12105 maxv = matches[i][1];
12108 /* Create a duplicate of the most common element. */
12109 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
12110 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
12112 /* Insert the rest. */
12113 for (int i = 0; i < n_elts; i++)
12115 rtx x = XVECEXP (vals, 0, i);
12116 if (matches[i][0] == maxelement)
12117 continue;
12118 x = copy_to_mode_reg (inner_mode, x);
12119 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
12121 return;
12124 /* Initialise a vector which is part-variable. We want to first try
12125 to build those lanes which are constant in the most efficient way we
12126 can. */
12127 if (n_var != n_elts)
12129 rtx copy = copy_rtx (vals);
12131 /* Load constant part of vector. We really don't care what goes into the
12132 parts we will overwrite, but we're more likely to be able to load the
12133 constant efficiently if it has fewer, larger, repeating parts
12134 (see aarch64_simd_valid_immediate). */
12135 for (int i = 0; i < n_elts; i++)
12137 rtx x = XVECEXP (vals, 0, i);
12138 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12139 continue;
12140 rtx subst = any_const;
12141 for (int bit = n_elts / 2; bit > 0; bit /= 2)
12143 /* Look in the copied vector, as more elements are const. */
12144 rtx test = XVECEXP (copy, 0, i ^ bit);
12145 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
12147 subst = test;
12148 break;
12151 XVECEXP (copy, 0, i) = subst;
12153 aarch64_expand_vector_init (target, copy);
12156 /* Insert the variable lanes directly. */
12157 for (int i = 0; i < n_elts; i++)
12159 rtx x = XVECEXP (vals, 0, i);
12160 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12161 continue;
12162 x = copy_to_mode_reg (inner_mode, x);
12163 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
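/* Return the mask to apply to shift amounts in MODE: zero if shift counts are
   not truncated (SHIFT_COUNT_TRUNCATED is false, or MODE is a vector or
   vector-struct mode), otherwise GET_MODE_BITSIZE (MODE) - 1.  */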
12167 static unsigned HOST_WIDE_INT
12168 aarch64_shift_truncation_mask (machine_mode mode)
12170 return
12171 (!SHIFT_COUNT_TRUNCATED
12172 || aarch64_vector_mode_supported_p (mode)
12173 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
12176 /* Select a format to encode pointers in exception handling data. */
12178 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
12180 int type;
12181 switch (aarch64_cmodel)
12183 case AARCH64_CMODEL_TINY:
12184 case AARCH64_CMODEL_TINY_PIC:
12185 case AARCH64_CMODEL_SMALL:
12186 case AARCH64_CMODEL_SMALL_PIC:
12187 case AARCH64_CMODEL_SMALL_SPIC:
12188 /* text+got+data < 4GB. 4-byte signed relocs are sufficient
12189 for everything. */
12190 type = DW_EH_PE_sdata4;
12191 break;
12192 default:
12193 /* No assumptions here. 8-byte relocs required. */
12194 type = DW_EH_PE_sdata8;
12195 break;
12197 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
12200 /* The last .arch and .tune assembly strings that we printed. */
12201 static std::string aarch64_last_printed_arch_string;
12202 static std::string aarch64_last_printed_tune_string;
12204 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
12205 by the function fndecl. */
12207 void
12208 aarch64_declare_function_name (FILE *stream, const char* name,
12209 tree fndecl)
12211 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
12213 struct cl_target_option *targ_options;
12214 if (target_parts)
12215 targ_options = TREE_TARGET_OPTION (target_parts);
12216 else
12217 targ_options = TREE_TARGET_OPTION (target_option_current_node);
12218 gcc_assert (targ_options);
12220 const struct processor *this_arch
12221 = aarch64_get_arch (targ_options->x_explicit_arch);
12223 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
12224 std::string extension
12225 = aarch64_get_extension_string_for_isa_flags (isa_flags,
12226 this_arch->flags);
12227 /* Only update the assembler .arch string if it is distinct from the last
12228 such string we printed. */
12229 std::string to_print = this_arch->name + extension;
12230 if (to_print != aarch64_last_printed_arch_string)
12232 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
12233 aarch64_last_printed_arch_string = to_print;
12236 /* Print the CPU name we're tuning for in the comments; it might be
12237 useful to readers of the generated asm. Do it only when it changes
12238 from function to function and verbose assembly is requested. */
12239 const struct processor *this_tune
12240 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
12242 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
12244 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
12245 this_tune->name);
12246 aarch64_last_printed_tune_string = this_tune->name;
12249 /* Don't forget the type directive for ELF. */
12250 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
12251 ASM_OUTPUT_LABEL (stream, name);
12254 /* Implements TARGET_ASM_FILE_START. Output the assembly header. */
12256 static void
12257 aarch64_start_file (void)
12259 struct cl_target_option *default_options
12260 = TREE_TARGET_OPTION (target_option_default_node);
12262 const struct processor *default_arch
12263 = aarch64_get_arch (default_options->x_explicit_arch);
12264 unsigned long default_isa_flags = default_options->x_aarch64_isa_flags;
12265 std::string extension
12266 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
12267 default_arch->flags);
12269 aarch64_last_printed_arch_string = default_arch->name + extension;
12270 aarch64_last_printed_tune_string = "";
12271 asm_fprintf (asm_out_file, "\t.arch %s\n",
12272 aarch64_last_printed_arch_string.c_str ());
12274 default_file_start ();
12277 /* Emit load exclusive. */
12279 static void
12280 aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
12281 rtx mem, rtx model_rtx)
12283 rtx (*gen) (rtx, rtx, rtx);
12285 switch (mode)
12287 case E_QImode: gen = gen_aarch64_load_exclusiveqi; break;
12288 case E_HImode: gen = gen_aarch64_load_exclusivehi; break;
12289 case E_SImode: gen = gen_aarch64_load_exclusivesi; break;
12290 case E_DImode: gen = gen_aarch64_load_exclusivedi; break;
12291 default:
12292 gcc_unreachable ();
12295 emit_insn (gen (rval, mem, model_rtx));
12298 /* Emit store exclusive. */
12300 static void
12301 aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
12302 rtx rval, rtx mem, rtx model_rtx)
12304 rtx (*gen) (rtx, rtx, rtx, rtx);
12306 switch (mode)
12308 case E_QImode: gen = gen_aarch64_store_exclusiveqi; break;
12309 case E_HImode: gen = gen_aarch64_store_exclusivehi; break;
12310 case E_SImode: gen = gen_aarch64_store_exclusivesi; break;
12311 case E_DImode: gen = gen_aarch64_store_exclusivedi; break;
12312 default:
12313 gcc_unreachable ();
12316 emit_insn (gen (bval, rval, mem, model_rtx));
12319 /* Emit the jump pattern INSN and mark the jump as unlikely to be taken. */
12321 static void
12322 aarch64_emit_unlikely_jump (rtx insn)
12324 rtx_insn *jump = emit_jump_insn (insn);
12325 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
12328 /* Expand a compare and swap pattern. */
12330 void
12331 aarch64_expand_compare_and_swap (rtx operands[])
12333 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
12334 machine_mode mode, cmp_mode;
12335 typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
12336 int idx;
12337 gen_cas_fn gen;
12338 const gen_cas_fn split_cas[] =
12340 gen_aarch64_compare_and_swapqi,
12341 gen_aarch64_compare_and_swaphi,
12342 gen_aarch64_compare_and_swapsi,
12343 gen_aarch64_compare_and_swapdi
12345 const gen_cas_fn atomic_cas[] =
12347 gen_aarch64_compare_and_swapqi_lse,
12348 gen_aarch64_compare_and_swaphi_lse,
12349 gen_aarch64_compare_and_swapsi_lse,
12350 gen_aarch64_compare_and_swapdi_lse
12353 bval = operands[0];
12354 rval = operands[1];
12355 mem = operands[2];
12356 oldval = operands[3];
12357 newval = operands[4];
12358 is_weak = operands[5];
12359 mod_s = operands[6];
12360 mod_f = operands[7];
12361 mode = GET_MODE (mem);
12362 cmp_mode = mode;
12364 /* Normally the succ memory model must be stronger than fail, but in the
12365 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
12366 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
12368 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
12369 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
12370 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
12372 switch (mode)
12374 case E_QImode:
12375 case E_HImode:
12376 /* For short modes, we're going to perform the comparison in SImode,
12377 so do the zero-extension now. */
12378 cmp_mode = SImode;
12379 rval = gen_reg_rtx (SImode);
12380 oldval = convert_modes (SImode, mode, oldval, true);
12381 /* Fall through. */
12383 case E_SImode:
12384 case E_DImode:
12385 /* Force the value into a register if needed. */
12386 if (!aarch64_plus_operand (oldval, mode))
12387 oldval = force_reg (cmp_mode, oldval);
12388 break;
12390 default:
12391 gcc_unreachable ();
12394 switch (mode)
12396 case E_QImode: idx = 0; break;
12397 case E_HImode: idx = 1; break;
12398 case E_SImode: idx = 2; break;
12399 case E_DImode: idx = 3; break;
12400 default:
12401 gcc_unreachable ();
12403 if (TARGET_LSE)
12404 gen = atomic_cas[idx];
12405 else
12406 gen = split_cas[idx];
12408 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
12410 if (mode == QImode || mode == HImode)
12411 emit_move_insn (operands[1], gen_lowpart (mode, rval));
12413 x = gen_rtx_REG (CCmode, CC_REGNUM);
12414 x = gen_rtx_EQ (SImode, x, const0_rtx);
12415 emit_insn (gen_rtx_SET (bval, x));
12418 /* Test whether the target supports using an atomic load-operate instruction
12419 for operation CODE. Returns FALSE if the operation isn't supported by the
12420 architecture. */
12424 bool
12425 aarch64_atomic_ldop_supported_p (enum rtx_code code)
12427 if (!TARGET_LSE)
12428 return false;
12430 switch (code)
12432 case SET:
12433 case AND:
12434 case IOR:
12435 case XOR:
12436 case MINUS:
12437 case PLUS:
12438 return true;
12439 default:
12440 return false;
12444 /* Emit a barrier appropriate for memory model MODEL at the end of a
12445 sequence implementing an atomic operation. */
12447 static void
12448 aarch64_emit_post_barrier (enum memmodel model)
12450 const enum memmodel base_model = memmodel_base (model);
12452 if (is_mm_sync (model)
12453 && (base_model == MEMMODEL_ACQUIRE
12454 || base_model == MEMMODEL_ACQ_REL
12455 || base_model == MEMMODEL_SEQ_CST))
12457 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
12461 /* Emit an atomic compare-and-swap operation. RVAL is the destination register
12462 for the data in memory. EXPECTED is the value expected to be in memory.
12463 DESIRED is the value to store to memory. MEM is the memory location. MODEL
12464 is the memory ordering to use. */
12466 void
12467 aarch64_gen_atomic_cas (rtx rval, rtx mem,
12468 rtx expected, rtx desired,
12469 rtx model)
12471 rtx (*gen) (rtx, rtx, rtx, rtx);
12472 machine_mode mode;
12474 mode = GET_MODE (mem);
12476 switch (mode)
12478 case E_QImode: gen = gen_aarch64_atomic_casqi; break;
12479 case E_HImode: gen = gen_aarch64_atomic_cashi; break;
12480 case E_SImode: gen = gen_aarch64_atomic_cassi; break;
12481 case E_DImode: gen = gen_aarch64_atomic_casdi; break;
12482 default:
12483 gcc_unreachable ();
12486 /* Move the expected value into the CAS destination register. */
12487 emit_insn (gen_rtx_SET (rval, expected));
12489 /* Emit the CAS. */
12490 emit_insn (gen (rval, mem, desired, model));
12492 /* Compare the expected value with the value loaded by the CAS, to establish
12493 whether the swap was made. */
12494 aarch64_gen_compare_reg (EQ, rval, expected);
12497 /* Split a compare and swap pattern. */
12499 void
12500 aarch64_split_compare_and_swap (rtx operands[])
12502 rtx rval, mem, oldval, newval, scratch;
12503 machine_mode mode;
12504 bool is_weak;
12505 rtx_code_label *label1, *label2;
12506 rtx x, cond;
12507 enum memmodel model;
12508 rtx model_rtx;
12510 rval = operands[0];
12511 mem = operands[1];
12512 oldval = operands[2];
12513 newval = operands[3];
12514 is_weak = (operands[4] != const0_rtx);
12515 model_rtx = operands[5];
12516 scratch = operands[7];
12517 mode = GET_MODE (mem);
12518 model = memmodel_from_int (INTVAL (model_rtx));
12520 /* When OLDVAL is zero and we want the strong version we can emit a tighter
12521 loop:
12522 .label1:
12523 LD[A]XR rval, [mem]
12524 CBNZ rval, .label2
12525 ST[L]XR scratch, newval, [mem]
12526 CBNZ scratch, .label1
12527 .label2:
12528 CMP rval, 0. */
12529 bool strong_zero_p = !is_weak && oldval == const0_rtx;
12531 label1 = NULL;
12532 if (!is_weak)
12534 label1 = gen_label_rtx ();
12535 emit_label (label1);
12537 label2 = gen_label_rtx ();
12539 /* The initial load can be relaxed for a __sync operation since a final
12540 barrier will be emitted to stop code hoisting. */
12541 if (is_mm_sync (model))
12542 aarch64_emit_load_exclusive (mode, rval, mem,
12543 GEN_INT (MEMMODEL_RELAXED));
12544 else
12545 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
12547 if (strong_zero_p)
12549 x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
12550 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12551 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
12552 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
12554 else
12556 cond = aarch64_gen_compare_reg (NE, rval, oldval);
12557 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
12558 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12559 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
12560 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
12563 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
12565 if (!is_weak)
12567 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
12568 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12569 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
12570 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
12572 else
12574 cond = gen_rtx_REG (CCmode, CC_REGNUM);
12575 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
12576 emit_insn (gen_rtx_SET (cond, x));
12579 emit_label (label2);
12580 /* If we used a CBNZ in the exchange loop, emit an explicit compare with RVAL
12581 to set the condition flags. If this is not used it will be removed by
12582 later passes. */
12583 if (strong_zero_p)
12585 cond = gen_rtx_REG (CCmode, CC_REGNUM);
12586 x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
12587 emit_insn (gen_rtx_SET (cond, x));
12589 /* Emit any final barrier needed for a __sync operation. */
12590 if (is_mm_sync (model))
12591 aarch64_emit_post_barrier (model);
12594 /* Emit a BIC instruction. */
12596 static void
12597 aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
12599 rtx shift_rtx = GEN_INT (shift);
12600 rtx (*gen) (rtx, rtx, rtx, rtx);
12602 switch (mode)
12604 case E_SImode: gen = gen_and_one_cmpl_lshrsi3; break;
12605 case E_DImode: gen = gen_and_one_cmpl_lshrdi3; break;
12606 default:
12607 gcc_unreachable ();
12610 emit_insn (gen (dst, s2, shift_rtx, s1));
12613 /* Emit an atomic swap. */
12615 static void
12616 aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
12617 rtx mem, rtx model)
12619 rtx (*gen) (rtx, rtx, rtx, rtx);
12621 switch (mode)
12623 case E_QImode: gen = gen_aarch64_atomic_swpqi; break;
12624 case E_HImode: gen = gen_aarch64_atomic_swphi; break;
12625 case E_SImode: gen = gen_aarch64_atomic_swpsi; break;
12626 case E_DImode: gen = gen_aarch64_atomic_swpdi; break;
12627 default:
12628 gcc_unreachable ();
12631 emit_insn (gen (dst, mem, value, model));
12634 /* Operations supported by aarch64_emit_atomic_load_op. */
12636 enum aarch64_atomic_load_op_code
12638 AARCH64_LDOP_PLUS, /* A + B */
12639 AARCH64_LDOP_XOR, /* A ^ B */
12640 AARCH64_LDOP_OR, /* A | B */
12641 AARCH64_LDOP_BIC /* A & ~B */
12644 /* Emit an atomic load-operate. */
12646 static void
12647 aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
12648 machine_mode mode, rtx dst, rtx src,
12649 rtx mem, rtx model)
12651 typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
12652 const aarch64_atomic_load_op_fn plus[] =
12654 gen_aarch64_atomic_loadaddqi,
12655 gen_aarch64_atomic_loadaddhi,
12656 gen_aarch64_atomic_loadaddsi,
12657 gen_aarch64_atomic_loadadddi
12659 const aarch64_atomic_load_op_fn eor[] =
12661 gen_aarch64_atomic_loadeorqi,
12662 gen_aarch64_atomic_loadeorhi,
12663 gen_aarch64_atomic_loadeorsi,
12664 gen_aarch64_atomic_loadeordi
12666 const aarch64_atomic_load_op_fn ior[] =
12668 gen_aarch64_atomic_loadsetqi,
12669 gen_aarch64_atomic_loadsethi,
12670 gen_aarch64_atomic_loadsetsi,
12671 gen_aarch64_atomic_loadsetdi
12673 const aarch64_atomic_load_op_fn bic[] =
12675 gen_aarch64_atomic_loadclrqi,
12676 gen_aarch64_atomic_loadclrhi,
12677 gen_aarch64_atomic_loadclrsi,
12678 gen_aarch64_atomic_loadclrdi
12680 aarch64_atomic_load_op_fn gen;
12681 int idx = 0;
12683 switch (mode)
12685 case E_QImode: idx = 0; break;
12686 case E_HImode: idx = 1; break;
12687 case E_SImode: idx = 2; break;
12688 case E_DImode: idx = 3; break;
12689 default:
12690 gcc_unreachable ();
12693 switch (code)
12695 case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
12696 case AARCH64_LDOP_XOR: gen = eor[idx]; break;
12697 case AARCH64_LDOP_OR: gen = ior[idx]; break;
12698 case AARCH64_LDOP_BIC: gen = bic[idx]; break;
12699 default:
12700 gcc_unreachable ();
12703 emit_insn (gen (dst, mem, src, model));
12706 /* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
12707 location to store the data read from memory. OUT_RESULT is the location to
12708 store the result of the operation. MEM is the memory location to read and
12709 modify. MODEL_RTX is the memory ordering to use. VALUE is the second
12710 operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
12711 be NULL. */
12713 void
12714 aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
12715 rtx mem, rtx value, rtx model_rtx)
12717 machine_mode mode = GET_MODE (mem);
12718 machine_mode wmode = (mode == DImode ? DImode : SImode);
12719 const bool short_mode = (mode < SImode);
12720 aarch64_atomic_load_op_code ldop_code;
12721 rtx src;
12722 rtx x;
12724 if (out_data)
12725 out_data = gen_lowpart (mode, out_data);
12727 if (out_result)
12728 out_result = gen_lowpart (mode, out_result);
12730 /* Make sure the value is in a register, putting it into a destination
12731 register if it needs to be manipulated. */
12732 if (!register_operand (value, mode)
12733 || code == AND || code == MINUS)
12735 src = out_result ? out_result : out_data;
12736 emit_move_insn (src, gen_lowpart (mode, value));
12738 else
12739 src = value;
12740 gcc_assert (register_operand (src, mode));
12742 /* Preprocess the data for the operation as necessary. If the operation is
12743 a SET then emit a swap instruction and finish. */
12744 switch (code)
12746 case SET:
12747 aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
12748 return;
12750 case MINUS:
12751 /* Negate the value and treat it as a PLUS. */
12753 rtx neg_src;
12755 /* Resize the value if necessary. */
12756 if (short_mode)
12757 src = gen_lowpart (wmode, src);
12759 neg_src = gen_rtx_NEG (wmode, src);
12760 emit_insn (gen_rtx_SET (src, neg_src));
12762 if (short_mode)
12763 src = gen_lowpart (mode, src);
12765 /* Fall-through. */
12766 case PLUS:
12767 ldop_code = AARCH64_LDOP_PLUS;
12768 break;
12770 case IOR:
12771 ldop_code = AARCH64_LDOP_OR;
12772 break;
12774 case XOR:
12775 ldop_code = AARCH64_LDOP_XOR;
12776 break;
12778 case AND:
12780 rtx not_src;
12782 /* Resize the value if necessary. */
12783 if (short_mode)
12784 src = gen_lowpart (wmode, src);
12786 not_src = gen_rtx_NOT (wmode, src);
12787 emit_insn (gen_rtx_SET (src, not_src));
12789 if (short_mode)
12790 src = gen_lowpart (mode, src);
12792 ldop_code = AARCH64_LDOP_BIC;
12793 break;
12795 default:
12796 /* The operation can't be done with atomic instructions. */
12797 gcc_unreachable ();
12800 aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
12802 /* If necessary, calculate the data in memory after the update by redoing the
12803 operation from values in registers. */
12804 if (!out_result)
12805 return;
12807 if (short_mode)
12809 src = gen_lowpart (wmode, src);
12810 out_data = gen_lowpart (wmode, out_data);
12811 out_result = gen_lowpart (wmode, out_result);
12814 x = NULL_RTX;
12816 switch (code)
12818 case MINUS:
12819 case PLUS:
12820 x = gen_rtx_PLUS (wmode, out_data, src);
12821 break;
12822 case IOR:
12823 x = gen_rtx_IOR (wmode, out_data, src);
12824 break;
12825 case XOR:
12826 x = gen_rtx_XOR (wmode, out_data, src);
12827 break;
12828 case AND:
12829 aarch64_emit_bic (wmode, out_result, out_data, src, 0);
12830 return;
12831 default:
12832 gcc_unreachable ();
12835 emit_set_insn (out_result, x);
12837 return;
12840 /* Split an atomic operation. */
12842 void
12843 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
12844 rtx value, rtx model_rtx, rtx cond)
12846 machine_mode mode = GET_MODE (mem);
12847 machine_mode wmode = (mode == DImode ? DImode : SImode);
12848 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
12849 const bool is_sync = is_mm_sync (model);
12850 rtx_code_label *label;
12851 rtx x;
12853 /* Split the atomic operation into a sequence. */
12854 label = gen_label_rtx ();
12855 emit_label (label);
12857 if (new_out)
12858 new_out = gen_lowpart (wmode, new_out);
12859 if (old_out)
12860 old_out = gen_lowpart (wmode, old_out);
12861 else
12862 old_out = new_out;
12863 value = simplify_gen_subreg (wmode, value, mode, 0);
12865 /* The initial load can be relaxed for a __sync operation since a final
12866 barrier will be emitted to stop code hoisting. */
12867 if (is_sync)
12868 aarch64_emit_load_exclusive (mode, old_out, mem,
12869 GEN_INT (MEMMODEL_RELAXED));
12870 else
12871 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
12873 switch (code)
12875 case SET:
12876 new_out = value;
12877 break;
12879 case NOT:
12880 x = gen_rtx_AND (wmode, old_out, value);
12881 emit_insn (gen_rtx_SET (new_out, x));
12882 x = gen_rtx_NOT (wmode, new_out);
12883 emit_insn (gen_rtx_SET (new_out, x));
12884 break;
12886 case MINUS:
12887 if (CONST_INT_P (value))
12889 value = GEN_INT (-INTVAL (value));
12890 code = PLUS;
12892 /* Fall through. */
12894 default:
12895 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
12896 emit_insn (gen_rtx_SET (new_out, x));
12897 break;
12900 aarch64_emit_store_exclusive (mode, cond, mem,
12901 gen_lowpart (mode, new_out), model_rtx);
12903 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
12904 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
12905 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
12906 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
12908 /* Emit any final barrier needed for a __sync operation. */
12909 if (is_sync)
12910 aarch64_emit_post_barrier (model);
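/* Implement TARGET_INIT_LIBFUNCS.  */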
12913 static void
12914 aarch64_init_libfuncs (void)
12916 /* Half-precision float operations. The compiler handles all operations
12917 with NULL libfuncs by converting to SFmode. */
12919 /* Conversions. */
12920 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
12921 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
12923 /* Arithmetic. */
12924 set_optab_libfunc (add_optab, HFmode, NULL);
12925 set_optab_libfunc (sdiv_optab, HFmode, NULL);
12926 set_optab_libfunc (smul_optab, HFmode, NULL);
12927 set_optab_libfunc (neg_optab, HFmode, NULL);
12928 set_optab_libfunc (sub_optab, HFmode, NULL);
12930 /* Comparisons. */
12931 set_optab_libfunc (eq_optab, HFmode, NULL);
12932 set_optab_libfunc (ne_optab, HFmode, NULL);
12933 set_optab_libfunc (lt_optab, HFmode, NULL);
12934 set_optab_libfunc (le_optab, HFmode, NULL);
12935 set_optab_libfunc (ge_optab, HFmode, NULL);
12936 set_optab_libfunc (gt_optab, HFmode, NULL);
12937 set_optab_libfunc (unord_optab, HFmode, NULL);
12940 /* Target hook for c_mode_for_suffix. */
12941 static machine_mode
12942 aarch64_c_mode_for_suffix (char suffix)
12944 if (suffix == 'q')
12945 return TFmode;
12947 return VOIDmode;
12950 /* We can only represent floating point constants which will fit in
12951 "quarter-precision" values. These values are characterised by
12952 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
12955 (-1)^s * (n/16) * 2^r
12957 Where:
12958 's' is the sign bit.
12959 'n' is an integer in the range 16 <= n <= 31.
12960 'r' is an integer in the range -3 <= r <= 4. */
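/* For example, 1.0 is (-1)^0 * (16/16) * 2^0 and 31.0 is (-1)^0 * (31/16) * 2^4,
   so both are representable, whereas 0.1 is not: it cannot be written as
   n/16 * 2^r with n and r in the ranges above.  */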
12962 /* Return true iff X can be represented by a quarter-precision
12963 floating point immediate operand. Note, we cannot represent 0.0. */
12964 bool
12965 aarch64_float_const_representable_p (rtx x)
12967 /* This represents our current view of how many bits
12968 make up the mantissa. */
12969 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12970 int exponent;
12971 unsigned HOST_WIDE_INT mantissa, mask;
12972 REAL_VALUE_TYPE r, m;
12973 bool fail;
12975 if (!CONST_DOUBLE_P (x))
12976 return false;
12978 /* We don't support HFmode constants yet. */
12979 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
12980 return false;
12982 r = *CONST_DOUBLE_REAL_VALUE (x);
12984 /* We cannot represent infinities, NaNs or +/-zero. We won't
12985 know if we have +zero until we analyse the mantissa, but we
12986 can reject the other invalid values. */
12987 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
12988 || REAL_VALUE_MINUS_ZERO (r))
12989 return false;
12991 /* Extract exponent. */
12992 r = real_value_abs (&r);
12993 exponent = REAL_EXP (&r);
12995 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12996 highest (sign) bit, with a fixed binary point at bit point_pos.
12997 The low HOST_WIDE_INT of W holds the low part of the mantissa, the high one the high part.
12998 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
12999 bits for the mantissa, this can fail (low bits will be lost). */
13000 real_ldexp (&m, &r, point_pos - exponent);
13001 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
13003 /* If the low part of the mantissa has bits set we cannot represent
13004 the value. */
13005 if (w.ulow () != 0)
13006 return false;
13007 /* We have rejected the lower HOST_WIDE_INT, so update our
13008 understanding of how many bits lie in the mantissa and
13009 look only at the high HOST_WIDE_INT. */
13010 mantissa = w.elt (1);
13011 point_pos -= HOST_BITS_PER_WIDE_INT;
13013 /* We can only represent values with a mantissa of the form 1.xxxx. */
13014 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
13015 if ((mantissa & mask) != 0)
13016 return false;
13018 /* Having filtered unrepresentable values, we may now remove all
13019 but the highest 5 bits. */
13020 mantissa >>= point_pos - 5;
13022 /* We cannot represent the value 0.0, so reject it. This is handled
13023 elsewhere. */
13024 if (mantissa == 0)
13025 return false;
13027 /* Then, as bit 4 is always set, we can mask it off, leaving
13028 the mantissa in the range [0, 15]. */
13029 mantissa &= ~(1 << 4);
13030 gcc_assert (mantissa <= 15);
13032 /* GCC internally does not use IEEE754-like encoding (where normalized
13033 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
13034 Our mantissa values are shifted 4 places to the left relative to
13035 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
13036 by 5 places to correct for GCC's representation. */
13037 exponent = 5 - exponent;
13039 return (exponent >= 0 && exponent <= 7);
13042 /* Return the output string for an AdvSIMD MOVI, MVNI, ORR or BIC immediate
13043 move of CONST_VECTOR, which has mode MODE and width WIDTH bits. WHICH
13044 selects whether to check for a MOVI/MVNI, ORR or BIC immediate. */
13045 char*
13046 aarch64_output_simd_mov_immediate (rtx const_vector,
13047 machine_mode mode,
13048 unsigned width,
13049 enum simd_immediate_check which)
13051 bool is_valid;
13052 static char templ[40];
13053 const char *mnemonic;
13054 const char *shift_op;
13055 unsigned int lane_count = 0;
13056 char element_char;
13058 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
13060 /* This will return true to show const_vector is legal for use as either
13061 an AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
13062 It will also update INFO to show how the immediate should be generated.
13063 WHICH selects whether to check for MOVI/MVNI, ORR or BIC. */
13064 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
13065 &info, which);
13066 gcc_assert (is_valid);
13068 element_char = sizetochar (info.element_width);
13069 lane_count = width / info.element_width;
13071 mode = GET_MODE_INNER (mode);
13072 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
13074 gcc_assert (info.shift == 0 && ! info.mvn);
13075 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
13076 move immediate path. */
13077 if (aarch64_float_const_zero_rtx_p (info.value))
13078 info.value = GEN_INT (0);
13079 else
13081 const unsigned int buf_size = 20;
13082 char float_buf[buf_size] = {'\0'};
13083 real_to_decimal_for_mode (float_buf,
13084 CONST_DOUBLE_REAL_VALUE (info.value),
13085 buf_size, buf_size, 1, mode);
13087 if (lane_count == 1)
13088 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
13089 else
13090 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
13091 lane_count, element_char, float_buf);
13092 return templ;
13096 gcc_assert (CONST_INT_P (info.value));
13098 if (which == AARCH64_CHECK_MOV)
13100 mnemonic = info.mvn ? "mvni" : "movi";
13101 shift_op = info.msl ? "msl" : "lsl";
13102 if (lane_count == 1)
13103 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
13104 mnemonic, UINTVAL (info.value));
13105 else if (info.shift)
13106 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
13107 HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
13108 element_char, UINTVAL (info.value), shift_op, info.shift);
13109 else
13110 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
13111 HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
13112 element_char, UINTVAL (info.value));
13114 else
13116 /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR. */
13117 mnemonic = info.mvn ? "bic" : "orr";
13118 if (info.shift)
13119 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
13120 HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
13121 element_char, UINTVAL (info.value), "lsl", info.shift);
13122 else
13123 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
13124 HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
13125 element_char, UINTVAL (info.value));
13127 return templ;
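/* Return the output string for moving the scalar immediate IMMEDIATE of mode
   MODE into a SIMD register: duplicate it into a constant vector and reuse
   aarch64_output_simd_mov_immediate.  */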
13130 char*
13131 aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
13134 /* If a floating point number was passed and we desire to use it in an
13135 integer mode, do the conversion to integer. */
13136 if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
13138 unsigned HOST_WIDE_INT ival;
13139 if (!aarch64_reinterpret_float_as_int (immediate, &ival))
13140 gcc_unreachable ();
13141 immediate = gen_int_mode (ival, mode);
13144 machine_mode vmode;
13145 /* Use a 64-bit mode for everything except for DI/DF mode, where we use
13146 a 128-bit vector mode. */
13147 int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
13149 vmode = aarch64_simd_container_mode (mode, width);
13150 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
13151 return aarch64_output_simd_mov_immediate (v_op, vmode, width);
13154 /* Split operands into moves from op[1] + op[2] into op[0]. */
13156 void
13157 aarch64_split_combinev16qi (rtx operands[3])
13159 unsigned int dest = REGNO (operands[0]);
13160 unsigned int src1 = REGNO (operands[1]);
13161 unsigned int src2 = REGNO (operands[2]);
13162 machine_mode halfmode = GET_MODE (operands[1]);
13163 unsigned int halfregs = REG_NREGS (operands[1]);
13164 rtx destlo, desthi;
13166 gcc_assert (halfmode == V16QImode);
13168 if (src1 == dest && src2 == dest + halfregs)
13170 /* No-op move. Can't split to nothing; emit something. */
13171 emit_note (NOTE_INSN_DELETED);
13172 return;
13175 /* Preserve register attributes for variable tracking. */
13176 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
13177 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
13178 GET_MODE_SIZE (halfmode));
13180 /* Special case of reversed high/low parts. */
13181 if (reg_overlap_mentioned_p (operands[2], destlo)
13182 && reg_overlap_mentioned_p (operands[1], desthi))
13184 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
13185 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
13186 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
13188 else if (!reg_overlap_mentioned_p (operands[2], destlo))
13190 /* Try to avoid unnecessary moves if part of the result
13191 is in the right place already. */
13192 if (src1 != dest)
13193 emit_move_insn (destlo, operands[1]);
13194 if (src2 != dest + halfregs)
13195 emit_move_insn (desthi, operands[2]);
13197 else
13199 if (src2 != dest + halfregs)
13200 emit_move_insn (desthi, operands[2]);
13201 if (src1 != dest)
13202 emit_move_insn (destlo, operands[1]);
13206 /* vec_perm support. */
13208 #define MAX_VECT_LEN 16
13210 struct expand_vec_perm_d
13212 rtx target, op0, op1;
13213 auto_vec_perm_indices perm;
13214 machine_mode vmode;
13215 bool one_vector_p;
13216 bool testing_p;
13219 /* Generate a variable permutation. */
13221 static void
13222 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
13224 machine_mode vmode = GET_MODE (target);
13225 bool one_vector_p = rtx_equal_p (op0, op1);
13227 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
13228 gcc_checking_assert (GET_MODE (op0) == vmode);
13229 gcc_checking_assert (GET_MODE (op1) == vmode);
13230 gcc_checking_assert (GET_MODE (sel) == vmode);
13231 gcc_checking_assert (TARGET_SIMD);
13233 if (one_vector_p)
13235 if (vmode == V8QImode)
13237 /* Expand the argument to a V16QI mode by duplicating it. */
13238 rtx pair = gen_reg_rtx (V16QImode);
13239 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
13240 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
13242 else
13244 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
13247 else
13249 rtx pair;
13251 if (vmode == V8QImode)
13253 pair = gen_reg_rtx (V16QImode);
13254 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
13255 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
13257 else
13259 pair = gen_reg_rtx (OImode);
13260 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
13261 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
13266 /* Expand a vec_perm with the operands given by TARGET, OP0, OP1 and SEL.
13267 NELT is the number of elements in the vector. */
13269 void
13270 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel,
13271 unsigned int nelt)
13273 machine_mode vmode = GET_MODE (target);
13274 bool one_vector_p = rtx_equal_p (op0, op1);
13275 rtx mask;
13277 /* The TBL instruction does not use a modulo index, so we must take care
13278 of that ourselves. */
13279 mask = aarch64_simd_gen_const_vector_dup (vmode,
13280 one_vector_p ? nelt - 1 : 2 * nelt - 1);
13281 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
13283 /* For big-endian, we also need to reverse the index within the vector
13284 (but not which vector). */
13285 if (BYTES_BIG_ENDIAN)
13287 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
13288 if (!one_vector_p)
13289 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
13290 sel = expand_simple_binop (vmode, XOR, sel, mask,
13291 NULL, 0, OPTAB_LIB_WIDEN);
13293 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
13296 /* Recognize patterns suitable for the TRN instructions. */
13297 static bool
13298 aarch64_evpc_trn (struct expand_vec_perm_d *d)
13300 unsigned int i, odd, mask, nelt = d->perm.length ();
13301 rtx out, in0, in1, x;
13302 machine_mode vmode = d->vmode;
13304 if (GET_MODE_UNIT_SIZE (vmode) > 8)
13305 return false;
13307 /* Note that these are little-endian tests.
13308 We correct for big-endian later. */
13309 if (d->perm[0] == 0)
13310 odd = 0;
13311 else if (d->perm[0] == 1)
13312 odd = 1;
13313 else
13314 return false;
13315 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
13317 for (i = 0; i < nelt; i += 2)
13319 if (d->perm[i] != i + odd)
13320 return false;
13321 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
13322 return false;
13325 /* Success! */
13326 if (d->testing_p)
13327 return true;
13329 in0 = d->op0;
13330 in1 = d->op1;
13331 if (BYTES_BIG_ENDIAN)
13333 x = in0, in0 = in1, in1 = x;
13334 odd = !odd;
13336 out = d->target;
13338 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
13339 odd ? UNSPEC_TRN2 : UNSPEC_TRN1));
13340 return true;
13343 /* Recognize patterns suitable for the UZP instructions. */
13344 static bool
13345 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
13347 unsigned int i, odd, mask, nelt = d->perm.length ();
13348 rtx out, in0, in1, x;
13349 machine_mode vmode = d->vmode;
13351 if (GET_MODE_UNIT_SIZE (vmode) > 8)
13352 return false;
13354 /* Note that these are little-endian tests.
13355 We correct for big-endian later. */
13356 if (d->perm[0] == 0)
13357 odd = 0;
13358 else if (d->perm[0] == 1)
13359 odd = 1;
13360 else
13361 return false;
13362 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
13364 for (i = 0; i < nelt; i++)
13366 unsigned elt = (i * 2 + odd) & mask;
13367 if (d->perm[i] != elt)
13368 return false;
13371 /* Success! */
13372 if (d->testing_p)
13373 return true;
13375 in0 = d->op0;
13376 in1 = d->op1;
13377 if (BYTES_BIG_ENDIAN)
13379 x = in0, in0 = in1, in1 = x;
13380 odd = !odd;
13382 out = d->target;
13384 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
13385 odd ? UNSPEC_UZP2 : UNSPEC_UZP1));
13386 return true;
13389 /* Recognize patterns suitable for the ZIP instructions. */
13390 static bool
13391 aarch64_evpc_zip (struct expand_vec_perm_d *d)
13393 unsigned int i, high, mask, nelt = d->perm.length ();
13394 rtx out, in0, in1, x;
13395 machine_mode vmode = d->vmode;
13397 if (GET_MODE_UNIT_SIZE (vmode) > 8)
13398 return false;
13400 /* Note that these are little-endian tests.
13401 We correct for big-endian later. */
13402 high = nelt / 2;
13403 if (d->perm[0] == high)
13404 /* Do Nothing. */
13406 else if (d->perm[0] == 0)
13407 high = 0;
13408 else
13409 return false;
13410 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
13412 for (i = 0; i < nelt / 2; i++)
13414 unsigned elt = (i + high) & mask;
13415 if (d->perm[i * 2] != elt)
13416 return false;
13417 elt = (elt + nelt) & mask;
13418 if (d->perm[i * 2 + 1] != elt)
13419 return false;
13422 /* Success! */
13423 if (d->testing_p)
13424 return true;
13426 in0 = d->op0;
13427 in1 = d->op1;
13428 if (BYTES_BIG_ENDIAN)
13430 x = in0, in0 = in1, in1 = x;
13431 high = !high;
13433 out = d->target;
13435 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
13436 high ? UNSPEC_ZIP2 : UNSPEC_ZIP1));
13437 return true;
13440 /* Recognize patterns for the EXT insn. */
13442 static bool
13443 aarch64_evpc_ext (struct expand_vec_perm_d *d)
13445 unsigned int i, nelt = d->perm.length ();
13446 rtx offset;
13448 unsigned int location = d->perm[0]; /* Always < nelt. */
13450 /* Check if the extracted indices are increasing by one. */
13451 for (i = 1; i < nelt; i++)
13453 unsigned int required = location + i;
13454 if (d->one_vector_p)
13456 /* We'll pass the same vector in twice, so allow indices to wrap. */
13457 required &= (nelt - 1);
13459 if (d->perm[i] != required)
13460 return false;
13463 /* Success! */
13464 if (d->testing_p)
13465 return true;
13467 /* The case where (location == 0) is a no-op for both big- and little-endian,
13468 and is removed by the mid-end at optimization levels -O1 and higher. */
13470 if (BYTES_BIG_ENDIAN && (location != 0))
13472 /* After setup, we want the high elements of the first vector (stored
13473 at the LSB end of the register), and the low elements of the second
13474 vector (stored at the MSB end of the register). So swap. */
13475 std::swap (d->op0, d->op1);
13476 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
13477 location = nelt - location;
13480 offset = GEN_INT (location);
13481 emit_set_insn (d->target,
13482 gen_rtx_UNSPEC (d->vmode,
13483 gen_rtvec (3, d->op0, d->op1, offset),
13484 UNSPEC_EXT));
13485 return true;
13488 /* Recognize patterns for the REV insns. */
13490 static bool
13491 aarch64_evpc_rev (struct expand_vec_perm_d *d)
13493 unsigned int i, j, diff, size, unspec, nelt = d->perm.length ();
13495 if (!d->one_vector_p)
13496 return false;
13498 diff = d->perm[0];
13499 size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
13500 if (size == 8)
13501 unspec = UNSPEC_REV64;
13502 else if (size == 4)
13503 unspec = UNSPEC_REV32;
13504 else if (size == 2)
13505 unspec = UNSPEC_REV16;
13506 else
13507 return false;
13509 for (i = 0; i < nelt ; i += diff + 1)
13510 for (j = 0; j <= diff; j += 1)
13512 /* This is guaranteed to be true as the value of diff
13513 is 7, 3 or 1 and we should have enough elements in the
13514 queue to generate this. Getting a vector mask with a
13515 value of diff other than these values implies that
13516 something is wrong by the time we get here. */
13517 gcc_assert (i + j < nelt);
13518 if (d->perm[i + j] != i + diff - j)
13519 return false;
13522 /* Success! */
13523 if (d->testing_p)
13524 return true;
13526 emit_set_insn (d->target, gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0),
13527 unspec));
13528 return true;
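/* Recognize permutations suitable for the DUP instruction: those which
   broadcast a single element across the whole vector.  */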
13531 static bool
13532 aarch64_evpc_dup (struct expand_vec_perm_d *d)
13534 rtx out = d->target;
13535 rtx in0;
13536 machine_mode vmode = d->vmode;
13537 unsigned int i, elt, nelt = d->perm.length ();
13538 rtx lane;
13540 elt = d->perm[0];
13541 for (i = 1; i < nelt; i++)
13543 if (elt != d->perm[i])
13544 return false;
13547 /* The generic preparation in aarch64_expand_vec_perm_const_1
13548 swaps the operand order and the permute indices if it finds
13549 d->perm[0] to be in the second operand. Thus, we can always
13550 use d->op0 and need not do any extra arithmetic to get the
13551 correct lane number. */
13552 in0 = d->op0;
13553 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
13555 rtx parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, lane));
13556 rtx select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
13557 emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
13558 return true;
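/* Fall back to expanding a V8QI or V16QI constant permutation with TBL,
   using the permute indices as a constant selector vector.  */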
13561 static bool
13562 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
13564 rtx rperm[MAX_VECT_LEN], sel;
13565 machine_mode vmode = d->vmode;
13566 unsigned int i, nelt = d->perm.length ();
13568 if (d->testing_p)
13569 return true;
13571 /* Generic code will try constant permutation twice. Once with the
13572 original mode and again with the elements lowered to QImode.
13573 So wait and don't do the selector expansion ourselves. */
13574 if (vmode != V8QImode && vmode != V16QImode)
13575 return false;
13577 for (i = 0; i < nelt; ++i)
13579 int nunits = GET_MODE_NUNITS (vmode);
13581 /* If big-endian and two vectors, we end up with a weird mixed-endian
13582 mode on NEON. Reverse the index within each word but not the word
13583 itself. */
13584 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
13585 : d->perm[i]);
13587 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
13588 sel = force_reg (vmode, sel);
13590 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
13591 return true;
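/* Try to expand the constant permutation described by D, first using the
   single-instruction patterns (REV, EXT, DUP, ZIP, UZP, TRN) and then
   falling back to TBL.  Return true on success.  */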
13594 static bool
13595 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
13597 /* The pattern matching functions above are written to look for a small
13598 number to begin the sequence (0, 1, N/2). If we begin with an index
13599 from the second operand, we can swap the operands. */
13600 unsigned int nelt = d->perm.length ();
13601 if (d->perm[0] >= nelt)
13603 gcc_assert (nelt == (nelt & -nelt));
13604 for (unsigned int i = 0; i < nelt; ++i)
13605 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
13607 std::swap (d->op0, d->op1);
13610 if (TARGET_SIMD && nelt > 1)
13612 if (aarch64_evpc_rev (d))
13613 return true;
13614 else if (aarch64_evpc_ext (d))
13615 return true;
13616 else if (aarch64_evpc_dup (d))
13617 return true;
13618 else if (aarch64_evpc_zip (d))
13619 return true;
13620 else if (aarch64_evpc_uzp (d))
13621 return true;
13622 else if (aarch64_evpc_trn (d))
13623 return true;
13624 return aarch64_evpc_tbl (d);
13626 return false;
13629 /* Expand a vec_perm_const pattern with the operands given by TARGET,
13630 OP0, OP1 and SEL. NELT is the number of elements in the vector. */
13632 bool
13633 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel,
13634 unsigned int nelt)
13636 struct expand_vec_perm_d d;
13637 unsigned int i, which;
13639 d.target = target;
13640 d.op0 = op0;
13641 d.op1 = op1;
13643 d.vmode = GET_MODE (target);
13644 gcc_assert (VECTOR_MODE_P (d.vmode));
13645 d.testing_p = false;
13647 d.perm.reserve (nelt);
13648 for (i = which = 0; i < nelt; ++i)
13650 rtx e = XVECEXP (sel, 0, i);
13651 unsigned int ei = INTVAL (e) & (2 * nelt - 1);
13652 which |= (ei < nelt ? 1 : 2);
13653 d.perm.quick_push (ei);
13656 switch (which)
13658 default:
13659 gcc_unreachable ();
13661 case 3:
13662 d.one_vector_p = false;
13663 if (!rtx_equal_p (op0, op1))
13664 break;
13666 /* The elements of PERM do not suggest that only the first operand
13667 is used, but both operands are identical. Allow easier matching
13668 of the permutation by folding the permutation into the single
13669 input vector. */
13670 /* Fall Through. */
13671 case 2:
13672 for (i = 0; i < nelt; ++i)
13673 d.perm[i] &= nelt - 1;
13674 d.op0 = op1;
13675 d.one_vector_p = true;
13676 break;
13678 case 1:
13679 d.op1 = op0;
13680 d.one_vector_p = true;
13681 break;
13684 return aarch64_expand_vec_perm_const_1 (&d);
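/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK: return true if the constant
   permutation SEL is supported for mode VMODE, checked by expanding it
   inside a throwaway insn sequence.  */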
13687 static bool
13688 aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
13690 struct expand_vec_perm_d d;
13691 unsigned int i, nelt, which;
13692 bool ret;
13694 d.vmode = vmode;
13695 d.testing_p = true;
13696 d.perm.safe_splice (sel);
13698 /* Calculate whether all elements are in one vector. */
13699 nelt = sel.length ();
13700 for (i = which = 0; i < nelt; ++i)
13702 unsigned int e = d.perm[i];
13703 gcc_assert (e < 2 * nelt);
13704 which |= (e < nelt ? 1 : 2);
13707 /* If all elements are from the second vector, reindex as if from the
13708 first vector. */
13709 if (which == 2)
13710 for (i = 0; i < nelt; ++i)
13711 d.perm[i] -= nelt;
13713 /* Check whether the mask can be applied to a single vector. */
13714 d.one_vector_p = (which != 3);
13716 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
13717 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
13718 if (!d.one_vector_p)
13719 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
13721 start_sequence ();
13722 ret = aarch64_expand_vec_perm_const_1 (&d);
13723 end_sequence ();
13725 return ret;
13728 /* Generate a byte permute mask for a register of mode MODE,
13729 which has NUNITS units. */
13732 aarch64_reverse_mask (machine_mode mode, unsigned int nunits)
13734 /* We have to reverse each vector because we don't have
13735 a permuted load that can reverse-load according to ABI rules. */
13736 rtx mask;
13737 rtvec v = rtvec_alloc (16);
13738 unsigned int i, j;
13739 unsigned int usize = GET_MODE_UNIT_SIZE (mode);
13741 gcc_assert (BYTES_BIG_ENDIAN);
13742 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
13744 for (i = 0; i < nunits; i++)
13745 for (j = 0; j < usize; j++)
13746 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
13747 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
13748 return force_reg (V16QImode, mask);
13751 /* Implement TARGET_MODES_TIEABLE_P. In principle we should always return
13752 true. However, due to issues with register allocation it is preferable
13753 to avoid tying integer scalar and FP scalar modes. Executing integer
13754 operations in general registers is better than treating them as scalar
13755 vector operations. This reduces latency and avoids redundant int<->FP
13756 moves. So tie modes if they are either the same class, or vector modes
13757 with other vector modes, vector structs or any scalar mode. */
13759 static bool
13760 aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13762 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
13763 return true;
13765 /* We specifically want to allow elements of "structure" modes to
13766 be tieable to the structure. This more general condition allows
13767 other rarer situations too. */
13768 if (aarch64_vector_mode_p (mode1) && aarch64_vector_mode_p (mode2))
13769 return true;
13771 /* Also allow any scalar modes with vectors. */
13772 if (aarch64_vector_mode_supported_p (mode1)
13773 || aarch64_vector_mode_supported_p (mode2))
13774 return true;
13776 return false;
13779 /* Return a new RTX holding the result of moving POINTER forward by
13780 AMOUNT bytes. */
13782 static rtx
13783 aarch64_move_pointer (rtx pointer, int amount)
13785 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
13787 return adjust_automodify_address (pointer, GET_MODE (pointer),
13788 next, amount);
13791 /* Return a new RTX holding the result of moving POINTER forward by the
13792 size of the mode it points to. */
13794 static rtx
13795 aarch64_progress_pointer (rtx pointer)
13797 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
13799 return aarch64_move_pointer (pointer, amount);
13802 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
13803 MODE bytes. */
13805 static void
13806 aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
13807 machine_mode mode)
13809 rtx reg = gen_reg_rtx (mode);
13811 /* "Cast" the pointers to the correct mode. */
13812 *src = adjust_address (*src, mode, 0);
13813 *dst = adjust_address (*dst, mode, 0);
13814 /* Emit the memcpy. */
13815 emit_move_insn (reg, *src);
13816 emit_move_insn (*dst, reg);
13817 /* Move the pointers forward. */
13818 *src = aarch64_progress_pointer (*src);
13819 *dst = aarch64_progress_pointer (*dst);
13822 /* Expand movmem, as if from a __builtin_memcpy. Return true if
13823 we succeed, otherwise return false. */
13825 bool
13826 aarch64_expand_movmem (rtx *operands)
13828 unsigned int n;
13829 rtx dst = operands[0];
13830 rtx src = operands[1];
13831 rtx base;
13832 bool speed_p = !optimize_function_for_size_p (cfun);
13834 /* When optimizing for size, give a better estimate of the length of a
13835 memcpy call, but use the default otherwise. */
13836 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
13838 /* We can't do anything smart if the amount to copy is not constant. */
13839 if (!CONST_INT_P (operands[2]))
13840 return false;
13842 n = UINTVAL (operands[2]);
13844 /* Try to keep the number of instructions low. For cases below 16 bytes we
13845 need to make at most two moves. For cases above 16 bytes it will be one
13846 move for each 16 byte chunk, then at most two additional moves. */
13847 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
13848 return false;
13850 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13851 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
13853 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
13854 src = adjust_automodify_address (src, VOIDmode, base, 0);
13856 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
13857 1-byte chunk. */
13858 if (n < 4)
13860 if (n >= 2)
13862 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
13863 n -= 2;
13866 if (n == 1)
13867 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
13869 return true;
13872 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
13873 4-byte chunk, partially overlapping with the previously copied chunk. */
13874 if (n < 8)
13876 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13877 n -= 4;
13878 if (n > 0)
13880 int move = n - 4;
13882 src = aarch64_move_pointer (src, move);
13883 dst = aarch64_move_pointer (dst, move);
13884 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13886 return true;
13889 /* Copy 8 bytes or more. Copy chunks of 16 bytes until we run out of
13890 them, then (if applicable) an 8-byte chunk. */
13891 while (n >= 8)
13893 if (n / 16)
13895 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
13896 n -= 16;
13898 else
13900 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
13901 n -= 8;
13905 /* Finish the final bytes of the copy. We can always do this in one
13906 instruction. We either copy the exact amount we need, or partially
13907 overlap with the previous chunk we copied and copy 8 bytes. */
13908 if (n == 0)
13909 return true;
13910 else if (n == 1)
13911 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
13912 else if (n == 2)
13913 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
13914 else if (n == 4)
13915 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13916 else
13918 if (n == 3)
13920 src = aarch64_move_pointer (src, -1);
13921 dst = aarch64_move_pointer (dst, -1);
13922 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
13924 else
13926 int move = n - 8;
13928 src = aarch64_move_pointer (src, move);
13929 dst = aarch64_move_pointer (dst, move);
13930 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
13934 return true;
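/* A hypothetical standalone sketch of the chunking strategy above (the
   helper name is invented for illustration and is not used by the
   compiler): the sequence of block copies emitted for a constant byte
   count N.  Each step is a pair (pointer adjustment, chunk size); a
   negative adjustment means the pointers are moved back so the chunk
   overlaps bytes that were already copied.  For example N = 30 gives
   (0,16), (0,8), (-2,8): 16 + 8 bytes are copied, then the final 8-byte
   chunk re-copies two bytes to cover the remaining six.  Returns the
   number of steps written to ADJUST/SIZE.  */
static int
movmem_chunks_sketch (unsigned int n, int adjust[], int size[])
{
  int steps = 0;

  if (n < 4)
    {
      if (n >= 2)
	{
	  adjust[steps] = 0;
	  size[steps++] = 2;
	  n -= 2;
	}
      if (n == 1)
	{
	  adjust[steps] = 0;
	  size[steps++] = 1;
	}
      return steps;
    }

  if (n < 8)
    {
      adjust[steps] = 0;
      size[steps++] = 4;
      n -= 4;
      if (n > 0)
	{
	  /* Overlapping second 4-byte chunk.  */
	  adjust[steps] = (int) n - 4;
	  size[steps++] = 4;
	}
      return steps;
    }

  while (n >= 8)
    if (n / 16)
      {
	adjust[steps] = 0;
	size[steps++] = 16;
	n -= 16;
      }
    else
      {
	adjust[steps] = 0;
	size[steps++] = 8;
	n -= 8;
      }

  /* Tail of 0-7 bytes, mirroring the final cases above.  */
  if (n == 1 || n == 2 || n == 4)
    {
      adjust[steps] = 0;
      size[steps++] = (int) n;
    }
  else if (n == 3)
    {
      adjust[steps] = -1;
      size[steps++] = 4;
    }
  else if (n != 0)
    {
      adjust[steps] = (int) n - 8;
      size[steps++] = 8;
    }

  return steps;
}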
13937 /* Split a DImode store of a CONST_INT SRC to MEM DST as two
13938 SImode stores. Handle the case when the constant has identical
13939 bottom and top halves. This is beneficial when the two stores can be
13940 merged into an STP and we avoid synthesising potentially expensive
13941 immediates twice. Return true if such a split is possible. */
13943 bool
13944 aarch64_split_dimode_const_store (rtx dst, rtx src)
13946 rtx lo = gen_lowpart (SImode, src);
13947 rtx hi = gen_highpart_mode (SImode, DImode, src);
13949 bool size_p = optimize_function_for_size_p (cfun);
13951 if (!rtx_equal_p (lo, hi))
13952 return false;
13954 unsigned int orig_cost
13955 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
13956 unsigned int lo_cost
13957 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
13959 /* We want to transform:
13960 MOV x1, 49370
13961 MOVK x1, 0x140, lsl 16
13962 MOVK x1, 0xc0da, lsl 32
13963 MOVK x1, 0x140, lsl 48
13964 STR x1, [x0]
13965 into:
13966 MOV w1, 49370
13967 MOVK w1, 0x140, lsl 16
13968 STP w1, w1, [x0]
13969 So we want to perform this only when we save two instructions
13970 or more. When optimizing for size, however, accept any code size
13971 savings we can. */
13972 if (size_p && orig_cost <= lo_cost)
13973 return false;
13975 if (!size_p
13976 && (orig_cost <= lo_cost + 1))
13977 return false;
13979 rtx mem_lo = adjust_address (dst, SImode, 0);
13980 if (!aarch64_mem_pair_operand (mem_lo, SImode))
13981 return false;
13983 rtx tmp_reg = gen_reg_rtx (SImode);
13984 aarch64_expand_mov_immediate (tmp_reg, lo);
13985 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
13986 /* Don't emit an explicit store pair as this may not be always profitable.
13987 Let the sched-fusion logic decide whether to merge them. */
13988 emit_move_insn (mem_lo, tmp_reg);
13989 emit_move_insn (mem_hi, tmp_reg);
13991 return true;
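/* A hypothetical standalone sketch (illustration only, not called by the
   compiler): the precondition checked above, i.e. whether a 64-bit
   constant has identical low and high 32-bit halves and is therefore a
   candidate for the MOV/MOVK + STP sequence shown in the comment.  For
   example 0x0140c0da0140c0da qualifies, 0x0000000100000002 does not.  */
static int
dimode_const_has_equal_halves_sketch (unsigned long long val)
{
  unsigned int lo = (unsigned int) (val & 0xffffffffULL);
  unsigned int hi = (unsigned int) (val >> 32);

  return lo == hi;
}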
13994 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
13996 static unsigned HOST_WIDE_INT
13997 aarch64_asan_shadow_offset (void)
13999 return (HOST_WIDE_INT_1 << 36);
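/* A hypothetical standalone sketch (illustration only): how the offset
   above enters AddressSanitizer's usual memory-to-shadow mapping, assuming
   the default 8-byte shadow granularity (shadow scale 3).  */
static unsigned long long
asan_shadow_address_sketch (unsigned long long addr)
{
  return (addr >> 3) + (1ULL << 36);
}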
14002 static rtx
14003 aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
14004 int code, tree treeop0, tree treeop1)
14006 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
14007 rtx op0, op1;
14008 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
14009 insn_code icode;
14010 struct expand_operand ops[4];
14012 start_sequence ();
14013 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
14015 op_mode = GET_MODE (op0);
14016 if (op_mode == VOIDmode)
14017 op_mode = GET_MODE (op1);
14019 switch (op_mode)
14021 case E_QImode:
14022 case E_HImode:
14023 case E_SImode:
14024 cmp_mode = SImode;
14025 icode = CODE_FOR_cmpsi;
14026 break;
14028 case E_DImode:
14029 cmp_mode = DImode;
14030 icode = CODE_FOR_cmpdi;
14031 break;
14033 case E_SFmode:
14034 cmp_mode = SFmode;
14035 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
14036 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
14037 break;
14039 case E_DFmode:
14040 cmp_mode = DFmode;
14041 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
14042 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
14043 break;
14045 default:
14046 end_sequence ();
14047 return NULL_RTX;
14050 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
14051 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
14052 if (!op0 || !op1)
14054 end_sequence ();
14055 return NULL_RTX;
14057 *prep_seq = get_insns ();
14058 end_sequence ();
14060 create_fixed_operand (&ops[0], op0);
14061 create_fixed_operand (&ops[1], op1);
14063 start_sequence ();
14064 if (!maybe_expand_insn (icode, 2, ops))
14066 end_sequence ();
14067 return NULL_RTX;
14069 *gen_seq = get_insns ();
14070 end_sequence ();
14072 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
14073 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
14076 static rtx
14077 aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
14078 int cmp_code, tree treeop0, tree treeop1, int bit_code)
14080 rtx op0, op1, target;
14081 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
14082 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
14083 insn_code icode;
14084 struct expand_operand ops[6];
14085 int aarch64_cond;
14087 push_to_sequence (*prep_seq);
14088 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
14090 op_mode = GET_MODE (op0);
14091 if (op_mode == VOIDmode)
14092 op_mode = GET_MODE (op1);
14094 switch (op_mode)
14096 case E_QImode:
14097 case E_HImode:
14098 case E_SImode:
14099 cmp_mode = SImode;
14100 icode = CODE_FOR_ccmpsi;
14101 break;
14103 case E_DImode:
14104 cmp_mode = DImode;
14105 icode = CODE_FOR_ccmpdi;
14106 break;
14108 case E_SFmode:
14109 cmp_mode = SFmode;
14110 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
14111 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
14112 break;
14114 case E_DFmode:
14115 cmp_mode = DFmode;
14116 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
14117 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
14118 break;
14120 default:
14121 end_sequence ();
14122 return NULL_RTX;
14125 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
14126 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
14127 if (!op0 || !op1)
14129 end_sequence ();
14130 return NULL_RTX;
14132 *prep_seq = get_insns ();
14133 end_sequence ();
14135 target = gen_rtx_REG (cc_mode, CC_REGNUM);
14136 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
14138 if (bit_code != AND)
14140 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
14141 GET_MODE (XEXP (prev, 0))),
14142 VOIDmode, XEXP (prev, 0), const0_rtx);
14143 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
14146 create_fixed_operand (&ops[0], XEXP (prev, 0));
14147 create_fixed_operand (&ops[1], target);
14148 create_fixed_operand (&ops[2], op0);
14149 create_fixed_operand (&ops[3], op1);
14150 create_fixed_operand (&ops[4], prev);
14151 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
14153 push_to_sequence (*gen_seq);
14154 if (!maybe_expand_insn (icode, 6, ops))
14156 end_sequence ();
14157 return NULL_RTX;
14160 *gen_seq = get_insns ();
14161 end_sequence ();
14163 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
14166 #undef TARGET_GEN_CCMP_FIRST
14167 #define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
14169 #undef TARGET_GEN_CCMP_NEXT
14170 #define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
14172 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
14173 instruction fusion of some sort. */
14175 static bool
14176 aarch64_macro_fusion_p (void)
14178 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
14182 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
14183 should be kept together during scheduling. */
14185 static bool
14186 aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
14188 rtx set_dest;
14189 rtx prev_set = single_set (prev);
14190 rtx curr_set = single_set (curr);
14191 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
14192 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
14194 if (!aarch64_macro_fusion_p ())
14195 return false;
14197 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
14199 /* We are trying to match:
14200 prev (mov) == (set (reg r0) (const_int imm16))
14201 curr (movk) == (set (zero_extract (reg r0)
14202 (const_int 16)
14203 (const_int 16))
14204 (const_int imm16_1)) */
14206 set_dest = SET_DEST (curr_set);
14208 if (GET_CODE (set_dest) == ZERO_EXTRACT
14209 && CONST_INT_P (SET_SRC (curr_set))
14210 && CONST_INT_P (SET_SRC (prev_set))
14211 && CONST_INT_P (XEXP (set_dest, 2))
14212 && INTVAL (XEXP (set_dest, 2)) == 16
14213 && REG_P (XEXP (set_dest, 0))
14214 && REG_P (SET_DEST (prev_set))
14215 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
14217 return true;
14221 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
14224 /* We're trying to match:
14225 prev (adrp) == (set (reg r1)
14226 (high (symbol_ref ("SYM"))))
14227 curr (add) == (set (reg r0)
14228 (lo_sum (reg r1)
14229 (symbol_ref ("SYM"))))
14230 Note that r0 need not necessarily be the same as r1, especially
14231 during pre-regalloc scheduling. */
14233 if (satisfies_constraint_Ush (SET_SRC (prev_set))
14234 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
14236 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
14237 && REG_P (XEXP (SET_SRC (curr_set), 0))
14238 && REGNO (XEXP (SET_SRC (curr_set), 0))
14239 == REGNO (SET_DEST (prev_set))
14240 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
14241 XEXP (SET_SRC (curr_set), 1)))
14242 return true;
14246 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
14249 /* We're trying to match:
14250 prev (movk) == (set (zero_extract (reg r0)
14251 (const_int 16)
14252 (const_int 32))
14253 (const_int imm16_1))
14254 curr (movk) == (set (zero_extract (reg r0)
14255 (const_int 16)
14256 (const_int 48))
14257 (const_int imm16_2)) */
14259 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
14260 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
14261 && REG_P (XEXP (SET_DEST (prev_set), 0))
14262 && REG_P (XEXP (SET_DEST (curr_set), 0))
14263 && REGNO (XEXP (SET_DEST (prev_set), 0))
14264 == REGNO (XEXP (SET_DEST (curr_set), 0))
14265 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
14266 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
14267 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
14268 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
14269 && CONST_INT_P (SET_SRC (prev_set))
14270 && CONST_INT_P (SET_SRC (curr_set)))
14271 return true;
14274 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
14276 /* We're trying to match:
14277 prev (adrp) == (set (reg r0)
14278 (high (symbol_ref ("SYM"))))
14279 curr (ldr) == (set (reg r1)
14280 (mem (lo_sum (reg r0)
14281 (symbol_ref ("SYM")))))
14283 curr (ldr) == (set (reg r1)
14284 (zero_extend (mem
14285 (lo_sum (reg r0)
14286 (symbol_ref ("SYM")))))) */
14287 if (satisfies_constraint_Ush (SET_SRC (prev_set))
14288 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
14290 rtx curr_src = SET_SRC (curr_set);
14292 if (GET_CODE (curr_src) == ZERO_EXTEND)
14293 curr_src = XEXP (curr_src, 0);
14295 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
14296 && REG_P (XEXP (XEXP (curr_src, 0), 0))
14297 && REGNO (XEXP (XEXP (curr_src, 0), 0))
14298 == REGNO (SET_DEST (prev_set))
14299 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
14300 XEXP (SET_SRC (prev_set), 0)))
14301 return true;
14305 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
14306 && aarch_crypto_can_dual_issue (prev, curr))
14307 return true;
14309 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
14310 && any_condjump_p (curr))
14312 enum attr_type prev_type = get_attr_type (prev);
14314 unsigned int condreg1, condreg2;
14315 rtx cc_reg_1;
14316 aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
14317 cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
14319 if (reg_referenced_p (cc_reg_1, PATTERN (curr))
14320 && prev
14321 && modified_in_p (cc_reg_1, prev))
14323 /* FIXME: this misses some cases that are considered simple arithmetic
14324 instructions for ThunderX. Simple shifts are missed here. */
14325 if (prev_type == TYPE_ALUS_SREG
14326 || prev_type == TYPE_ALUS_IMM
14327 || prev_type == TYPE_LOGICS_REG
14328 || prev_type == TYPE_LOGICS_IMM)
14329 return true;
14333 if (prev_set
14334 && curr_set
14335 && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
14336 && any_condjump_p (curr))
14338 /* We're trying to match:
14339 prev (alu_insn) == (set (r0) plus ((r0) (r1/imm)))
14340 curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
14341 (const_int 0))
14342 (label_ref ("SYM"))
14343 (pc)) */
14344 if (SET_DEST (curr_set) == (pc_rtx)
14345 && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
14346 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
14347 && REG_P (SET_DEST (prev_set))
14348 && REGNO (SET_DEST (prev_set))
14349 == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
14351 /* Fuse ALU operations followed by conditional branch instruction. */
14352 switch (get_attr_type (prev))
14354 case TYPE_ALU_IMM:
14355 case TYPE_ALU_SREG:
14356 case TYPE_ADC_REG:
14357 case TYPE_ADC_IMM:
14358 case TYPE_ADCS_REG:
14359 case TYPE_ADCS_IMM:
14360 case TYPE_LOGIC_REG:
14361 case TYPE_LOGIC_IMM:
14362 case TYPE_CSEL:
14363 case TYPE_ADR:
14364 case TYPE_MOV_IMM:
14365 case TYPE_SHIFT_REG:
14366 case TYPE_SHIFT_IMM:
14367 case TYPE_BFM:
14368 case TYPE_RBIT:
14369 case TYPE_REV:
14370 case TYPE_EXTEND:
14371 return true;
14373 default:;
14378 return false;
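/* A hypothetical standalone sketch (illustration only, not used by the
   compiler): the 16-bit pieces of a 32-bit immediate that end up in the
   MOV/MOVK pair matched by the AARCH64_FUSE_MOV_MOVK case above, e.g.
   0x0140c0da -> MOV w0, #0xc0da; MOVK w0, #0x140, lsl 16.  */
static void
mov_movk_pieces_sketch (unsigned int imm, unsigned int *mov_imm16,
			unsigned int *movk_imm16)
{
  *mov_imm16 = imm & 0xffff;		/* Low half, set by the MOV.  */
  *movk_imm16 = (imm >> 16) & 0xffff;	/* High half, inserted by the MOVK.  */
}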
14381 /* Return true iff the instruction fusion described by OP is enabled. */
14383 bool
14384 aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
14386 return (aarch64_tune_params.fusible_ops & op) != 0;
14389 /* If MEM is in the form of [base+offset], extract the two parts
14390 of the address and store them in BASE and OFFSET; otherwise return
14391 false after clearing BASE and OFFSET. */
14393 bool
14394 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
14396 rtx addr;
14398 gcc_assert (MEM_P (mem));
14400 addr = XEXP (mem, 0);
14402 if (REG_P (addr))
14404 *base = addr;
14405 *offset = const0_rtx;
14406 return true;
14409 if (GET_CODE (addr) == PLUS
14410 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
14412 *base = XEXP (addr, 0);
14413 *offset = XEXP (addr, 1);
14414 return true;
14417 *base = NULL_RTX;
14418 *offset = NULL_RTX;
14420 return false;
14423 /* Types for scheduling fusion. */
14424 enum sched_fusion_type
14426 SCHED_FUSION_NONE = 0,
14427 SCHED_FUSION_LD_SIGN_EXTEND,
14428 SCHED_FUSION_LD_ZERO_EXTEND,
14429 SCHED_FUSION_LD,
14430 SCHED_FUSION_ST,
14431 SCHED_FUSION_NUM
14434 /* If INSN is a load or store whose address is in the form of [base+offset],
14435 extract the two parts and store them in BASE and OFFSET. Return the
14436 scheduling fusion type of this INSN. */
14438 static enum sched_fusion_type
14439 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
14441 rtx x, dest, src;
14442 enum sched_fusion_type fusion = SCHED_FUSION_LD;
14444 gcc_assert (INSN_P (insn));
14445 x = PATTERN (insn);
14446 if (GET_CODE (x) != SET)
14447 return SCHED_FUSION_NONE;
14449 src = SET_SRC (x);
14450 dest = SET_DEST (x);
14452 machine_mode dest_mode = GET_MODE (dest);
14454 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
14455 return SCHED_FUSION_NONE;
14457 if (GET_CODE (src) == SIGN_EXTEND)
14459 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
14460 src = XEXP (src, 0);
14461 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
14462 return SCHED_FUSION_NONE;
14464 else if (GET_CODE (src) == ZERO_EXTEND)
14466 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
14467 src = XEXP (src, 0);
14468 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
14469 return SCHED_FUSION_NONE;
14472 if (GET_CODE (src) == MEM && REG_P (dest))
14473 extract_base_offset_in_addr (src, base, offset);
14474 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
14476 fusion = SCHED_FUSION_ST;
14477 extract_base_offset_in_addr (dest, base, offset);
14479 else
14480 return SCHED_FUSION_NONE;
14482 if (*base == NULL_RTX || *offset == NULL_RTX)
14483 fusion = SCHED_FUSION_NONE;
14485 return fusion;
14488 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
14490 Currently we only support fusing ldr and str instructions, so FUSION_PRI
14491 and PRI are only calculated for these instructions. For other instructions,
14492 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
14493 types of instruction fusion can be added by returning different priorities.
14495 It's important that irrelevant instructions get the largest FUSION_PRI. */
14497 static void
14498 aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
14499 int *fusion_pri, int *pri)
14501 int tmp, off_val;
14502 rtx base, offset;
14503 enum sched_fusion_type fusion;
14505 gcc_assert (INSN_P (insn));
14507 tmp = max_pri - 1;
14508 fusion = fusion_load_store (insn, &base, &offset);
14509 if (fusion == SCHED_FUSION_NONE)
14511 *pri = tmp;
14512 *fusion_pri = tmp;
14513 return;
14516 /* Set FUSION_PRI according to fusion type and base register. */
14517 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
14519 /* Calculate PRI. */
14520 tmp /= 2;
14522 /* INSN with smaller offset goes first. */
14523 off_val = (int)(INTVAL (offset));
14524 if (off_val >= 0)
14525 tmp -= (off_val & 0xfffff);
14526 else
14527 tmp += ((- off_val) & 0xfffff);
14529 *pri = tmp;
14530 return;
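/* A hypothetical standalone sketch of the priority computation above, with
   NREGS standing in for FIRST_PSEUDO_REGISTER (the helper is invented for
   illustration and is not called by the compiler).  Loads/stores with the
   same fusion type and base register share a FUSION_PRI, and within such a
   group a smaller offset yields a larger PRI, so it is scheduled first and
   consecutive accesses end up adjacent.  */
static void
sched_fusion_priority_sketch (int max_pri, int fusion_type, int base_regno,
			      long long offset, int nregs,
			      int *fusion_pri, int *pri)
{
  int tmp = max_pri - 1;

  *fusion_pri = tmp - fusion_type * nregs - base_regno;

  tmp /= 2;
  if (offset >= 0)
    tmp -= (int) (offset & 0xfffff);
  else
    tmp += (int) ((-offset) & 0xfffff);

  *pri = tmp;
}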
14533 /* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
14534 Adjust priority of sha1h instructions so they are scheduled before
14535 other SHA1 instructions. */
14537 static int
14538 aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
14540 rtx x = PATTERN (insn);
14542 if (GET_CODE (x) == SET)
14544 x = SET_SRC (x);
14546 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
14547 return priority + 10;
14550 return priority;
14553 /* Given OPERANDS of consecutive load/store, check if we can merge
14554 them into ldp/stp. LOAD is true if they are load instructions.
14555 MODE is the mode of memory operands. */
14557 bool
14558 aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
14559 machine_mode mode)
14561 HOST_WIDE_INT offval_1, offval_2, msize;
14562 enum reg_class rclass_1, rclass_2;
14563 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
14565 if (load)
14567 mem_1 = operands[1];
14568 mem_2 = operands[3];
14569 reg_1 = operands[0];
14570 reg_2 = operands[2];
14571 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
14572 if (REGNO (reg_1) == REGNO (reg_2))
14573 return false;
14575 else
14577 mem_1 = operands[0];
14578 mem_2 = operands[2];
14579 reg_1 = operands[1];
14580 reg_2 = operands[3];
14583 /* The mems cannot be volatile. */
14584 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
14585 return false;
14587 /* If we have SImode and slow unaligned ldp,
14588 check that the alignment is at least 8 bytes. */
14589 if (mode == SImode
14590 && (aarch64_tune_params.extra_tuning_flags
14591 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
14592 && !optimize_size
14593 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
14594 return false;
14596 /* Check if the addresses are in the form of [base+offset]. */
14597 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
14598 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
14599 return false;
14600 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
14601 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
14602 return false;
14604 /* Check if the bases are the same. */
14605 if (!rtx_equal_p (base_1, base_2))
14606 return false;
14608 offval_1 = INTVAL (offset_1);
14609 offval_2 = INTVAL (offset_2);
14610 msize = GET_MODE_SIZE (mode);
14611 /* Check if the offsets are consecutive. */
14612 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
14613 return false;
14615 /* Check if the addresses are clobbered by load. */
14616 if (load)
14618 if (reg_mentioned_p (reg_1, mem_1))
14619 return false;
14621 /* In increasing order, the last load can clobber the address. */
14622 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
14623 return false;
14626 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
14627 rclass_1 = FP_REGS;
14628 else
14629 rclass_1 = GENERAL_REGS;
14631 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
14632 rclass_2 = FP_REGS;
14633 else
14634 rclass_2 = GENERAL_REGS;
14636 /* Check if the registers are of the same class. */
14637 if (rclass_1 != rclass_2)
14638 return false;
14640 return true;
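/* A hypothetical standalone sketch (illustration only, not used by the
   compiler): the offset test applied above.  Two accesses of MSIZE bytes
   each can form an ldp/stp candidate only if their offsets differ by
   exactly MSIZE, in either order; e.g. offsets 8 and 12 with msize 4
   qualify, offsets 8 and 16 do not.  */
static int
offsets_consecutive_sketch (long long offval_1, long long offval_2,
			    long long msize)
{
  return offval_1 == offval_2 + msize || offval_2 == offval_1 + msize;
}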
14643 /* Given OPERANDS of consecutive load/store, check if we can merge
14644 them into ldp/stp by adjusting the offset. LOAD is true if they
14645 are load instructions. MODE is the mode of memory operands.
14647 Given below consecutive stores:
14649 str w1, [xb, 0x100]
14650 str w1, [xb, 0x104]
14651 str w1, [xb, 0x108]
14652 str w1, [xb, 0x10c]
14654 Though the offsets are out of the range supported by stp, we can
14655 still pair them after adjusting the offset, like:
14657 add scratch, xb, 0x100
14658 stp w1, w1, [scratch]
14659 stp w1, w1, [scratch, 0x8]
14661 The peephole patterns detecting this opportunity should guarantee
14662 the scratch register is available. */
14664 bool
14665 aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
14666 scalar_mode mode)
14668 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
14669 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
14670 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
14671 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
14673 if (load)
14675 reg_1 = operands[0];
14676 mem_1 = operands[1];
14677 reg_2 = operands[2];
14678 mem_2 = operands[3];
14679 reg_3 = operands[4];
14680 mem_3 = operands[5];
14681 reg_4 = operands[6];
14682 mem_4 = operands[7];
14683 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
14684 && REG_P (reg_3) && REG_P (reg_4));
14685 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
14686 return false;
14688 else
14690 mem_1 = operands[0];
14691 reg_1 = operands[1];
14692 mem_2 = operands[2];
14693 reg_2 = operands[3];
14694 mem_3 = operands[4];
14695 reg_3 = operands[5];
14696 mem_4 = operands[6];
14697 reg_4 = operands[7];
14699 /* Skip if the memory operand is by itself valid for ldp/stp. */
14700 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
14701 return false;
14703 /* The mems cannot be volatile. */
14704 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
14705 || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4)
14706 return false;
14708 /* Check if the addresses are in the form of [base+offset]. */
14709 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
14710 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
14711 return false;
14712 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
14713 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
14714 return false;
14715 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
14716 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
14717 return false;
14718 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
14719 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
14720 return false;
14722 /* Check if the bases are the same. */
14723 if (!rtx_equal_p (base_1, base_2)
14724 || !rtx_equal_p (base_2, base_3)
14725 || !rtx_equal_p (base_3, base_4))
14726 return false;
14728 offval_1 = INTVAL (offset_1);
14729 offval_2 = INTVAL (offset_2);
14730 offval_3 = INTVAL (offset_3);
14731 offval_4 = INTVAL (offset_4);
14732 msize = GET_MODE_SIZE (mode);
14733 /* Check if the offsets are consecutive. */
14734 if ((offval_1 != (offval_2 + msize)
14735 || offval_1 != (offval_3 + msize * 2)
14736 || offval_1 != (offval_4 + msize * 3))
14737 && (offval_4 != (offval_3 + msize)
14738 || offval_4 != (offval_2 + msize * 2)
14739 || offval_4 != (offval_1 + msize * 3)))
14740 return false;
14742 /* Check if the addresses are clobbered by load. */
14743 if (load)
14745 if (reg_mentioned_p (reg_1, mem_1)
14746 || reg_mentioned_p (reg_2, mem_2)
14747 || reg_mentioned_p (reg_3, mem_3))
14748 return false;
14750 /* In increasing order, the last load can clobber the address. */
14751 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
14752 return false;
14755 /* If we have SImode and slow unaligned ldp,
14756 check that the alignment is at least 8 bytes. */
14757 if (mode == SImode
14758 && (aarch64_tune_params.extra_tuning_flags
14759 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
14760 && !optimize_size
14761 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
14762 return false;
14764 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
14765 rclass_1 = FP_REGS;
14766 else
14767 rclass_1 = GENERAL_REGS;
14769 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
14770 rclass_2 = FP_REGS;
14771 else
14772 rclass_2 = GENERAL_REGS;
14774 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
14775 rclass_3 = FP_REGS;
14776 else
14777 rclass_3 = GENERAL_REGS;
14779 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
14780 rclass_4 = FP_REGS;
14781 else
14782 rclass_4 = GENERAL_REGS;
14784 /* Check if the registers are of the same class. */
14785 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
14786 return false;
14788 return true;
14791 /* Given OPERANDS of consecutive load/store, this function pairs them
14792 into ldp/stp after adjusting the offset. It depends on the fact
14793 that addresses of load/store instructions are in increasing order.
14794 MODE is the mode of the memory operands. CODE is the rtl operator
14795 which should be applied to all memory operands; it is SIGN_EXTEND,
14796 ZERO_EXTEND or UNKNOWN. */
14798 bool
14799 aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
14800 scalar_mode mode, RTX_CODE code)
14802 rtx base, offset, t1, t2;
14803 rtx mem_1, mem_2, mem_3, mem_4;
14804 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
14806 if (load)
14808 mem_1 = operands[1];
14809 mem_2 = operands[3];
14810 mem_3 = operands[5];
14811 mem_4 = operands[7];
14813 else
14815 mem_1 = operands[0];
14816 mem_2 = operands[2];
14817 mem_3 = operands[4];
14818 mem_4 = operands[6];
14819 gcc_assert (code == UNKNOWN);
14822 extract_base_offset_in_addr (mem_1, &base, &offset);
14823 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
14825 /* Adjust the offset so that it can fit in an ldp/stp instruction. */
14826 msize = GET_MODE_SIZE (mode);
14827 stp_off_limit = msize * 0x40;
14828 off_val = INTVAL (offset);
14829 abs_off = (off_val < 0) ? -off_val : off_val;
14830 new_off = abs_off % stp_off_limit;
14831 adj_off = abs_off - new_off;
14833 /* Further adjust to make sure all offsets are OK. */
14834 if ((new_off + msize * 2) >= stp_off_limit)
14836 adj_off += stp_off_limit;
14837 new_off -= stp_off_limit;
14840 /* Make sure the adjustment can be done with ADD/SUB instructions. */
14841 if (adj_off >= 0x1000)
14842 return false;
14844 if (off_val < 0)
14846 adj_off = -adj_off;
14847 new_off = -new_off;
14850 /* Create new memory references. */
14851 mem_1 = change_address (mem_1, VOIDmode,
14852 plus_constant (DImode, operands[8], new_off));
14854 /* Check if the adjusted address is OK for ldp/stp. */
14855 if (!aarch64_mem_pair_operand (mem_1, mode))
14856 return false;
14858 msize = GET_MODE_SIZE (mode);
14859 mem_2 = change_address (mem_2, VOIDmode,
14860 plus_constant (DImode,
14861 operands[8],
14862 new_off + msize));
14863 mem_3 = change_address (mem_3, VOIDmode,
14864 plus_constant (DImode,
14865 operands[8],
14866 new_off + msize * 2));
14867 mem_4 = change_address (mem_4, VOIDmode,
14868 plus_constant (DImode,
14869 operands[8],
14870 new_off + msize * 3));
14872 if (code == ZERO_EXTEND)
14874 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
14875 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
14876 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
14877 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
14879 else if (code == SIGN_EXTEND)
14881 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
14882 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
14883 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
14884 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
14887 if (load)
14889 operands[1] = mem_1;
14890 operands[3] = mem_2;
14891 operands[5] = mem_3;
14892 operands[7] = mem_4;
14894 else
14896 operands[0] = mem_1;
14897 operands[2] = mem_2;
14898 operands[4] = mem_3;
14899 operands[6] = mem_4;
14902 /* Emit adjusting instruction. */
14903 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
14904 /* Emit ldp/stp instructions. */
14905 t1 = gen_rtx_SET (operands[0], operands[1]);
14906 t2 = gen_rtx_SET (operands[2], operands[3]);
14907 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
14908 t1 = gen_rtx_SET (operands[4], operands[5]);
14909 t2 = gen_rtx_SET (operands[6], operands[7]);
14910 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
14911 return true;
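/* A hypothetical standalone sketch of the offset split performed above
   (the helper name is invented for illustration and is not called by the
   compiler).  For the first access's offset OFF_VAL and element size MSIZE
   it computes the base adjustment ADJ_OFF (applied to the base register
   with a single ADD/SUB) and the residual offset NEW_OFF used in the
   ldp/stp.  With OFF_VAL = 0x100 and MSIZE = 4 this gives ADJ_OFF = 0x100
   and NEW_OFF = 0, matching the add/stp example in the comment before
   aarch64_operands_adjust_ok_for_ldpstp.  Returns 0 if the adjustment
   would not fit in an ADD/SUB immediate.  */
static int
adjusted_ldpstp_offsets_sketch (long long off_val, long long msize,
				long long *adj_off, long long *new_off)
{
  long long stp_off_limit = msize * 0x40;
  long long abs_off = off_val < 0 ? -off_val : off_val;

  *new_off = abs_off % stp_off_limit;
  *adj_off = abs_off - *new_off;

  /* Mirror the further adjustment above so the later offsets stay
     in range.  */
  if (*new_off + msize * 2 >= stp_off_limit)
    {
      *adj_off += stp_off_limit;
      *new_off -= stp_off_limit;
    }

  /* The adjustment must be encodable as an ADD/SUB immediate.  */
  if (*adj_off >= 0x1000)
    return 0;

  if (off_val < 0)
    {
      *adj_off = -*adj_off;
      *new_off = -*new_off;
    }

  return 1;
}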
14914 /* Return true if a pseudo register should be created and used to hold
14915 the GOT address for PIC code. */
14917 bool
14918 aarch64_use_pseudo_pic_reg (void)
14920 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
14923 /* Implement TARGET_UNSPEC_MAY_TRAP_P. */
14925 static int
14926 aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
14928 switch (XINT (x, 1))
14930 case UNSPEC_GOTSMALLPIC:
14931 case UNSPEC_GOTSMALLPIC28K:
14932 case UNSPEC_GOTTINYPIC:
14933 return 0;
14934 default:
14935 break;
14938 return default_unspec_may_trap_p (x, flags);
14942 /* If X is a positive CONST_DOUBLE with a value that is a power of 2
14943 return the log2 of that value. Otherwise return -1. */
14946 aarch64_fpconst_pow_of_2 (rtx x)
14948 const REAL_VALUE_TYPE *r;
14950 if (!CONST_DOUBLE_P (x))
14951 return -1;
14953 r = CONST_DOUBLE_REAL_VALUE (x);
14955 if (REAL_VALUE_NEGATIVE (*r)
14956 || REAL_VALUE_ISNAN (*r)
14957 || REAL_VALUE_ISINF (*r)
14958 || !real_isinteger (r, DFmode))
14959 return -1;
14961 return exact_log2 (real_to_integer (r));
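/* A hypothetical standalone sketch of the same test on a plain double,
   without going through the RTL constant (illustration only, not used by
   the compiler).  Repeated halving is exact in binary floating point, so a
   positive value reaches exactly 1.0 if and only if it is a power of two;
   values below 1.0 are rejected, mirroring the integer requirement above.
   E.g. 8.0 gives 3, while 6.0 and 0.5 give -1.  */
static int
fpconst_pow_of_2_sketch (double d)
{
  int log2val = 0;

  /* Reject values below 1.0 as well as infinities and NaNs.  */
  if (!(d >= 1.0) || d * 0.5 == d)
    return -1;

  while (d > 1.0)
    {
      d /= 2.0;
      log2val++;
    }

  return d == 1.0 ? log2val : -1;
}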
14964 /* If X is a vector of equal CONST_DOUBLE values and that value is
14965 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
14968 aarch64_vec_fpconst_pow_of_2 (rtx x)
14970 if (GET_CODE (x) != CONST_VECTOR)
14971 return -1;
14973 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
14974 return -1;
14976 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
14977 if (firstval <= 0)
14978 return -1;
14980 for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++)
14981 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
14982 return -1;
14984 return firstval;
14987 /* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
14988 to float.
14990 __fp16 always promotes through this hook.
14991 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
14992 through the generic excess precision logic rather than here. */
14994 static tree
14995 aarch64_promoted_type (const_tree t)
14997 if (SCALAR_FLOAT_TYPE_P (t)
14998 && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
14999 return float_type_node;
15001 return NULL_TREE;
15004 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
15006 static bool
15007 aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
15008 optimization_type opt_type)
15010 switch (op)
15012 case rsqrt_optab:
15013 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
15015 default:
15016 return true;
15020 /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
15021 if MODE is HFmode, and punt to the generic implementation otherwise. */
15023 static bool
15024 aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
15026 return (mode == HFmode
15027 ? true
15028 : default_libgcc_floating_mode_supported_p (mode));
15031 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
15032 if MODE is HFmode, and punt to the generic implementation otherwise. */
15034 static bool
15035 aarch64_scalar_mode_supported_p (scalar_mode mode)
15037 return (mode == HFmode
15038 ? true
15039 : default_scalar_mode_supported_p (mode));
15042 /* Set the value of FLT_EVAL_METHOD.
15043 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
15045 0: evaluate all operations and constants, whose semantic type has at
15046 most the range and precision of type float, to the range and
15047 precision of float; evaluate all other operations and constants to
15048 the range and precision of the semantic type;
15050 N, where _FloatN is a supported interchange floating type
15051 evaluate all operations and constants, whose semantic type has at
15052 most the range and precision of _FloatN type, to the range and
15053 precision of the _FloatN type; evaluate all other operations and
15054 constants to the range and precision of the semantic type;
15056 If we have the ARMv8.2-A extensions then we support _Float16 in native
15057 precision, so we should set this to 16. Otherwise, we support the type,
15058 but want to evaluate expressions in float precision, so set this to
15059 0. */
15061 static enum flt_eval_method
15062 aarch64_excess_precision (enum excess_precision_type type)
15064 switch (type)
15066 case EXCESS_PRECISION_TYPE_FAST:
15067 case EXCESS_PRECISION_TYPE_STANDARD:
15068 /* We can calculate either in 16-bit range and precision or
15069 32-bit range and precision. Make that decision based on whether
15070 we have native support for the ARMv8.2-A 16-bit floating-point
15071 instructions or not. */
15072 return (TARGET_FP_F16INST
15073 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
15074 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
15075 case EXCESS_PRECISION_TYPE_IMPLICIT:
15076 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
15077 default:
15078 gcc_unreachable ();
15080 return FLT_EVAL_METHOD_UNPREDICTABLE;
15083 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
15084 scheduled for speculative execution. Reject the long-running division
15085 and square-root instructions. */
15087 static bool
15088 aarch64_sched_can_speculate_insn (rtx_insn *insn)
15090 switch (get_attr_type (insn))
15092 case TYPE_SDIV:
15093 case TYPE_UDIV:
15094 case TYPE_FDIVS:
15095 case TYPE_FDIVD:
15096 case TYPE_FSQRTS:
15097 case TYPE_FSQRTD:
15098 case TYPE_NEON_FP_SQRT_S:
15099 case TYPE_NEON_FP_SQRT_D:
15100 case TYPE_NEON_FP_SQRT_S_Q:
15101 case TYPE_NEON_FP_SQRT_D_Q:
15102 case TYPE_NEON_FP_DIV_S:
15103 case TYPE_NEON_FP_DIV_D:
15104 case TYPE_NEON_FP_DIV_S_Q:
15105 case TYPE_NEON_FP_DIV_D_Q:
15106 return false;
15107 default:
15108 return true;
15112 /* Target-specific selftests. */
15114 #if CHECKING_P
15116 namespace selftest {
15118 /* Selftest for the RTL loader.
15119 Verify that the RTL loader copes with a dump from
15120 print_rtx_function. This is essentially just a test that class
15121 function_reader can handle a real dump, but it also verifies
15122 that lookup_reg_by_dump_name correctly handles hard regs.
15123 The presence of hard reg names in the dump means that the test is
15124 target-specific, hence it is in this file. */
15126 static void
15127 aarch64_test_loading_full_dump ()
15129 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));
15131 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
15133 rtx_insn *insn_1 = get_insn_by_uid (1);
15134 ASSERT_EQ (NOTE, GET_CODE (insn_1));
15136 rtx_insn *insn_15 = get_insn_by_uid (15);
15137 ASSERT_EQ (INSN, GET_CODE (insn_15));
15138 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
15140 /* Verify crtl->return_rtx. */
15141 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
15142 ASSERT_EQ (0, REGNO (crtl->return_rtx));
15143 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
15146 /* Run all target-specific selftests. */
15148 static void
15149 aarch64_run_selftests (void)
15151 aarch64_test_loading_full_dump ();
15154 } // namespace selftest
15156 #endif /* #if CHECKING_P */
15158 #undef TARGET_ADDRESS_COST
15159 #define TARGET_ADDRESS_COST aarch64_address_cost
15161 /* This hook determines whether unnamed bitfields affect the alignment
15162 of the containing structure. The hook returns true if the structure
15163 should inherit the alignment requirements of an unnamed bitfield's
15164 type. */
15165 #undef TARGET_ALIGN_ANON_BITFIELD
15166 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
15168 #undef TARGET_ASM_ALIGNED_DI_OP
15169 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
15171 #undef TARGET_ASM_ALIGNED_HI_OP
15172 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
15174 #undef TARGET_ASM_ALIGNED_SI_OP
15175 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
15177 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
15178 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
15179 hook_bool_const_tree_hwi_hwi_const_tree_true
15181 #undef TARGET_ASM_FILE_START
15182 #define TARGET_ASM_FILE_START aarch64_start_file
15184 #undef TARGET_ASM_OUTPUT_MI_THUNK
15185 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
15187 #undef TARGET_ASM_SELECT_RTX_SECTION
15188 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
15190 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
15191 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
15193 #undef TARGET_BUILD_BUILTIN_VA_LIST
15194 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
15196 #undef TARGET_CALLEE_COPIES
15197 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
15199 #undef TARGET_CAN_ELIMINATE
15200 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
15202 #undef TARGET_CAN_INLINE_P
15203 #define TARGET_CAN_INLINE_P aarch64_can_inline_p
15205 #undef TARGET_CANNOT_FORCE_CONST_MEM
15206 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
15208 #undef TARGET_CASE_VALUES_THRESHOLD
15209 #define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
15211 #undef TARGET_CONDITIONAL_REGISTER_USAGE
15212 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
15214 /* Only the least significant bit is used for initialization guard
15215 variables. */
15216 #undef TARGET_CXX_GUARD_MASK_BIT
15217 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
15219 #undef TARGET_C_MODE_FOR_SUFFIX
15220 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
15222 #ifdef TARGET_BIG_ENDIAN_DEFAULT
15223 #undef TARGET_DEFAULT_TARGET_FLAGS
15224 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
15225 #endif
15227 #undef TARGET_CLASS_MAX_NREGS
15228 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
15230 #undef TARGET_BUILTIN_DECL
15231 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
15233 #undef TARGET_BUILTIN_RECIPROCAL
15234 #define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
15236 #undef TARGET_C_EXCESS_PRECISION
15237 #define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
15239 #undef TARGET_EXPAND_BUILTIN
15240 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
15242 #undef TARGET_EXPAND_BUILTIN_VA_START
15243 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
15245 #undef TARGET_FOLD_BUILTIN
15246 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
15248 #undef TARGET_FUNCTION_ARG
15249 #define TARGET_FUNCTION_ARG aarch64_function_arg
15251 #undef TARGET_FUNCTION_ARG_ADVANCE
15252 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
15254 #undef TARGET_FUNCTION_ARG_BOUNDARY
15255 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
15257 #undef TARGET_FUNCTION_ARG_PADDING
15258 #define TARGET_FUNCTION_ARG_PADDING aarch64_function_arg_padding
15260 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
15261 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
15263 #undef TARGET_FUNCTION_VALUE
15264 #define TARGET_FUNCTION_VALUE aarch64_function_value
15266 #undef TARGET_FUNCTION_VALUE_REGNO_P
15267 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
15269 #undef TARGET_GIMPLE_FOLD_BUILTIN
15270 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
15272 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
15273 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
15275 #undef TARGET_INIT_BUILTINS
15276 #define TARGET_INIT_BUILTINS aarch64_init_builtins
15278 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
15279 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
15280 aarch64_ira_change_pseudo_allocno_class
15282 #undef TARGET_LEGITIMATE_ADDRESS_P
15283 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
15285 #undef TARGET_LEGITIMATE_CONSTANT_P
15286 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
15288 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
15289 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
15290 aarch64_legitimize_address_displacement
15292 #undef TARGET_LIBGCC_CMP_RETURN_MODE
15293 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
15295 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
15296 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
15297 aarch64_libgcc_floating_mode_supported_p
15299 #undef TARGET_MANGLE_TYPE
15300 #define TARGET_MANGLE_TYPE aarch64_mangle_type
15302 #undef TARGET_MEMORY_MOVE_COST
15303 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
15305 #undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
15306 #define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
15308 #undef TARGET_MUST_PASS_IN_STACK
15309 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
15311 /* This target hook should return true if accesses to volatile bitfields
15312 should use the narrowest mode possible. It should return false if these
15313 accesses should use the bitfield container type. */
15314 #undef TARGET_NARROW_VOLATILE_BITFIELD
15315 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
15317 #undef TARGET_OPTION_OVERRIDE
15318 #define TARGET_OPTION_OVERRIDE aarch64_override_options
15320 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
15321 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
15322 aarch64_override_options_after_change
15324 #undef TARGET_OPTION_SAVE
15325 #define TARGET_OPTION_SAVE aarch64_option_save
15327 #undef TARGET_OPTION_RESTORE
15328 #define TARGET_OPTION_RESTORE aarch64_option_restore
15330 #undef TARGET_OPTION_PRINT
15331 #define TARGET_OPTION_PRINT aarch64_option_print
15333 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
15334 #define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
15336 #undef TARGET_SET_CURRENT_FUNCTION
15337 #define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
15339 #undef TARGET_PASS_BY_REFERENCE
15340 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
15342 #undef TARGET_PREFERRED_RELOAD_CLASS
15343 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
15345 #undef TARGET_SCHED_REASSOCIATION_WIDTH
15346 #define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
15348 #undef TARGET_PROMOTED_TYPE
15349 #define TARGET_PROMOTED_TYPE aarch64_promoted_type
15351 #undef TARGET_SECONDARY_RELOAD
15352 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
15354 #undef TARGET_SHIFT_TRUNCATION_MASK
15355 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
15357 #undef TARGET_SETUP_INCOMING_VARARGS
15358 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
15360 #undef TARGET_STRUCT_VALUE_RTX
15361 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
15363 #undef TARGET_REGISTER_MOVE_COST
15364 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
15366 #undef TARGET_RETURN_IN_MEMORY
15367 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
15369 #undef TARGET_RETURN_IN_MSB
15370 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
15372 #undef TARGET_RTX_COSTS
15373 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
15375 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15376 #define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p
15378 #undef TARGET_SCHED_ISSUE_RATE
15379 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
15381 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
15382 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
15383 aarch64_sched_first_cycle_multipass_dfa_lookahead
15385 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
15386 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
15387 aarch64_first_cycle_multipass_dfa_lookahead_guard
15389 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
15390 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
15391 aarch64_get_separate_components
15393 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
15394 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
15395 aarch64_components_for_bb
15397 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
15398 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
15399 aarch64_disqualify_components
15401 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
15402 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
15403 aarch64_emit_prologue_components
15405 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
15406 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
15407 aarch64_emit_epilogue_components
15409 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
15410 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
15411 aarch64_set_handled_components
15413 #undef TARGET_TRAMPOLINE_INIT
15414 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
15416 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
15417 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
15419 #undef TARGET_VECTOR_MODE_SUPPORTED_P
15420 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
15422 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
15423 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
15424 aarch64_builtin_support_vector_misalignment
15426 #undef TARGET_ARRAY_MODE_SUPPORTED_P
15427 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
15429 #undef TARGET_VECTORIZE_ADD_STMT_COST
15430 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
15432 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
15433 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
15434 aarch64_builtin_vectorization_cost
15436 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
15437 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
15439 #undef TARGET_VECTORIZE_BUILTINS
15440 #define TARGET_VECTORIZE_BUILTINS
15442 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
15443 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
15444 aarch64_builtin_vectorized_function
15446 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
15447 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
15448 aarch64_autovectorize_vector_sizes
15450 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
15451 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
15452 aarch64_atomic_assign_expand_fenv
15454 /* Section anchor support. */
15456 #undef TARGET_MIN_ANCHOR_OFFSET
15457 #define TARGET_MIN_ANCHOR_OFFSET -256
15459 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
15460 byte offset; we can do much more for larger data types, but have no way
15461 to determine the size of the access. We assume accesses are aligned. */
15462 #undef TARGET_MAX_ANCHOR_OFFSET
15463 #define TARGET_MAX_ANCHOR_OFFSET 4095
15465 #undef TARGET_VECTOR_ALIGNMENT
15466 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
15468 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
15469 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
15470 aarch64_simd_vector_alignment_reachable
15472 /* vec_perm support. */
15474 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
15475 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
15476 aarch64_vectorize_vec_perm_const_ok
15478 #undef TARGET_INIT_LIBFUNCS
15479 #define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
15481 #undef TARGET_FIXED_CONDITION_CODE_REGS
15482 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
15484 #undef TARGET_FLAGS_REGNUM
15485 #define TARGET_FLAGS_REGNUM CC_REGNUM
15487 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
15488 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
15490 #undef TARGET_ASAN_SHADOW_OFFSET
15491 #define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
15493 #undef TARGET_LEGITIMIZE_ADDRESS
15494 #define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
15496 #undef TARGET_SCHED_CAN_SPECULATE_INSN
15497 #define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn
15499 #undef TARGET_CAN_USE_DOLOOP_P
15500 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
15502 #undef TARGET_SCHED_ADJUST_PRIORITY
15503 #define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority
15505 #undef TARGET_SCHED_MACRO_FUSION_P
15506 #define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
15508 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
15509 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
15511 #undef TARGET_SCHED_FUSION_PRIORITY
15512 #define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
15514 #undef TARGET_UNSPEC_MAY_TRAP_P
15515 #define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
15517 #undef TARGET_USE_PSEUDO_PIC_REG
15518 #define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
15520 #undef TARGET_PRINT_OPERAND
15521 #define TARGET_PRINT_OPERAND aarch64_print_operand
15523 #undef TARGET_PRINT_OPERAND_ADDRESS
15524 #define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
15526 #undef TARGET_OPTAB_SUPPORTED_P
15527 #define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
15529 #undef TARGET_OMIT_STRUCT_RETURN_REG
15530 #define TARGET_OMIT_STRUCT_RETURN_REG true
15532 /* The architecture reserves bits 0 and 1 so use bit 2 for descriptors. */
15533 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
15534 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4
15536 #undef TARGET_HARD_REGNO_NREGS
15537 #define TARGET_HARD_REGNO_NREGS aarch64_hard_regno_nregs
15538 #undef TARGET_HARD_REGNO_MODE_OK
15539 #define TARGET_HARD_REGNO_MODE_OK aarch64_hard_regno_mode_ok
15541 #undef TARGET_MODES_TIEABLE_P
15542 #define TARGET_MODES_TIEABLE_P aarch64_modes_tieable_p
15544 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
15545 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
15546 aarch64_hard_regno_call_part_clobbered
15548 #undef TARGET_CONSTANT_ALIGNMENT
15549 #define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment
15551 #if CHECKING_P
15552 #undef TARGET_RUN_TARGET_SELFTESTS
15553 #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
15554 #endif /* #if CHECKING_P */
15556 struct gcc_target targetm = TARGET_INITIALIZER;
15558 #include "gt-aarch64.h"