/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2017 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define INCLUDE_STRING
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "target-globals.h"
#include "common/common-target.h"
#include "selftest-rtl.h"

/* This file should be included last.  */
#include "target-def.h"
/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Classifies an address.

   ADDRESS_REG_IMM: a simple base register plus immediate offset.
   ADDRESS_REG_WB: a base register indexed by immediate offset with writeback.
   ADDRESS_REG_REG: a base register indexed by (optionally scaled) register.
   ADDRESS_REG_UXTW: a base register indexed by (optionally scaled)
     zero-extended register.
   ADDRESS_REG_SXTW: a base register indexed by (optionally scaled)
     sign-extended register.
   ADDRESS_LO_SUM: a LO_SUM rtx with a base register and "LO12" symbol
     relocation.
   ADDRESS_SYMBOLIC: a constant symbolic address, in pc-relative literal
     pool.  */

enum aarch64_address_type
{
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info
{
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
struct simd_immediate_info;

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
						 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
							 const_tree type,
							 int misalignment,
							 bool is_packed);
static machine_mode
aarch64_simd_container_mode (scalar_mode mode, unsigned width);
/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_pcrelative_literal_loads;
/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};
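/* Illustrative note (not from the original source): both tables above are
   keyed by name and terminated by a NULL sentinel, with the "none" and
   "all" entries wrapping the rows generated from the .def files.  A lookup
   is therefore a linear scan, roughly (hypothetical helper sketch):

     for (const struct aarch64_flag_desc *entry = aarch64_fusible_pairs;
	  entry->name != NULL;
	  entry++)
       if (strcmp (entry->name, token) == 0)
	 return entry->flag;  */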
/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  0, /* register_offset  */
  0, /* register_sextend  */
  0, /* register_zextend  */
};

static const struct cpu_addrcost_table exynosm1_addrcost_table =
{
  1, /* register_offset  */
  1, /* register_sextend  */
  2, /* register_zextend  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
  0, /* register_offset  */
  1, /* register_sextend  */
  1, /* register_zextend  */
};

static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
{
  2, /* register_offset  */
  3, /* register_sextend  */
  3, /* register_zextend  */
};
static const struct cpu_regmove_cost generic_regmove_cost =
{
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
};

static const struct cpu_regmove_cost exynosm1_regmove_cost =
{
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost (actual, 4 and 9).  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
};

static const struct cpu_regmove_cost qdf24xx_regmove_cost =
{
  /* Avoid the use of int<->fp moves for spilling.  */
};

static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
{
  /* Avoid the use of int<->fp moves for spilling.  */
};
/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_int_stmt_cost  */
  1, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* ThunderX costs for vector insn classes.  */
static const struct cpu_vector_cost thunderx_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  3, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  4, /* vec_int_stmt_cost  */
  1, /* vec_fp_stmt_cost  */
  4, /* vec_permute_cost  */
  2, /* vec_to_scalar_cost  */
  2, /* scalar_to_vec_cost  */
  3, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  5, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  3 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes for Cortex-A57.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  4, /* vec_align_load_cost  */
  4, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

static const struct cpu_vector_cost exynosm1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_int_stmt_cost  */
  3, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  3, /* vec_to_scalar_cost  */
  3, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes for X-Gene 1.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  1, /* scalar_fp_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_int_stmt_cost  */
  2, /* vec_fp_stmt_cost  */
  2, /* vec_permute_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Costs for vector insn classes for Vulcan.  */
static const struct cpu_vector_cost thunderx2t99_vector_cost =
{
  1, /* scalar_int_stmt_cost  */
  6, /* scalar_fp_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  5, /* vec_int_stmt_cost  */
  6, /* vec_fp_stmt_cost  */
  3, /* vec_permute_cost  */
  6, /* vec_to_scalar_cost  */
  5, /* scalar_to_vec_cost  */
  8, /* vec_align_load_cost  */
  8, /* vec_unalign_load_cost  */
  4, /* vec_unalign_store_cost  */
  4, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};
/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  1,  /* Predictable.  */
  3   /* Unpredictable.  */
};

/* Generic approximation modes.  */
static const cpu_approx_modes generic_approx_modes =
{
  AARCH64_APPROX_NONE,	/* division  */
  AARCH64_APPROX_NONE,	/* sqrt  */
  AARCH64_APPROX_NONE	/* recip_sqrt  */
};

/* Approximation modes for Exynos M1.  */
static const cpu_approx_modes exynosm1_approx_modes =
{
  AARCH64_APPROX_NONE,	/* division  */
  AARCH64_APPROX_ALL,	/* sqrt  */
  AARCH64_APPROX_ALL	/* recip_sqrt  */
};

/* Approximation modes for X-Gene 1.  */
static const cpu_approx_modes xgene1_approx_modes =
{
  AARCH64_APPROX_NONE,	/* division  */
  AARCH64_APPROX_NONE,	/* sqrt  */
  AARCH64_APPROX_ALL	/* recip_sqrt  */
};
/* Generic prefetch settings (which disable prefetch).  */
static const cpu_prefetch_tune generic_prefetch_tune =
{
  -1,			/* l1_cache_size  */
  -1,			/* l1_cache_line_size  */
  -1,			/* l2_cache_size  */
  -1			/* default_opt_level  */
};

static const cpu_prefetch_tune exynosm1_prefetch_tune =
{
  -1,			/* l1_cache_size  */
  64,			/* l1_cache_line_size  */
  -1,			/* l2_cache_size  */
  -1			/* default_opt_level  */
};

static const cpu_prefetch_tune qdf24xx_prefetch_tune =
{
  32,			/* l1_cache_size  */
  64,			/* l1_cache_line_size  */
  1024,			/* l2_cache_size  */
  3			/* default_opt_level  */
};

static const cpu_prefetch_tune thunderxt88_prefetch_tune =
{
  32,			/* l1_cache_size  */
  128,			/* l1_cache_line_size  */
  16*1024,		/* l2_cache_size  */
  3			/* default_opt_level  */
};

static const cpu_prefetch_tune thunderx_prefetch_tune =
{
  32,			/* l1_cache_size  */
  128,			/* l1_cache_line_size  */
  -1,			/* l2_cache_size  */
  -1			/* default_opt_level  */
};

static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
{
  32,			/* l1_cache_size  */
  64,			/* l1_cache_line_size  */
  256,			/* l2_cache_size  */
  -1			/* default_opt_level  */
};
541 static const struct tune_params generic_tunings
=
543 &cortexa57_extra_costs
,
544 &generic_addrcost_table
,
545 &generic_regmove_cost
,
546 &generic_vector_cost
,
547 &generic_branch_cost
,
548 &generic_approx_modes
,
551 (AARCH64_FUSE_AES_AESMC
), /* fusible_ops */
552 8, /* function_align. */
555 2, /* int_reassoc_width. */
556 4, /* fp_reassoc_width. */
557 1, /* vec_reassoc_width. */
558 2, /* min_div_recip_mul_sf. */
559 2, /* min_div_recip_mul_df. */
560 0, /* max_case_values. */
561 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
562 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
563 &generic_prefetch_tune
566 static const struct tune_params cortexa35_tunings
=
568 &cortexa53_extra_costs
,
569 &generic_addrcost_table
,
570 &cortexa53_regmove_cost
,
571 &generic_vector_cost
,
572 &generic_branch_cost
,
573 &generic_approx_modes
,
576 (AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
577 | AARCH64_FUSE_MOVK_MOVK
| AARCH64_FUSE_ADRP_LDR
), /* fusible_ops */
578 16, /* function_align. */
581 2, /* int_reassoc_width. */
582 4, /* fp_reassoc_width. */
583 1, /* vec_reassoc_width. */
584 2, /* min_div_recip_mul_sf. */
585 2, /* min_div_recip_mul_df. */
586 0, /* max_case_values. */
587 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
588 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
589 &generic_prefetch_tune
592 static const struct tune_params cortexa53_tunings
=
594 &cortexa53_extra_costs
,
595 &generic_addrcost_table
,
596 &cortexa53_regmove_cost
,
597 &generic_vector_cost
,
598 &generic_branch_cost
,
599 &generic_approx_modes
,
602 (AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
603 | AARCH64_FUSE_MOVK_MOVK
| AARCH64_FUSE_ADRP_LDR
), /* fusible_ops */
604 16, /* function_align. */
607 2, /* int_reassoc_width. */
608 4, /* fp_reassoc_width. */
609 1, /* vec_reassoc_width. */
610 2, /* min_div_recip_mul_sf. */
611 2, /* min_div_recip_mul_df. */
612 0, /* max_case_values. */
613 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
614 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
615 &generic_prefetch_tune
618 static const struct tune_params cortexa57_tunings
=
620 &cortexa57_extra_costs
,
621 &generic_addrcost_table
,
622 &cortexa57_regmove_cost
,
623 &cortexa57_vector_cost
,
624 &generic_branch_cost
,
625 &generic_approx_modes
,
628 (AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
629 | AARCH64_FUSE_MOVK_MOVK
), /* fusible_ops */
630 16, /* function_align. */
633 2, /* int_reassoc_width. */
634 4, /* fp_reassoc_width. */
635 1, /* vec_reassoc_width. */
636 2, /* min_div_recip_mul_sf. */
637 2, /* min_div_recip_mul_df. */
638 0, /* max_case_values. */
639 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
640 (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS
), /* tune_flags. */
641 &generic_prefetch_tune
644 static const struct tune_params cortexa72_tunings
=
646 &cortexa57_extra_costs
,
647 &generic_addrcost_table
,
648 &cortexa57_regmove_cost
,
649 &cortexa57_vector_cost
,
650 &generic_branch_cost
,
651 &generic_approx_modes
,
654 (AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
655 | AARCH64_FUSE_MOVK_MOVK
), /* fusible_ops */
656 16, /* function_align. */
659 2, /* int_reassoc_width. */
660 4, /* fp_reassoc_width. */
661 1, /* vec_reassoc_width. */
662 2, /* min_div_recip_mul_sf. */
663 2, /* min_div_recip_mul_df. */
664 0, /* max_case_values. */
665 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
666 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
667 &generic_prefetch_tune
670 static const struct tune_params cortexa73_tunings
=
672 &cortexa57_extra_costs
,
673 &generic_addrcost_table
,
674 &cortexa57_regmove_cost
,
675 &cortexa57_vector_cost
,
676 &generic_branch_cost
,
677 &generic_approx_modes
,
678 4, /* memmov_cost. */
680 (AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
681 | AARCH64_FUSE_MOVK_MOVK
| AARCH64_FUSE_ADRP_LDR
), /* fusible_ops */
682 16, /* function_align. */
685 2, /* int_reassoc_width. */
686 4, /* fp_reassoc_width. */
687 1, /* vec_reassoc_width. */
688 2, /* min_div_recip_mul_sf. */
689 2, /* min_div_recip_mul_df. */
690 0, /* max_case_values. */
691 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
692 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
693 &generic_prefetch_tune
698 static const struct tune_params exynosm1_tunings
=
700 &exynosm1_extra_costs
,
701 &exynosm1_addrcost_table
,
702 &exynosm1_regmove_cost
,
703 &exynosm1_vector_cost
,
704 &generic_branch_cost
,
705 &exynosm1_approx_modes
,
708 (AARCH64_FUSE_AES_AESMC
), /* fusible_ops */
709 4, /* function_align. */
712 2, /* int_reassoc_width. */
713 4, /* fp_reassoc_width. */
714 1, /* vec_reassoc_width. */
715 2, /* min_div_recip_mul_sf. */
716 2, /* min_div_recip_mul_df. */
717 48, /* max_case_values. */
718 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
719 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
720 &exynosm1_prefetch_tune
723 static const struct tune_params thunderxt88_tunings
=
725 &thunderx_extra_costs
,
726 &generic_addrcost_table
,
727 &thunderx_regmove_cost
,
728 &thunderx_vector_cost
,
729 &generic_branch_cost
,
730 &generic_approx_modes
,
733 AARCH64_FUSE_CMP_BRANCH
, /* fusible_ops */
734 8, /* function_align. */
737 2, /* int_reassoc_width. */
738 4, /* fp_reassoc_width. */
739 1, /* vec_reassoc_width. */
740 2, /* min_div_recip_mul_sf. */
741 2, /* min_div_recip_mul_df. */
742 0, /* max_case_values. */
743 tune_params::AUTOPREFETCHER_OFF
, /* autoprefetcher_model. */
744 (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
), /* tune_flags. */
745 &thunderxt88_prefetch_tune
748 static const struct tune_params thunderx_tunings
=
750 &thunderx_extra_costs
,
751 &generic_addrcost_table
,
752 &thunderx_regmove_cost
,
753 &thunderx_vector_cost
,
754 &generic_branch_cost
,
755 &generic_approx_modes
,
758 AARCH64_FUSE_CMP_BRANCH
, /* fusible_ops */
759 8, /* function_align. */
762 2, /* int_reassoc_width. */
763 4, /* fp_reassoc_width. */
764 1, /* vec_reassoc_width. */
765 2, /* min_div_recip_mul_sf. */
766 2, /* min_div_recip_mul_df. */
767 0, /* max_case_values. */
768 tune_params::AUTOPREFETCHER_OFF
, /* autoprefetcher_model. */
769 (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
770 | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
), /* tune_flags. */
771 &thunderx_prefetch_tune
774 static const struct tune_params xgene1_tunings
=
777 &xgene1_addrcost_table
,
778 &xgene1_regmove_cost
,
780 &generic_branch_cost
,
781 &xgene1_approx_modes
,
784 AARCH64_FUSE_NOTHING
, /* fusible_ops */
785 16, /* function_align. */
787 16, /* loop_align. */
788 2, /* int_reassoc_width. */
789 4, /* fp_reassoc_width. */
790 1, /* vec_reassoc_width. */
791 2, /* min_div_recip_mul_sf. */
792 2, /* min_div_recip_mul_df. */
793 0, /* max_case_values. */
794 tune_params::AUTOPREFETCHER_OFF
, /* autoprefetcher_model. */
795 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
796 &generic_prefetch_tune
799 static const struct tune_params qdf24xx_tunings
=
801 &qdf24xx_extra_costs
,
802 &generic_addrcost_table
,
803 &qdf24xx_regmove_cost
,
804 &generic_vector_cost
,
805 &generic_branch_cost
,
806 &generic_approx_modes
,
809 (AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
810 | AARCH64_FUSE_MOVK_MOVK
), /* fusible_ops  */
811 16, /* function_align. */
813 16, /* loop_align. */
814 2, /* int_reassoc_width. */
815 4, /* fp_reassoc_width. */
816 1, /* vec_reassoc_width. */
817 2, /* min_div_recip_mul_sf. */
818 2, /* min_div_recip_mul_df. */
819 0, /* max_case_values. */
820 tune_params::AUTOPREFETCHER_STRONG
, /* autoprefetcher_model. */
821 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
822 &qdf24xx_prefetch_tune
/* Tuning structure for the Qualcomm Saphira core.  Default to falkor values
   for now.  */
827 static const struct tune_params saphira_tunings
=
829 &generic_extra_costs
,
830 &generic_addrcost_table
,
831 &generic_regmove_cost
,
832 &generic_vector_cost
,
833 &generic_branch_cost
,
834 &generic_approx_modes
,
837 (AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
838 | AARCH64_FUSE_MOVK_MOVK
), /* fusible_ops  */
839 16, /* function_align. */
841 16, /* loop_align. */
842 2, /* int_reassoc_width. */
843 4, /* fp_reassoc_width. */
844 1, /* vec_reassoc_width. */
845 2, /* min_div_recip_mul_sf. */
846 2, /* min_div_recip_mul_df. */
847 0, /* max_case_values. */
848 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
849 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
850 &generic_prefetch_tune
853 static const struct tune_params thunderx2t99_tunings
=
855 &thunderx2t99_extra_costs
,
856 &thunderx2t99_addrcost_table
,
857 &thunderx2t99_regmove_cost
,
858 &thunderx2t99_vector_cost
,
859 &generic_branch_cost
,
860 &generic_approx_modes
,
861 4, /* memmov_cost. */
863 (AARCH64_FUSE_CMP_BRANCH
| AARCH64_FUSE_AES_AESMC
864 | AARCH64_FUSE_ALU_BRANCH
), /* fusible_ops */
865 16, /* function_align. */
867 16, /* loop_align. */
868 3, /* int_reassoc_width. */
869 2, /* fp_reassoc_width. */
870 2, /* vec_reassoc_width. */
871 2, /* min_div_recip_mul_sf. */
872 2, /* min_div_recip_mul_df. */
873 0, /* max_case_values. */
874 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
875 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
876 &thunderx2t99_prefetch_tune
/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};
/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH,				\
  all_architectures[AARCH64_ARCH_##ARCH].architecture_version,	\
  FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};
/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
} aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* Generate code to enable conditional branches in functions over 1 MiB.  */
const char *
aarch64_gen_far_branch (rtx *operands, int pos_label, const char *dest,
			const char *branch_format)
{
  rtx_code_label *tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
			       CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = targetm.strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);
  return "";
}
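/* Illustrative example (not from the original source): when a conditional
   branch target is outside the +/-1 MiB range of B.cond, the caller passes
   an inverted condition in BRANCH_FORMAT, so the emitted sequence looks like

	<inverted b.cond>	.Lskip
	b			<real target>
     .Lskip:

   i.e. the short conditional branch skips over an unconditional B that can
   reach the far target.  */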
void
aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
{
  const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
  if (TARGET_GENERAL_REGS_ONLY)
    error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
  else
    error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
}
/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
   The register allocator chooses ALL_REGS if FP_REGS and GENERAL_REGS have
   the same cost even if ALL_REGS has a much larger cost.  ALL_REGS is also
   used if the cost of both FP_REGS and GENERAL_REGS is lower than the memory
   cost (in this case the best class is the lowest cost one).  Using ALL_REGS
   irrespective of its cost results in bad allocations with many redundant
   int<->FP moves which are expensive on various cores.
   To avoid this we don't allow ALL_REGS as the allocno class, but force a
   decision between FP_REGS and GENERAL_REGS.  We use the allocno class if it
   isn't ALL_REGS.  Similarly, use the best class if it isn't ALL_REGS.
   Otherwise set the allocno class depending on the mode.
   The result of this is that it is no longer inefficient to have a higher
   memory move cost than the register move cost.  */

static reg_class_t
aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
					 reg_class_t best_class)
{
  machine_mode mode;

  if (allocno_class != ALL_REGS)
    return allocno_class;

  if (best_class != ALL_REGS)
    return best_class;

  mode = PSEUDO_REGNO_MODE (regno);
  return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
}
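/* Illustrative example (not from the original source): a DFmode or vector
   pseudo whose allocno class and best class are both ALL_REGS ends up in
   FP_REGS via the code above, while an SImode or DImode pseudo in the same
   situation is placed in GENERAL_REGS.  */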
static unsigned int
aarch64_min_divisions_for_recip_mul (machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
			     machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
   if (GP_REGNUM_P (regno))
     return AARCH64_DWARF_R0 + regno - R0_REGNUM;
   else if (regno == SP_REGNUM)
     return AARCH64_DWARF_SP;
   else if (FP_REGNUM_P (regno))
     return AARCH64_DWARF_V0 + regno - V0_REGNUM;

   /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
      equivalent DWARF register.  */
   return DWARF_FRAME_REGISTERS;
}
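/* Illustrative example (not from the original source, and assuming the usual
   AArch64 DWARF numbering where AARCH64_DWARF_R0 is 0, AARCH64_DWARF_SP is 31
   and AARCH64_DWARF_V0 is 64): x5 maps to DWARF register 5, sp to 31 and v3
   to 67; any other register reports DWARF_FRAME_REGISTERS, meaning it has no
   DWARF equivalent.  */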
1069 /* Return TRUE if MODE is any of the large INT modes. */
1071 aarch64_vect_struct_mode_p (machine_mode mode
)
1073 return mode
== OImode
|| mode
== CImode
|| mode
== XImode
;
1076 /* Return TRUE if MODE is any of the vector modes. */
1078 aarch64_vector_mode_p (machine_mode mode
)
1080 return aarch64_vector_mode_supported_p (mode
)
1081 || aarch64_vect_struct_mode_p (mode
);
1084 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
1086 aarch64_array_mode_supported_p (machine_mode mode
,
1087 unsigned HOST_WIDE_INT nelems
)
1090 && (AARCH64_VALID_SIMD_QREG_MODE (mode
)
1091 || AARCH64_VALID_SIMD_DREG_MODE (mode
))
1092 && (nelems
>= 2 && nelems
<= 4))
1098 /* Implement TARGET_HARD_REGNO_NREGS. */
1101 aarch64_hard_regno_nregs (unsigned regno
, machine_mode mode
)
1103 switch (aarch64_regno_regclass (regno
))
1107 return (GET_MODE_SIZE (mode
) + UNITS_PER_VREG
- 1) / UNITS_PER_VREG
;
1109 return (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1114 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1117 aarch64_hard_regno_mode_ok (unsigned regno
, machine_mode mode
)
1119 if (GET_MODE_CLASS (mode
) == MODE_CC
)
1120 return regno
== CC_REGNUM
;
1122 if (regno
== SP_REGNUM
)
1123 /* The purpose of comparing with ptr_mode is to support the
1124 global register variable associated with the stack pointer
1125 register via the syntax of asm ("wsp") in ILP32. */
1126 return mode
== Pmode
|| mode
== ptr_mode
;
1128 if (regno
== FRAME_POINTER_REGNUM
|| regno
== ARG_POINTER_REGNUM
)
1129 return mode
== Pmode
;
1131 if (GP_REGNUM_P (regno
) && ! aarch64_vect_struct_mode_p (mode
))
1134 if (FP_REGNUM_P (regno
))
1136 if (aarch64_vect_struct_mode_p (mode
))
1137 return end_hard_regno (mode
, regno
) - 1 <= V31_REGNUM
;
1145 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves
1146 the lower 64 bits of a 128-bit register. Tell the compiler the callee
1147 clobbers the top 64 bits when restoring the bottom 64 bits. */
1150 aarch64_hard_regno_call_part_clobbered (unsigned int regno
, machine_mode mode
)
1152 return FP_REGNUM_P (regno
) && GET_MODE_SIZE (mode
) > 8;
1155 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
1157 aarch64_hard_regno_caller_save_mode (unsigned regno
, unsigned nregs
,
1160 /* Handle modes that fit within single registers. */
1161 if (nregs
== 1 && GET_MODE_SIZE (mode
) <= 16)
1163 if (GET_MODE_SIZE (mode
) >= 4)
1168 /* Fall back to generic for multi-reg and very large modes. */
1170 return choose_hard_reg_mode (regno
, nregs
, false);
1173 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
1174 that strcpy from constants will be faster. */
1176 static HOST_WIDE_INT
1177 aarch64_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
1179 if (TREE_CODE (exp
) == STRING_CST
&& !optimize_size
)
1180 return MAX (align
, BITS_PER_WORD
);
1184 /* Return true if calls to DECL should be treated as
1185 long-calls (ie called via a register). */
1187 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED
)
1192 /* Return true if calls to symbol-ref SYM should be treated as
1193 long-calls (ie called via a register). */
1195 aarch64_is_long_call_p (rtx sym
)
1197 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym
));
1200 /* Return true if calls to symbol-ref SYM should not go through
1204 aarch64_is_noplt_call_p (rtx sym
)
1206 const_tree decl
= SYMBOL_REF_DECL (sym
);
1211 || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl
)))
1212 && !targetm
.binds_local_p (decl
))
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
1224 aarch64_is_extend_from_extract (scalar_int_mode mode
, rtx mult_imm
,
1227 HOST_WIDE_INT mult_val
, extract_val
;
1229 if (! CONST_INT_P (mult_imm
) || ! CONST_INT_P (extract_imm
))
1232 mult_val
= INTVAL (mult_imm
);
1233 extract_val
= INTVAL (extract_imm
);
1236 && extract_val
< GET_MODE_BITSIZE (mode
)
1237 && exact_log2 (extract_val
& ~7) > 0
1238 && (extract_val
& 7) <= 4
1239 && mult_val
== (1 << (extract_val
& 7)))
1245 /* Emit an insn that's a simple single-set. Both the operands must be
1246 known to be valid. */
1247 inline static rtx_insn
*
1248 emit_set_insn (rtx x
, rtx y
)
1250 return emit_insn (gen_rtx_SET (x
, y
));
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for the CC register in the proper mode.  */
1256 aarch64_gen_compare_reg (RTX_CODE code
, rtx x
, rtx y
)
1258 machine_mode mode
= SELECT_CC_MODE (code
, x
, y
);
1259 rtx cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
1261 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
1265 /* Build the SYMBOL_REF for __tls_get_addr. */
1267 static GTY(()) rtx tls_get_addr_libfunc
;
1270 aarch64_tls_get_addr (void)
1272 if (!tls_get_addr_libfunc
)
1273 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
1274 return tls_get_addr_libfunc
;
1277 /* Return the TLS model to use for ADDR. */
1279 static enum tls_model
1280 tls_symbolic_operand_type (rtx addr
)
1282 enum tls_model tls_kind
= TLS_MODEL_NONE
;
1285 if (GET_CODE (addr
) == CONST
)
1287 split_const (addr
, &sym
, &addend
);
1288 if (GET_CODE (sym
) == SYMBOL_REF
)
1289 tls_kind
= SYMBOL_REF_TLS_MODEL (sym
);
1291 else if (GET_CODE (addr
) == SYMBOL_REF
)
1292 tls_kind
= SYMBOL_REF_TLS_MODEL (addr
);
1297 /* We'll allow lo_sum's in addresses in our legitimate addresses
1298 so that combine would take care of combining addresses where
1299 necessary, but for generation purposes, we'll generate the address
1302 tmp = hi (symbol_ref); adrp x1, foo
1303 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
1307 adrp x1, :got:foo adrp tmp, :tlsgd:foo
1308 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
1312 Load TLS symbol, depending on TLS mechanism and TLS access model.
1314 Global Dynamic - Traditional TLS:
1315 adrp tmp, :tlsgd:imm
1316 add dest, tmp, #:tlsgd_lo12:imm
1319 Global Dynamic - TLS Descriptors:
1320 adrp dest, :tlsdesc:imm
1321 ldr tmp, [dest, #:tlsdesc_lo12:imm]
1322 add dest, dest, #:tlsdesc_lo12:imm
1329 adrp tmp, :gottprel:imm
1330 ldr dest, [tmp, #:gottprel_lo12:imm]
1335 add t0, tp, #:tprel_hi12:imm, lsl #12
1336 add t0, t0, #:tprel_lo12_nc:imm
1340 aarch64_load_symref_appropriately (rtx dest
, rtx imm
,
1341 enum aarch64_symbol_type type
)
1345 case SYMBOL_SMALL_ABSOLUTE
:
1347 /* In ILP32, the mode of dest can be either SImode or DImode. */
1349 machine_mode mode
= GET_MODE (dest
);
1351 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
1353 if (can_create_pseudo_p ())
1354 tmp_reg
= gen_reg_rtx (mode
);
1356 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
1357 emit_insn (gen_add_losym (dest
, tmp_reg
, imm
));
1361 case SYMBOL_TINY_ABSOLUTE
:
1362 emit_insn (gen_rtx_SET (dest
, imm
));
1365 case SYMBOL_SMALL_GOT_28K
:
1367 machine_mode mode
= GET_MODE (dest
);
1368 rtx gp_rtx
= pic_offset_table_rtx
;
	/* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
	   here before rtl expand.  Tree IVOPT will generate rtl pattern to
	   decide rtx costs, in which case pic_offset_table_rtx is not
	   initialized.  For that case no need to generate the first adrp
	   instruction as the final cost for global variable access is
	   one instruction.  */

	/* -fpic for -mcmodel=small allows a 32K GOT table size (but since we
	   are using the page base as the GOT base, the first page may be
	   wasted; in the worst scenario there is only 28K of space for the
	   GOT).

	   The generated instruction sequence for accessing a global variable
	   is:

		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]

	   Only one instruction is needed.  But we must initialize
	   pic_offset_table_rtx properly.  We generate an initialization insn
	   for every global access, and allow CSE to remove all redundant
	   ones.

	   The final instruction sequence will look like the following for
	   multiple global variable accesses:

		adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_

		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]  */
1403 rtx s
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
1404 crtl
->uses_pic_offset_table
= 1;
1405 emit_move_insn (gp_rtx
, gen_rtx_HIGH (Pmode
, s
));
1407 if (mode
!= GET_MODE (gp_rtx
))
1408 gp_rtx
= gen_lowpart (mode
, gp_rtx
);
1412 if (mode
== ptr_mode
)
1415 insn
= gen_ldr_got_small_28k_di (dest
, gp_rtx
, imm
);
1417 insn
= gen_ldr_got_small_28k_si (dest
, gp_rtx
, imm
);
1419 mem
= XVECEXP (SET_SRC (insn
), 0, 0);
1423 gcc_assert (mode
== Pmode
);
1425 insn
= gen_ldr_got_small_28k_sidi (dest
, gp_rtx
, imm
);
1426 mem
= XVECEXP (XEXP (SET_SRC (insn
), 0), 0, 0);
	/* The operand is expected to be a MEM.  Whenever the related insn
	   pattern is changed, the code above which calculates MEM should be
	   updated accordingly.  */
1432 gcc_assert (GET_CODE (mem
) == MEM
);
1433 MEM_READONLY_P (mem
) = 1;
1434 MEM_NOTRAP_P (mem
) = 1;
1439 case SYMBOL_SMALL_GOT_4G
:
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
1452 machine_mode mode
= GET_MODE (dest
);
1454 if (can_create_pseudo_p ())
1455 tmp_reg
= gen_reg_rtx (mode
);
1457 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
1458 if (mode
== ptr_mode
)
1461 insn
= gen_ldr_got_small_di (dest
, tmp_reg
, imm
);
1463 insn
= gen_ldr_got_small_si (dest
, tmp_reg
, imm
);
1465 mem
= XVECEXP (SET_SRC (insn
), 0, 0);
1469 gcc_assert (mode
== Pmode
);
1471 insn
= gen_ldr_got_small_sidi (dest
, tmp_reg
, imm
);
1472 mem
= XVECEXP (XEXP (SET_SRC (insn
), 0), 0, 0);
1475 gcc_assert (GET_CODE (mem
) == MEM
);
1476 MEM_READONLY_P (mem
) = 1;
1477 MEM_NOTRAP_P (mem
) = 1;
1482 case SYMBOL_SMALL_TLSGD
:
1485 machine_mode mode
= GET_MODE (dest
);
1486 rtx result
= gen_rtx_REG (mode
, R0_REGNUM
);
1490 aarch64_emit_call_insn (gen_tlsgd_small_si (result
, imm
));
1492 aarch64_emit_call_insn (gen_tlsgd_small_di (result
, imm
));
1493 insns
= get_insns ();
1496 RTL_CONST_CALL_P (insns
) = 1;
1497 emit_libcall_block (insns
, dest
, result
, imm
);
1501 case SYMBOL_SMALL_TLSDESC
:
1503 machine_mode mode
= GET_MODE (dest
);
1504 rtx x0
= gen_rtx_REG (mode
, R0_REGNUM
);
1507 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
1509 /* In ILP32, the got entry is always of SImode size. Unlike
1510 small GOT, the dest is fixed at reg 0. */
1512 emit_insn (gen_tlsdesc_small_si (imm
));
1514 emit_insn (gen_tlsdesc_small_di (imm
));
1515 tp
= aarch64_load_tp (NULL
);
1518 tp
= gen_lowpart (mode
, tp
);
1520 emit_insn (gen_rtx_SET (dest
, gen_rtx_PLUS (mode
, tp
, x0
)));
1522 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
1526 case SYMBOL_SMALL_TLSIE
:
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different tlsie_small
	   patterns here (two patterns for ILP32).  */
1535 machine_mode mode
= GET_MODE (dest
);
1536 rtx tmp_reg
= gen_reg_rtx (mode
);
1537 rtx tp
= aarch64_load_tp (NULL
);
1539 if (mode
== ptr_mode
)
1542 emit_insn (gen_tlsie_small_di (tmp_reg
, imm
));
1545 emit_insn (gen_tlsie_small_si (tmp_reg
, imm
));
1546 tp
= gen_lowpart (mode
, tp
);
1551 gcc_assert (mode
== Pmode
);
1552 emit_insn (gen_tlsie_small_sidi (tmp_reg
, imm
));
1555 emit_insn (gen_rtx_SET (dest
, gen_rtx_PLUS (mode
, tp
, tmp_reg
)));
1557 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
1561 case SYMBOL_TLSLE12
:
1562 case SYMBOL_TLSLE24
:
1563 case SYMBOL_TLSLE32
:
1564 case SYMBOL_TLSLE48
:
1566 machine_mode mode
= GET_MODE (dest
);
1567 rtx tp
= aarch64_load_tp (NULL
);
1570 tp
= gen_lowpart (mode
, tp
);
1574 case SYMBOL_TLSLE12
:
1575 emit_insn ((mode
== DImode
? gen_tlsle12_di
: gen_tlsle12_si
)
1578 case SYMBOL_TLSLE24
:
1579 emit_insn ((mode
== DImode
? gen_tlsle24_di
: gen_tlsle24_si
)
1582 case SYMBOL_TLSLE32
:
1583 emit_insn ((mode
== DImode
? gen_tlsle32_di
: gen_tlsle32_si
)
1585 emit_insn ((mode
== DImode
? gen_adddi3
: gen_addsi3
)
1588 case SYMBOL_TLSLE48
:
1589 emit_insn ((mode
== DImode
? gen_tlsle48_di
: gen_tlsle48_si
)
1591 emit_insn ((mode
== DImode
? gen_adddi3
: gen_addsi3
)
1599 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
1603 case SYMBOL_TINY_GOT
:
1604 emit_insn (gen_ldr_got_tiny (dest
, imm
));
1607 case SYMBOL_TINY_TLSIE
:
1609 machine_mode mode
= GET_MODE (dest
);
1610 rtx tp
= aarch64_load_tp (NULL
);
1612 if (mode
== ptr_mode
)
1615 emit_insn (gen_tlsie_tiny_di (dest
, imm
, tp
));
1618 tp
= gen_lowpart (mode
, tp
);
1619 emit_insn (gen_tlsie_tiny_si (dest
, imm
, tp
));
1624 gcc_assert (mode
== Pmode
);
1625 emit_insn (gen_tlsie_tiny_sidi (dest
, imm
, tp
));
1629 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
1638 /* Emit a move from SRC to DEST. Assume that the move expanders can
1639 handle all moves if !can_create_pseudo_p (). The distinction is
1640 important because, unlike emit_move_insn, the move expanders know
1641 how to force Pmode objects into the constant pool even when the
1642 constant pool address is not itself legitimate. */
1644 aarch64_emit_move (rtx dest
, rtx src
)
1646 return (can_create_pseudo_p ()
1647 ? emit_move_insn (dest
, src
)
1648 : emit_move_insn_1 (dest
, src
));
1651 /* Split a 128-bit move operation into two 64-bit move operations,
1652 taking care to handle partial overlap of register to register
1653 copies. Special cases are needed when moving between GP regs and
1654 FP regs. SRC can be a register, constant or memory; DST a register
1655 or memory. If either operand is memory it must not have any side
1658 aarch64_split_128bit_move (rtx dst
, rtx src
)
1663 machine_mode mode
= GET_MODE (dst
);
1665 gcc_assert (mode
== TImode
|| mode
== TFmode
);
1666 gcc_assert (!(side_effects_p (src
) || side_effects_p (dst
)));
1667 gcc_assert (mode
== GET_MODE (src
) || GET_MODE (src
) == VOIDmode
);
1669 if (REG_P (dst
) && REG_P (src
))
1671 int src_regno
= REGNO (src
);
1672 int dst_regno
= REGNO (dst
);
1674 /* Handle FP <-> GP regs. */
1675 if (FP_REGNUM_P (dst_regno
) && GP_REGNUM_P (src_regno
))
1677 src_lo
= gen_lowpart (word_mode
, src
);
1678 src_hi
= gen_highpart (word_mode
, src
);
1682 emit_insn (gen_aarch64_movtilow_di (dst
, src_lo
));
1683 emit_insn (gen_aarch64_movtihigh_di (dst
, src_hi
));
1687 emit_insn (gen_aarch64_movtflow_di (dst
, src_lo
));
1688 emit_insn (gen_aarch64_movtfhigh_di (dst
, src_hi
));
1692 else if (GP_REGNUM_P (dst_regno
) && FP_REGNUM_P (src_regno
))
1694 dst_lo
= gen_lowpart (word_mode
, dst
);
1695 dst_hi
= gen_highpart (word_mode
, dst
);
1699 emit_insn (gen_aarch64_movdi_tilow (dst_lo
, src
));
1700 emit_insn (gen_aarch64_movdi_tihigh (dst_hi
, src
));
1704 emit_insn (gen_aarch64_movdi_tflow (dst_lo
, src
));
1705 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi
, src
));
1711 dst_lo
= gen_lowpart (word_mode
, dst
);
1712 dst_hi
= gen_highpart (word_mode
, dst
);
1713 src_lo
= gen_lowpart (word_mode
, src
);
1714 src_hi
= gen_highpart_mode (word_mode
, mode
, src
);
1716 /* At most one pairing may overlap. */
1717 if (reg_overlap_mentioned_p (dst_lo
, src_hi
))
1719 aarch64_emit_move (dst_hi
, src_hi
);
1720 aarch64_emit_move (dst_lo
, src_lo
);
1724 aarch64_emit_move (dst_lo
, src_lo
);
1725 aarch64_emit_move (dst_hi
, src_hi
);
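/* Illustrative note (not from the original source): for a GP-to-GP TImode
   copy the split above simply becomes two DImode moves, e.g. copying the
   pair x2:x3 into x0:x1, with the order of the two moves chosen via
   reg_overlap_mentioned_p so that an overlapping destination half is never
   written before the source half it overlaps has been read.  */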
1730 aarch64_split_128bit_move_p (rtx dst
, rtx src
)
1732 return (! REG_P (src
)
1733 || ! (FP_REGNUM_P (REGNO (dst
)) && FP_REGNUM_P (REGNO (src
))));
1736 /* Split a complex SIMD combine. */
1739 aarch64_split_simd_combine (rtx dst
, rtx src1
, rtx src2
)
1741 machine_mode src_mode
= GET_MODE (src1
);
1742 machine_mode dst_mode
= GET_MODE (dst
);
1744 gcc_assert (VECTOR_MODE_P (dst_mode
));
1745 gcc_assert (register_operand (dst
, dst_mode
)
1746 && register_operand (src1
, src_mode
)
1747 && register_operand (src2
, src_mode
));
1749 rtx (*gen
) (rtx
, rtx
, rtx
);
1754 gen
= gen_aarch64_simd_combinev8qi
;
1757 gen
= gen_aarch64_simd_combinev4hi
;
1760 gen
= gen_aarch64_simd_combinev2si
;
1763 gen
= gen_aarch64_simd_combinev4hf
;
1766 gen
= gen_aarch64_simd_combinev2sf
;
1769 gen
= gen_aarch64_simd_combinedi
;
1772 gen
= gen_aarch64_simd_combinedf
;
1778 emit_insn (gen (dst
, src1
, src2
));
1782 /* Split a complex SIMD move. */
1785 aarch64_split_simd_move (rtx dst
, rtx src
)
1787 machine_mode src_mode
= GET_MODE (src
);
1788 machine_mode dst_mode
= GET_MODE (dst
);
1790 gcc_assert (VECTOR_MODE_P (dst_mode
));
1792 if (REG_P (dst
) && REG_P (src
))
1794 rtx (*gen
) (rtx
, rtx
);
1796 gcc_assert (VECTOR_MODE_P (src_mode
));
1801 gen
= gen_aarch64_split_simd_movv16qi
;
1804 gen
= gen_aarch64_split_simd_movv8hi
;
1807 gen
= gen_aarch64_split_simd_movv4si
;
1810 gen
= gen_aarch64_split_simd_movv2di
;
1813 gen
= gen_aarch64_split_simd_movv8hf
;
1816 gen
= gen_aarch64_split_simd_movv4sf
;
1819 gen
= gen_aarch64_split_simd_movv2df
;
1825 emit_insn (gen (dst
, src
));
1831 aarch64_zero_extend_const_eq (machine_mode xmode
, rtx x
,
1832 machine_mode ymode
, rtx y
)
1834 rtx r
= simplify_const_unary_operation (ZERO_EXTEND
, xmode
, y
, ymode
);
1835 gcc_assert (r
!= NULL
);
1836 return rtx_equal_p (x
, r
);
1841 aarch64_force_temporary (machine_mode mode
, rtx x
, rtx value
)
1843 if (can_create_pseudo_p ())
1844 return force_reg (mode
, value
);
1847 x
= aarch64_emit_move (x
, value
);
1854 aarch64_add_offset (scalar_int_mode mode
, rtx temp
, rtx reg
,
1855 HOST_WIDE_INT offset
)
1857 if (!aarch64_plus_immediate (GEN_INT (offset
), mode
))
1860 /* Load the full offset into a register. This
1861 might be improvable in the future. */
1862 high
= GEN_INT (offset
);
1864 high
= aarch64_force_temporary (mode
, temp
, high
);
1865 reg
= aarch64_force_temporary (mode
, temp
,
1866 gen_rtx_PLUS (mode
, high
, reg
));
1868 return plus_constant (mode
, reg
, offset
);
1872 aarch64_internal_mov_immediate (rtx dest
, rtx imm
, bool generate
,
1873 scalar_int_mode mode
)
1876 unsigned HOST_WIDE_INT val
, val2
, mask
;
1877 int one_match
, zero_match
;
1882 if (aarch64_move_imm (val
, mode
))
1885 emit_insn (gen_rtx_SET (dest
, imm
));
1889 /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
1890 (with XXXX non-zero). In that case check to see if the move can be done in
1892 val2
= val
& 0xffffffff;
1894 && aarch64_move_imm (val2
, SImode
)
1895 && (((val
>> 32) & 0xffff) == 0 || (val
>> 48) == 0))
1898 emit_insn (gen_rtx_SET (dest
, GEN_INT (val2
)));
1900 /* Check if we have to emit a second instruction by checking to see
1901 if any of the upper 32 bits of the original DI mode value is set. */
1905 i
= (val
>> 48) ? 48 : 32;
1908 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1909 GEN_INT ((val
>> i
) & 0xffff)));
1914 if ((val
>> 32) == 0 || mode
== SImode
)
1918 emit_insn (gen_rtx_SET (dest
, GEN_INT (val
& 0xffff)));
1920 emit_insn (gen_insv_immsi (dest
, GEN_INT (16),
1921 GEN_INT ((val
>> 16) & 0xffff)));
1923 emit_insn (gen_insv_immdi (dest
, GEN_INT (16),
1924 GEN_INT ((val
>> 16) & 0xffff)));
1929 /* Remaining cases are all for DImode. */
1932 zero_match
= ((val
& mask
) == 0) + ((val
& (mask
<< 16)) == 0) +
1933 ((val
& (mask
<< 32)) == 0) + ((val
& (mask
<< 48)) == 0);
1934 one_match
= ((~val
& mask
) == 0) + ((~val
& (mask
<< 16)) == 0) +
1935 ((~val
& (mask
<< 32)) == 0) + ((~val
& (mask
<< 48)) == 0);
1937 if (zero_match
!= 2 && one_match
!= 2)
1939 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
1940 For a 64-bit bitmask try whether changing 16 bits to all ones or
1941 zeroes creates a valid bitmask. To check any repeated bitmask,
1942 try using 16 bits from the other 32-bit half of val. */
1944 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1947 if (val2
!= val
&& aarch64_bitmask_imm (val2
, mode
))
1950 if (val2
!= val
&& aarch64_bitmask_imm (val2
, mode
))
1952 val2
= val2
& ~mask
;
1953 val2
= val2
| (((val2
>> 32) | (val2
<< 32)) & mask
);
1954 if (val2
!= val
&& aarch64_bitmask_imm (val2
, mode
))
1961 emit_insn (gen_rtx_SET (dest
, GEN_INT (val2
)));
1962 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1963 GEN_INT ((val
>> i
) & 0xffff)));
1969 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
1970 are emitted by the initial mov. If one_match > zero_match, skip set bits,
1971 otherwise skip zero bits. */
1975 val2
= one_match
> zero_match
? ~val
: val
;
1976 i
= (val2
& mask
) != 0 ? 0 : (val2
& (mask
<< 16)) != 0 ? 16 : 32;
1979 emit_insn (gen_rtx_SET (dest
, GEN_INT (one_match
> zero_match
1980 ? (val
| ~(mask
<< i
))
1981 : (val
& (mask
<< i
)))));
1982 for (i
+= 16; i
< 64; i
+= 16)
1984 if ((val2
& (mask
<< i
)) == 0)
1987 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1988 GEN_INT ((val
>> i
) & 0xffff)));
1995 /* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a
1996 temporary value if necessary. FRAME_RELATED_P should be true if
1997 the RTX_FRAME_RELATED flag should be set and CFA adjustments added
1998 to the generated instructions. If SCRATCHREG is known to hold
1999 abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
2002 Since this function may be used to adjust the stack pointer, we must
2003 ensure that it cannot cause transient stack deallocation (for example
2004 by first incrementing SP and then decrementing when adjusting by a
2005 large immediate). */
2008 aarch64_add_constant_internal (scalar_int_mode mode
, int regnum
,
2009 int scratchreg
, HOST_WIDE_INT delta
,
2010 bool frame_related_p
, bool emit_move_imm
)
2012 HOST_WIDE_INT mdelta
= abs_hwi (delta
);
2013 rtx this_rtx
= gen_rtx_REG (mode
, regnum
);
2019 /* Single instruction adjustment. */
2020 if (aarch64_uimm12_shift (mdelta
))
2022 insn
= emit_insn (gen_add2_insn (this_rtx
, GEN_INT (delta
)));
2023 RTX_FRAME_RELATED_P (insn
) = frame_related_p
;
2027 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits.
2028 Only do this if mdelta is not a 16-bit move as adjusting using a move
2030 if (mdelta
< 0x1000000 && !aarch64_move_imm (mdelta
, mode
))
2032 HOST_WIDE_INT low_off
= mdelta
& 0xfff;
2034 low_off
= delta
< 0 ? -low_off
: low_off
;
2035 insn
= emit_insn (gen_add2_insn (this_rtx
, GEN_INT (low_off
)));
2036 RTX_FRAME_RELATED_P (insn
) = frame_related_p
;
2037 insn
= emit_insn (gen_add2_insn (this_rtx
, GEN_INT (delta
- low_off
)));
2038 RTX_FRAME_RELATED_P (insn
) = frame_related_p
;
2042 /* Emit a move immediate if required and an addition/subtraction. */
2043 rtx scratch_rtx
= gen_rtx_REG (mode
, scratchreg
);
2045 aarch64_internal_mov_immediate (scratch_rtx
, GEN_INT (mdelta
), true, mode
);
2046 insn
= emit_insn (delta
< 0 ? gen_sub2_insn (this_rtx
, scratch_rtx
)
2047 : gen_add2_insn (this_rtx
, scratch_rtx
));
2048 if (frame_related_p
)
2050 RTX_FRAME_RELATED_P (insn
) = frame_related_p
;
2051 rtx adj
= plus_constant (mode
, this_rtx
, delta
);
2052 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, gen_rtx_SET (this_rtx
, adj
));
2057 aarch64_add_constant (scalar_int_mode mode
, int regnum
, int scratchreg
,
2058 HOST_WIDE_INT delta
)
2060 aarch64_add_constant_internal (mode
, regnum
, scratchreg
, delta
, false, true);
2064 aarch64_add_sp (int scratchreg
, HOST_WIDE_INT delta
, bool emit_move_imm
)
2066 aarch64_add_constant_internal (Pmode
, SP_REGNUM
, scratchreg
, delta
,
2067 true, emit_move_imm
);
2071 aarch64_sub_sp (int scratchreg
, HOST_WIDE_INT delta
, bool frame_related_p
)
2073 aarch64_add_constant_internal (Pmode
, SP_REGNUM
, scratchreg
, -delta
,
2074 frame_related_p
, true);
2078 aarch64_expand_mov_immediate (rtx dest
, rtx imm
)
2080 machine_mode mode
= GET_MODE (dest
);
2082 gcc_assert (mode
== SImode
|| mode
== DImode
);
2084 /* Check on what type of symbol it is. */
2085 scalar_int_mode int_mode
;
2086 if ((GET_CODE (imm
) == SYMBOL_REF
2087 || GET_CODE (imm
) == LABEL_REF
2088 || GET_CODE (imm
) == CONST
)
2089 && is_a
<scalar_int_mode
> (mode
, &int_mode
))
2091 rtx mem
, base
, offset
;
2092 enum aarch64_symbol_type sty
;
2094 /* If we have (const (plus symbol offset)), separate out the offset
2095 before we start classifying the symbol. */
2096 split_const (imm
, &base
, &offset
);
2098 sty
= aarch64_classify_symbol (base
, offset
);
2101 case SYMBOL_FORCE_TO_MEM
:
2102 if (offset
!= const0_rtx
2103 && targetm
.cannot_force_const_mem (int_mode
, imm
))
2105 gcc_assert (can_create_pseudo_p ());
2106 base
= aarch64_force_temporary (int_mode
, dest
, base
);
2107 base
= aarch64_add_offset (int_mode
, NULL
, base
,
2109 aarch64_emit_move (dest
, base
);
2113 mem
= force_const_mem (ptr_mode
, imm
);
2116 /* If we aren't generating PC relative literals, then
2117 we need to expand the literal pool access carefully.
2118 This is something that needs to be done in a number
2119 of places, so could well live as a separate function. */
2120 if (!aarch64_pcrelative_literal_loads
)
2122 gcc_assert (can_create_pseudo_p ());
2123 base
= gen_reg_rtx (ptr_mode
);
2124 aarch64_expand_mov_immediate (base
, XEXP (mem
, 0));
2125 if (ptr_mode
!= Pmode
)
2126 base
= convert_memory_address (Pmode
, base
);
2127 mem
= gen_rtx_MEM (ptr_mode
, base
);
2130 if (int_mode
!= ptr_mode
)
2131 mem
= gen_rtx_ZERO_EXTEND (int_mode
, mem
);
2133 emit_insn (gen_rtx_SET (dest
, mem
));
2137 case SYMBOL_SMALL_TLSGD
:
2138 case SYMBOL_SMALL_TLSDESC
:
2139 case SYMBOL_SMALL_TLSIE
:
2140 case SYMBOL_SMALL_GOT_28K
:
2141 case SYMBOL_SMALL_GOT_4G
:
2142 case SYMBOL_TINY_GOT
:
2143 case SYMBOL_TINY_TLSIE
:
2144 if (offset
!= const0_rtx
)
2146 gcc_assert(can_create_pseudo_p ());
2147 base
= aarch64_force_temporary (int_mode
, dest
, base
);
2148 base
= aarch64_add_offset (int_mode
, NULL
, base
,
2150 aarch64_emit_move (dest
, base
);
2155 case SYMBOL_SMALL_ABSOLUTE
:
2156 case SYMBOL_TINY_ABSOLUTE
:
2157 case SYMBOL_TLSLE12
:
2158 case SYMBOL_TLSLE24
:
2159 case SYMBOL_TLSLE32
:
2160 case SYMBOL_TLSLE48
:
2161 aarch64_load_symref_appropriately (dest
, imm
, sty
);
2169 if (!CONST_INT_P (imm
))
2171 if (GET_CODE (imm
) == HIGH
)
2172 emit_insn (gen_rtx_SET (dest
, imm
));
2175 rtx mem
= force_const_mem (mode
, imm
);
2177 emit_insn (gen_rtx_SET (dest
, mem
));
2183 aarch64_internal_mov_immediate (dest
, imm
, true,
2184 as_a
<scalar_int_mode
> (mode
));
2188 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED
,
2189 tree exp ATTRIBUTE_UNUSED
)
2191 /* Currently, always true. */
2195 /* Implement TARGET_PASS_BY_REFERENCE. */
2198 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED
,
2201 bool named ATTRIBUTE_UNUSED
)
2204 machine_mode dummymode
;
2207 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
2208 size
= (mode
== BLKmode
&& type
)
2209 ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2211 /* Aggregates are passed by reference based on their size. */
2212 if (type
&& AGGREGATE_TYPE_P (type
))
2214 size
= int_size_in_bytes (type
);
2217 /* Variable sized arguments are always returned by reference. */
2221 /* Can this be a candidate to be passed in fp/simd register(s)? */
2222 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating-point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}
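/* Illustrative example (not from the original source): on LP64 a plain
   24-byte struct argument is passed by reference (size > 16 bytes), whereas
   a 16-byte struct is still passed by value in two registers, and an HFA
   such as struct { double a, b, c, d; } is passed by value (in FP/SIMD
   registers when available) despite being 32 bytes.  */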
2233 /* Return TRUE if VALTYPE is padded to its least significant bits. */
2235 aarch64_return_in_msb (const_tree valtype
)
2237 machine_mode dummy_mode
;
2240 /* Never happens in little-endian mode. */
2241 if (!BYTES_BIG_ENDIAN
)
2244 /* Only composite types smaller than or equal to 16 bytes can
2245 be potentially returned in registers. */
2246 if (!aarch64_composite_type_p (valtype
, TYPE_MODE (valtype
))
2247 || int_size_in_bytes (valtype
) <= 0
2248 || int_size_in_bytes (valtype
) > 16)
2251 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
2252 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
2253 is always passed/returned in the least significant bits of fp/simd
2255 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype
), valtype
,
2256 &dummy_mode
, &dummy_int
, NULL
))
/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp;
  int count;
  machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}
/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return TARGET_FLOAT;

  return false;
}
/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type,
					       &ag_mode,
					       &count,
					       NULL))
    return false;

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}
static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode, type,
						  &pcum->aapcs_vfp_rmode,
						  nregs, NULL);
}
/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (machine_mode mode, const_tree type)
{
  if (!type)
    return GET_MODE_ALIGNMENT (mode);

  if (integer_zerop (TYPE_SIZE (type)))
    return 0;

  gcc_assert (TYPE_MODE (type) == mode);

  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));

  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type));

  unsigned int alignment = 0;
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (TREE_CODE (field) == FIELD_DECL)
      alignment = std::max (alignment, DECL_ALIGN (field));

  return alignment;
}
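
/* Worked example: for "struct s { int a; double b; }" the loop above
   returns 64 bits (the largest field alignment), so such an argument is
   8-byte aligned; a struct containing an __int128 or a 16-byte vector
   field yields 128 bits, which is the value that rule C.8 in
   aarch64_layout_arg keys off.  */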
/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;
  HOST_WIDE_INT size;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
  size
    = ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
		UNITS_PER_WORD);

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v, mode,
						 type, &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
     and homogenous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (!TARGET_FLOAT)
	aarch64_err_no_fpadvsimd (mode, "argument");

      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;

	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = size / UNITS_PER_WORD;

  /* C6 - C9.  though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2
	  && ncrn % 2
	  /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
	     comparison is there because for > 16 * BITS_PER_UNIT
	     alignment nregs should be > 2 and therefore it should be
	     passed by reference rather than value.  */
	  && aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}

      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument and align the total size if necessary.  */
on_stack:
  pcum->aapcs_stack_words = size / UNITS_PER_WORD;

  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
				       16 / UNITS_PER_WORD);
  return;
}
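
/* Worked example of rule C.8 above: for "void f (int a, __int128 b)",
   'a' occupies x0, so NGRN is 1 when 'b' is laid out.  'b' needs two
   registers and has 16-byte alignment, so NGRN is rounded up to 2 and 'b'
   is passed in the even/odd pair x2/x3, leaving x1 unused.  */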
/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
		      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}
void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  if (!TARGET_FLOAT
      && fndecl && TREE_PUBLIC (fndecl)
      && fntype && fntype != error_mark_node)
    {
      const_tree type = TREE_TYPE (fntype);
      machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument.  */
      int nregs ATTRIBUTE_UNUSED; /* Likewise.  */
      if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
						   &mode, &nregs, NULL))
	aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
    }
  return;
}
static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      machine_mode mode,
			      const_tree type,
			      bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
		  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}
bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}

/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);
  return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
}
/* Implement TARGET_FUNCTION_ARG_PADDING.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

static pad_direction
aarch64_function_arg_padding (machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return PAD_UPWARD;

  /* Otherwise, integral, floating-point and pointer types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
	 || POINTER_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return PAD_DOWNWARD;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return PAD_UPWARD;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
{
  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
			    : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
	return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}
static scalar_int_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}

#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* We use the 12-bit shifted immediate arithmetic instructions so values
   must be multiple of (1 << 12), i.e. 4096.  */
#define ARITH_FACTOR 4096

#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
#error Cannot use simple address calculation for stack probing
#endif

/* The pair of scratch registers used for stack probing.  */
#define PROBE_STACK_FIRST_REG  9
#define PROBE_STACK_SECOND_REG 10
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);

  /* See the same assertion on PROBE_INTERVAL above.  */
  gcc_assert ((first % ARITH_FACTOR) == 0);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);

      emit_set_insn (reg1,
		     plus_constant (Pmode,
				    stack_pointer_rtx, -(first + base)));
      emit_stack_probe (plus_constant (Pmode, reg1, base - size));
    }

  /* The run-time loop is made up of 8 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 4 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, rem;

      emit_set_insn (reg1,
		     plus_constant (Pmode,
				    stack_pointer_rtx,
				    -(first + PROBE_INTERVAL)));
      emit_stack_probe (reg1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
	 it exceeds SIZE.  If only two probes are needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  emit_set_insn (reg1,
			 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
	  emit_stack_probe (reg1);
	}

      rem = size - (i - PROBE_INTERVAL);
      if (rem > 256)
	{
	  const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);

	  emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
	  emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
	}
      else
	emit_stack_probe (plus_constant (Pmode, reg1, -rem));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;

      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_set_insn (reg1,
		     plus_constant (Pmode, stack_pointer_rtx, -first));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      HOST_WIDE_INT adjustment = - (first + rounded_size);
      if (! aarch64_uimm12_shift (adjustment))
	{
	  aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
					  true, Pmode);
	  emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
	}
      else
	emit_set_insn (reg2,
		       plus_constant (Pmode, stack_pointer_rtx, adjustment));

      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (reg1, reg1, reg2));

      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  HOST_WIDE_INT rem = size - rounded_size;

	  if (rem > 256)
	    {
	      const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);

	      emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
	      emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
	    }
	  else
	    emit_stack_probe (plus_constant (Pmode, reg2, -rem));
	}
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
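
/* As an illustration, with the default PROBE_INTERVAL of 4096 (1 << 12)
   and FIRST == 0: a SIZE of 16384 takes the second branch above and emits
   probes at offsets 4096, 8192, 12288 and 16384 below the incoming SP,
   while a SIZE of 20480 falls through to the loop form and probes every
   4096 bytes until TEST_ADDR reaches LAST_ADDR (five probes in total).  */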
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub\t%0, %0, %1", xops);

  /* Probe at TEST_ADDR.  */
  output_asm_insn ("str\txzr, [%0]", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);

  /* Branch.  */
  fputs ("\tb.ne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno, last_fp_reg = INVALID_REGNUM;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

  /* Force a frame chain for EH returns so the return address is at FP+8.  */
  cfun->machine->frame.emit_frame_chain
    = frame_pointer_needed || crtl->calls_eh_return;

  /* Emit a frame chain if the frame pointer is enabled.
     If -momit-leaf-frame-pointer is used, do not use a frame chain
     in leaf functions which do not use LR.  */
  if (flag_omit_frame_pointer == 2
      && !(flag_omit_leaf_frame_pointer && crtl->is_leaf
	   && !df_regs_ever_live_p (LR_REGNUM)))
    cfun->machine->frame.emit_frame_chain = true;

#define SLOT_NOT_REQUIRED (-2)
#define SLOT_REQUIRED     (-1)

  cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
  cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
	= SLOT_REQUIRED;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& (regno == R30_REGNUM
	    || !call_used_regs[regno]))
      cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      {
	cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
	last_fp_reg = regno;
      }

  if (cfun->machine->frame.emit_frame_chain)
    {
      /* FP and LR are placed in the linkage record.  */
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.wb_candidate1 = R29_REGNUM;
      cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
      cfun->machine->frame.wb_candidate2 = R30_REGNUM;
      offset = 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
	  cfun->machine->frame.wb_candidate1 = regno;
	else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
	  cfun->machine->frame.wb_candidate2 = regno;
	offset += UNITS_PER_WORD;
      }

  HOST_WIDE_INT max_int_offset = offset;
  offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
  bool has_align_gap = offset != max_int_offset;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
      {
	/* If there is an alignment gap between integer and fp callee-saves,
	   allocate the last fp register to it if possible.  */
	if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
	  {
	    cfun->machine->frame.reg_offset[regno] = max_int_offset;
	    break;
	  }

	cfun->machine->frame.reg_offset[regno] = offset;
	if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
	  cfun->machine->frame.wb_candidate1 = regno;
	else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
		 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
	  cfun->machine->frame.wb_candidate2 = regno;
	offset += UNITS_PER_WORD;
      }

  offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;

  HOST_WIDE_INT varargs_and_saved_regs_size
    = offset + cfun->machine->frame.saved_varargs_size;

  cfun->machine->frame.hard_fp_offset
    = ROUND_UP (varargs_and_saved_regs_size + get_frame_size (),
		STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.frame_size
    = ROUND_UP (cfun->machine->frame.hard_fp_offset
		+ crtl->outgoing_args_size,
		STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;

  cfun->machine->frame.initial_adjust = 0;
  cfun->machine->frame.final_adjust = 0;
  cfun->machine->frame.callee_adjust = 0;
  cfun->machine->frame.callee_offset = 0;

  HOST_WIDE_INT max_push_offset = 0;
  if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
    max_push_offset = 512;
  else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
    max_push_offset = 256;

  if (cfun->machine->frame.frame_size < max_push_offset
      && crtl->outgoing_args_size == 0)
    {
      /* Simple, small frame with no outgoing arguments:
	 stp reg1, reg2, [sp, -frame_size]!
	 stp reg3, reg4, [sp, 16]  */
      cfun->machine->frame.callee_adjust = cfun->machine->frame.frame_size;
    }
  else if ((crtl->outgoing_args_size
	    + cfun->machine->frame.saved_regs_size < 512)
	   && !(cfun->calls_alloca
		&& cfun->machine->frame.hard_fp_offset < max_push_offset))
    {
      /* Frame with small outgoing arguments:
	 sub sp, sp, frame_size
	 stp reg1, reg2, [sp, outgoing_args_size]
	 stp reg3, reg4, [sp, outgoing_args_size + 16]  */
      cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
      cfun->machine->frame.callee_offset
	= cfun->machine->frame.frame_size
	  - cfun->machine->frame.hard_fp_offset;
    }
  else if (cfun->machine->frame.hard_fp_offset < max_push_offset)
    {
      /* Frame with large outgoing arguments but a small local area:
	 stp reg1, reg2, [sp, -hard_fp_offset]!
	 stp reg3, reg4, [sp, 16]
	 sub sp, sp, outgoing_args_size  */
      cfun->machine->frame.callee_adjust
	= cfun->machine->frame.hard_fp_offset;
      cfun->machine->frame.final_adjust
	= cfun->machine->frame.frame_size
	  - cfun->machine->frame.callee_adjust;
    }
  else
    {
      /* Frame with large local area and outgoing arguments using frame pointer:
	 sub sp, sp, hard_fp_offset
	 stp x29, x30, [sp, 0]
	 add x29, sp, 0
	 stp reg3, reg4, [sp, 16]
	 sub sp, sp, outgoing_args_size  */
      cfun->machine->frame.initial_adjust
	= cfun->machine->frame.hard_fp_offset;
      cfun->machine->frame.final_adjust
	= cfun->machine->frame.frame_size
	  - cfun->machine->frame.initial_adjust;
    }

  cfun->machine->frame.laid_out = true;
}
/* Return true if the register REGNO is saved on entry to
   the current function.  */

static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] >= 0;
}

/* Return the next register up from REGNO up to LIMIT for the callee
   to save.  */

static unsigned
aarch64_next_callee_save (unsigned regno, unsigned limit)
{
  while (regno <= limit && !aarch64_register_saved_on_entry (regno))
    regno++;
  return regno;
}
/* Push the register number REGNO of mode MODE to the stack with write-back
   adjusting the stack by ADJUSTMENT.  */

static void
aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
			   HOST_WIDE_INT adjustment)
{
  rtx base_rtx = stack_pointer_rtx;
  rtx insn, reg, mem;

  reg = gen_rtx_REG (mode, regno);
  mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
			    plus_constant (Pmode, base_rtx, -adjustment));
  mem = gen_frame_mem (mode, mem);

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate and return an instruction to store the pair of registers
   REG and REG2 of mode MODE to location BASE with write-back adjusting
   the stack location BASE by ADJUSTMENT.  */

static rtx
aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
			  HOST_WIDE_INT adjustment)
{
  switch (mode)
    {
    case E_DImode:
      return gen_storewb_pairdi_di (base, base, reg, reg2,
				    GEN_INT (-adjustment),
				    GEN_INT (UNITS_PER_WORD - adjustment));
    case E_DFmode:
      return gen_storewb_pairdf_di (base, base, reg, reg2,
				    GEN_INT (-adjustment),
				    GEN_INT (UNITS_PER_WORD - adjustment));
    default:
      gcc_unreachable ();
    }
}
/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
   stack pointer by ADJUSTMENT.  */

static void
aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
{
  rtx_insn *insn;
  machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;

  if (regno2 == INVALID_REGNUM)
    return aarch64_pushwb_single_reg (mode, regno1, adjustment);

  rtx reg1 = gen_rtx_REG (mode, regno1);
  rtx reg2 = gen_rtx_REG (mode, regno2);

  insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
					      reg2, adjustment));
  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Load the pair of register REG, REG2 of mode MODE from stack location BASE,
   adjusting it by ADJUSTMENT afterwards.  */

static rtx
aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
			 HOST_WIDE_INT adjustment)
{
  switch (mode)
    {
    case E_DImode:
      return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
				   GEN_INT (UNITS_PER_WORD));
    case E_DFmode:
      return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
				   GEN_INT (UNITS_PER_WORD));
    default:
      gcc_unreachable ();
    }
}
/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
   afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
   into CFI_OPS.  */

static void
aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
		  rtx *cfi_ops)
{
  machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
  rtx reg1 = gen_rtx_REG (mode, regno1);

  *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);

  if (regno2 == INVALID_REGNUM)
    {
      rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
      mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
      emit_move_insn (reg1, gen_frame_mem (mode, mem));
    }
  else
    {
      rtx reg2 = gen_rtx_REG (mode, regno2);
      *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
      emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
					  reg2, adjustment));
    }
}
/* Generate and return a store pair instruction of mode MODE to store
   register REG1 to MEM1 and register REG2 to MEM2.  */

static rtx
aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
			rtx reg2)
{
  switch (mode)
    {
    case E_DImode:
      return gen_store_pairdi (mem1, reg1, mem2, reg2);

    case E_DFmode:
      return gen_store_pairdf (mem1, reg1, mem2, reg2);

    default:
      gcc_unreachable ();
    }
}

/* Generate and return a load pair instruction of mode MODE to load register
   REG1 from MEM1 and register REG2 from MEM2.  */

static rtx
aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
		       rtx mem2)
{
  switch (mode)
    {
    case E_DImode:
      return gen_load_pairdi (reg1, mem1, reg2, mem2);

    case E_DFmode:
      return gen_load_pairdf (reg1, mem1, reg2, mem2);

    default:
      gcc_unreachable ();
    }
}
/* Return TRUE if return address signing should be enabled for the current
   function, otherwise return FALSE.  */

bool
aarch64_return_address_signing_enabled (void)
{
  /* This function should only be called after frame laid out.  */
  gcc_assert (cfun->machine->frame.laid_out);

  /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
     if its LR is pushed onto stack.  */
  return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
	  || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
	      && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0));
}
/* Emit code to save the callee-saved registers from register number START
   to LIMIT to the stack at the location starting at offset START_OFFSET,
   skipping any write-back candidates if SKIP_WB is true.  */

static void
aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
			   unsigned start, unsigned limit, bool skip_wb)
{
  rtx_insn *insn;
  unsigned regno;
  unsigned regno2;

  for (regno = aarch64_next_callee_save (start, limit);
       regno <= limit;
       regno = aarch64_next_callee_save (regno + 1, limit))
    {
      rtx reg, mem;
      HOST_WIDE_INT offset;

      if (skip_wb
	  && (regno == cfun->machine->frame.wb_candidate1
	      || regno == cfun->machine->frame.wb_candidate2))
	continue;

      if (cfun->machine->reg_is_wrapped_separately[regno])
	continue;

      reg = gen_rtx_REG (mode, regno);
      offset = start_offset + cfun->machine->frame.reg_offset[regno];
      mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
						offset));

      regno2 = aarch64_next_callee_save (regno + 1, limit);

      if (regno2 <= limit
	  && !cfun->machine->reg_is_wrapped_separately[regno2]
	  && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
	      == cfun->machine->frame.reg_offset[regno2]))
	{
	  rtx reg2 = gen_rtx_REG (mode, regno2);
	  rtx mem2;

	  offset = start_offset + cfun->machine->frame.reg_offset[regno2];
	  mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
						     offset));
	  insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
						    reg2));

	  /* The first part of a frame-related parallel insn is
	     always assumed to be relevant to the frame
	     calculations; subsequent parts, are only
	     frame-related if explicitly marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  regno = regno2;
	}
      else
	insn = emit_move_insn (mem, reg);

      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Emit code to restore the callee registers of mode MODE from register
   number START up to and including LIMIT.  Restore from the stack offset
   START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
   Write the appropriate REG_CFA_RESTORE notes into CFI_OPS.  */

static void
aarch64_restore_callee_saves (machine_mode mode,
			      HOST_WIDE_INT start_offset, unsigned start,
			      unsigned limit, bool skip_wb, rtx *cfi_ops)
{
  rtx base_rtx = stack_pointer_rtx;
  unsigned regno;
  unsigned regno2;
  HOST_WIDE_INT offset;

  for (regno = aarch64_next_callee_save (start, limit);
       regno <= limit;
       regno = aarch64_next_callee_save (regno + 1, limit))
    {
      if (cfun->machine->reg_is_wrapped_separately[regno])
	continue;

      rtx reg, mem;

      if (skip_wb
	  && (regno == cfun->machine->frame.wb_candidate1
	      || regno == cfun->machine->frame.wb_candidate2))
	continue;

      reg = gen_rtx_REG (mode, regno);
      offset = start_offset + cfun->machine->frame.reg_offset[regno];
      mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));

      regno2 = aarch64_next_callee_save (regno + 1, limit);

      if (regno2 <= limit
	  && !cfun->machine->reg_is_wrapped_separately[regno2]
	  && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
	      == cfun->machine->frame.reg_offset[regno2]))
	{
	  rtx reg2 = gen_rtx_REG (mode, regno2);
	  rtx mem2;

	  offset = start_offset + cfun->machine->frame.reg_offset[regno2];
	  mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
	  emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));

	  *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
	  regno = regno2;
	}
      else
	emit_move_insn (reg, mem);
      *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
    }
}
static inline bool
offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
	  && offset < 4096 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}

bool
aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
	  && offset < 64 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}
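
/* For DImode (GET_MODE_SIZE == 8) the three predicates above accept:
     - 7-bit signed scaled:    offsets -512 .. 504 in steps of 8
       (the LDP/STP immediate range);
     - 9-bit signed unscaled:  any byte offset in -256 .. 255
       (the LDUR/STUR range);
     - 12-bit unsigned scaled: offsets 0 .. 32760 in steps of 8
       (the plain LDR/STR immediate range).  */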
/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.  */

static sbitmap
aarch64_get_separate_components (void)
{
  aarch64_layout_frame ();

  sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
  bitmap_clear (components);

  /* The registers we need saved to the frame.  */
  for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
    if (aarch64_register_saved_on_entry (regno))
      {
	HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
	if (!frame_pointer_needed)
	  offset += cfun->machine->frame.frame_size
		    - cfun->machine->frame.hard_fp_offset;
	/* Check that we can access the stack slot of the register with one
	   direct load with no adjustments needed.  */
	if (offset_12bit_unsigned_scaled_p (DImode, offset))
	  bitmap_set_bit (components, regno);
      }

  /* Don't mess with the hard frame pointer.  */
  if (frame_pointer_needed)
    bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);

  unsigned reg1 = cfun->machine->frame.wb_candidate1;
  unsigned reg2 = cfun->machine->frame.wb_candidate2;
  /* If aarch64_layout_frame has chosen registers to store/restore with
     writeback don't interfere with them to avoid having to output explicit
     stack adjustment instructions.  */
  if (reg2 != INVALID_REGNUM)
    bitmap_clear_bit (components, reg2);
  if (reg1 != INVALID_REGNUM)
    bitmap_clear_bit (components, reg1);

  bitmap_clear_bit (components, LR_REGNUM);
  bitmap_clear_bit (components, SP_REGNUM);

  return components;
}
/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB.  */

static sbitmap
aarch64_components_for_bb (basic_block bb)
{
  bitmap in = DF_LIVE_IN (bb);
  bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
  bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;

  sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
  bitmap_clear (components);

  /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets.  */
  for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
    if ((!call_used_regs[regno])
	&& (bitmap_bit_p (in, regno)
	    || bitmap_bit_p (gen, regno)
	    || bitmap_bit_p (kill, regno)))
      bitmap_set_bit (components, regno);

  return components;
}

/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
   Nothing to do for aarch64.  */

static void
aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
{
}
/* Return the next set bit in BMP from START onwards.  Return the total number
   of bits in BMP if no set bit is found at or after START.  */

static unsigned int
aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
{
  unsigned int nbits = SBITMAP_SIZE (bmp);
  if (start == nbits)
    return start;

  gcc_assert (start < nbits);
  for (unsigned int i = start; i < nbits; i++)
    if (bitmap_bit_p (bmp, i))
      return i;

  return nbits;
}
/* Do the work for aarch64_emit_prologue_components and
   aarch64_emit_epilogue_components.  COMPONENTS is the bitmap of registers
   to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
   for these components or the epilogue sequence.  That is, it determines
   whether we should emit stores or loads and what kind of CFA notes to attach
   to the insns.  Otherwise the logic for the two sequences is very
   similar.  */

static void
aarch64_process_components (sbitmap components, bool prologue_p)
{
  rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
			     ? HARD_FRAME_POINTER_REGNUM
			     : STACK_POINTER_REGNUM);

  unsigned last_regno = SBITMAP_SIZE (components);
  unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
  rtx_insn *insn = NULL;

  while (regno != last_regno)
    {
      /* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved
	 so DFmode for the vector registers is enough.  */
      machine_mode mode = GP_REGNUM_P (regno) ? E_DImode : E_DFmode;
      rtx reg = gen_rtx_REG (mode, regno);
      HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
      if (!frame_pointer_needed)
	offset += cfun->machine->frame.frame_size
		  - cfun->machine->frame.hard_fp_offset;
      rtx addr = plus_constant (Pmode, ptr_reg, offset);
      rtx mem = gen_frame_mem (mode, addr);

      rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
      unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
      /* No more registers to handle after REGNO.
	 Emit a single save/restore and exit.  */
      if (regno2 == last_regno)
	{
	  insn = emit_insn (set);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  if (prologue_p)
	    add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
	  else
	    add_reg_note (insn, REG_CFA_RESTORE, reg);
	  break;
	}

      HOST_WIDE_INT offset2 = cfun->machine->frame.reg_offset[regno2];
      /* The next register is not of the same class or its offset is not
	 mergeable with the current one into a pair.  */
      if (!satisfies_constraint_Ump (mem)
	  || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
	  || (offset2 - cfun->machine->frame.reg_offset[regno])
	      != GET_MODE_SIZE (mode))
	{
	  insn = emit_insn (set);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  if (prologue_p)
	    add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
	  else
	    add_reg_note (insn, REG_CFA_RESTORE, reg);

	  regno = regno2;
	  continue;
	}

      /* REGNO2 can be saved/restored in a pair with REGNO.  */
      rtx reg2 = gen_rtx_REG (mode, regno2);
      if (!frame_pointer_needed)
	offset2 += cfun->machine->frame.frame_size
		   - cfun->machine->frame.hard_fp_offset;
      rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
      rtx mem2 = gen_frame_mem (mode, addr2);
      rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
			    : gen_rtx_SET (reg2, mem2);

      if (prologue_p)
	insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
      else
	insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));

      RTX_FRAME_RELATED_P (insn) = 1;
      if (prologue_p)
	{
	  add_reg_note (insn, REG_CFA_OFFSET, set);
	  add_reg_note (insn, REG_CFA_OFFSET, set2);
	}
      else
	{
	  add_reg_note (insn, REG_CFA_RESTORE, reg);
	  add_reg_note (insn, REG_CFA_RESTORE, reg2);
	}

      regno = aarch64_get_next_set_bit (components, regno2 + 1);
    }
}
/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS.  */

static void
aarch64_emit_prologue_components (sbitmap components)
{
  aarch64_process_components (components, true);
}

/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS.  */

static void
aarch64_emit_epilogue_components (sbitmap components)
{
  aarch64_process_components (components, false);
}

/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS.  */

static void
aarch64_set_handled_components (sbitmap components)
{
  for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
    if (bitmap_bit_p (components, regno))
      cfun->machine->reg_is_wrapped_separately[regno] = true;
}
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+
	|                               | <-- incoming stack pointer (aligned)
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|  local variables              | <-- frame_pointer_rtx
	|                               |
	+-------------------------------+ \
	|  callee-saved registers       |  | frame.saved_regs_size
	+-------------------------------+  |
	|  LR'                          |  |
	+-------------------------------+  |
	|  FP'                          | / <- hard_frame_pointer_rtx (aligned)
	+-------------------------------+
	|  dynamic allocation           |
	+-------------------------------+
	|  padding                      |
	+-------------------------------+
	|  outgoing stack arguments     | <-- arg_pointer
	|                               |
	+-------------------------------+
	|                               | <-- stack_pointer_rtx (aligned)

   Dynamic stack allocations via alloca() decrease stack_pointer_rtx
   but leave frame_pointer_rtx and hard_frame_pointer_rtx
   unchanged.  */
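
/* For instance, a function that only needs to save x29/x30 and 16 bytes of
   locals, with no outgoing arguments, gets frame_size == 32 (< 512), so
   aarch64_layout_frame picks callee_adjust == 32 and the prologue below is
   a single "stp x29, x30, [sp, -32]!" followed by setting up x29 from sp,
   with the locals living at [sp, 16] .. [sp, 31].  */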
/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

void
aarch64_expand_prologue (void)
{
  aarch64_layout_frame ();

  HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size;
  HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
  HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
  HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
  HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
  unsigned reg1 = cfun->machine->frame.wb_candidate1;
  unsigned reg2 = cfun->machine->frame.wb_candidate2;
  bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
  rtx_insn *insn;

  /* Sign return address for functions.  */
  if (aarch64_return_address_signing_enabled ())
    {
      insn = emit_insn (gen_pacisp ());
      add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (frame_size > PROBE_INTERVAL
	      && frame_size > get_stack_check_protect ())
	    aarch64_emit_probe_stack_range (get_stack_check_protect (),
					    (frame_size
					     - get_stack_check_protect ()));
	}
      else if (frame_size > 0)
	aarch64_emit_probe_stack_range (get_stack_check_protect (),
					frame_size);
    }

  aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);

  if (callee_adjust != 0)
    aarch64_push_regs (reg1, reg2, callee_adjust);

  if (emit_frame_chain)
    {
      if (callee_adjust == 0)
	aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM,
				   R30_REGNUM, false);
      insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (callee_offset)));
      RTX_FRAME_RELATED_P (insn) = frame_pointer_needed;
      emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
    }

  aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
			     callee_adjust != 0 || emit_frame_chain);
  aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
			     callee_adjust != 0 || emit_frame_chain);
  aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
}
/* Return TRUE if we can use a simple_return insn.

   This function checks whether the callee saved stack is empty, which
   means no restore actions are needed.  The pro_and_epilogue will use
   this to check whether shrink-wrapping opt is feasible.  */

bool
aarch64_use_return_insn_p (void)
{
  if (!reload_completed)
    return false;

  if (crtl->profile)
    return false;

  aarch64_layout_frame ();

  return cfun->machine->frame.frame_size == 0;
}
/* Generate the epilogue instructions for returning from a function.
   This is almost exactly the reverse of the prolog sequence, except
   that we need to insert barriers to avoid scheduling loads that read
   from a deallocated stack, and we optimize the unwind records by
   emitting them all together if possible.  */
void
aarch64_expand_epilogue (bool for_sibcall)
{
  aarch64_layout_frame ();

  HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust;
  HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
  HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust;
  HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
  unsigned reg1 = cfun->machine->frame.wb_candidate1;
  unsigned reg2 = cfun->machine->frame.wb_candidate2;
  rtx cfi_ops = NULL;
  rtx_insn *insn;

  /* We need to add memory barrier to prevent read from deallocated stack.  */
  bool need_barrier_p = (get_frame_size ()
			 + cfun->machine->frame.saved_varargs_size) != 0;

  /* Emit a barrier to prevent loads from a deallocated stack.  */
  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
      || crtl->calls_eh_return)
    {
      emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
      need_barrier_p = false;
    }

  /* Restore the stack pointer from the frame pointer if it may not
     be the same as the stack pointer.  */
  if (frame_pointer_needed && (final_adjust || cfun->calls_alloca))
    {
      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
				       hard_frame_pointer_rtx,
				       GEN_INT (-callee_offset)));
      /* If writeback is used when restoring callee-saves, the CFA
	 is restored on the instruction doing the writeback.  */
      RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
    }
  else
    aarch64_add_sp (IP1_REGNUM, final_adjust,
		    df_regs_ever_live_p (IP1_REGNUM));

  aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
				callee_adjust != 0, &cfi_ops);
  aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
				callee_adjust != 0, &cfi_ops);

  if (need_barrier_p)
    emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

  if (callee_adjust != 0)
    aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);

  if (callee_adjust != 0 || initial_adjust > 65536)
    {
      /* Emit delayed restores and set the CFA to be SP + initial_adjust.  */
      insn = get_last_insn ();
      rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
      REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
      RTX_FRAME_RELATED_P (insn) = 1;
      cfi_ops = NULL;
    }

  aarch64_add_sp (IP0_REGNUM, initial_adjust,
		  df_regs_ever_live_p (IP0_REGNUM));

  if (cfi_ops)
    {
      /* Emit delayed restores and reset the CFA to be SP.  */
      insn = get_last_insn ();
      cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
      REG_NOTES (insn) = cfi_ops;
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* We prefer to emit the combined return/authenticate instruction RETAA,
     however there are three cases in which we must instead emit an explicit
     authentication instruction.

	1) Sibcalls don't return in a normal way, so if we're about to call one
	   we must authenticate.

	2) The RETAA instruction is not available before ARMv8.3-A, so if we are
	   generating code for !TARGET_ARMV8_3 we can't use it and must
	   explicitly authenticate.

	3) On an eh_return path we make extra stack adjustments to update the
	   canonical frame address to be the exception handler's CFA.  We want
	   to authenticate using the CFA of the function which calls eh_return.
	    */
  if (aarch64_return_address_signing_enabled ()
      && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
    {
      insn = emit_insn (gen_autisp ());
      add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
    {
      /* We need to unwind the stack by the offset computed by
	 EH_RETURN_STACKADJ_RTX.  We have already reset the CFA
	 to be SP; letting the CFA move during this adjustment
	 is just as correct as retaining the CFA from the body
	 of the function.  Therefore, do nothing special.  */
      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
    }

  emit_use (gen_rtx_REG (DImode, LR_REGNUM));
  if (!for_sibcall)
    emit_jump_insn (ret_rtx);
}
/* Implement EH_RETURN_HANDLER_RTX.  EH returns need to either return
   normally or return to a previous frame after unwinding.

   An EH return uses a single shared return sequence.  The epilogue is
   exactly like a normal epilogue except that it has an extra input
   register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
   that must be applied after the frame has been destroyed.  An extra label
   is inserted before the epilogue which initializes this register to zero,
   and this is the entry point for a normal return.

   An actual EH return updates the return address, initializes the stack
   adjustment and jumps directly into the epilogue (bypassing the zeroing
   of the adjustment).  Since the return address is typically saved on the
   stack when a function makes a call, the saved LR must be updated outside
   the epilogue.

   This poses problems as the store is generated well before the epilogue,
   so the offset of LR is not known yet.  Also optimizations will remove the
   store as it appears dead, even after the epilogue is generated (as the
   base or offset for loading LR is different in many cases).

   To avoid these problems this implementation forces the frame pointer
   in eh_return functions so that the location of LR is fixed and known early.
   It also marks the store volatile, so no optimization is permitted to
   remove the store.  */
rtx
aarch64_eh_return_handler_rtx (void)
{
  rtx tmp = gen_frame_mem (Pmode,
    plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));

  /* Mark the store volatile, so no optimization is permitted to remove it.  */
  MEM_VOLATILE_P (tmp) = true;
  return tmp;
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta,
			 HOST_WIDE_INT vcall_offset,
			 tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
  int this_regno = R0_REGNUM;
  rtx this_rtx, temp0, temp1, addr, funexp;
  rtx_insn *insn;

  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  if (vcall_offset == 0)
    aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
  else
    {
      gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);

      this_rtx = gen_rtx_REG (Pmode, this_regno);
      temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);

      addr = this_rtx;
      if (delta != 0)
	{
	  if (delta >= -256 && delta < 256)
	    addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
				       plus_constant (Pmode, this_rtx, delta));
	  else
	    aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
	}

      if (Pmode == ptr_mode)
	aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
      else
	aarch64_emit_move (temp0,
			   gen_rtx_ZERO_EXTEND (Pmode,
						gen_rtx_MEM (ptr_mode, addr)));

      if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
	addr = plus_constant (Pmode, temp0, vcall_offset);
      else
	{
	  aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
					  Pmode);
	  addr = gen_rtx_PLUS (Pmode, temp0, temp1);
	}

      if (Pmode == ptr_mode)
	aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
      else
	aarch64_emit_move (temp1,
			   gen_rtx_SIGN_EXTEND (Pmode,
						gen_rtx_MEM (ptr_mode, addr)));

      emit_insn (gen_add2_insn (this_rtx, temp1));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}
static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	return true;
      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
	  );
}
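
/* For example, 0xfff and 0xfff000 both satisfy this test (shift 0 and
   shift 12 respectively), while 0x1001 fails because it has bits set in
   both halves and 0x1000000 fails because its set bit lies above bit 23.  */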
/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
	return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}
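
/* For example, 0x12340000 is accepted (one 16-bit chunk, a MOVZ with a
   shift of 16), and in a 64-bit mode so is 0x1234000000000000 (shift of
   48), while 0x12345678 is rejected here because its set bits span two
   16-bit chunks and has to be built with MOVZ+MOVK instead by
   aarch64_internal_mov_immediate.  */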
/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2.  */

static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
  {
    0x0000000100000001ull,
    0x0001000100010001ull,
    0x0101010101010101ull,
    0x1111111111111111ull,
    0x5555555555555555ull,
  };
/* Return true if val is a valid bitmask immediate.  */

bool
aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
{
  unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
  int bits;

  /* Check for a single sequence of one bits and return quickly if so.
     The special cases of all ones and all zeroes returns false.  */
  val = (unsigned HOST_WIDE_INT) val_in;
  tmp = val + (val & -val);

  if (tmp == (tmp & -tmp))
    return (val + 1) > 1;

  /* Replicate 32-bit immediates so we can treat them as 64-bit.  */
  if (mode == SImode)
    val = (val << 32) | (val & 0xffffffff);

  /* Invert if the immediate doesn't start with a zero bit - this means we
     only need to search for sequences of one bits.  */
  if (val & 1)
    val = ~val;

  /* Find the first set bit and set tmp to val with the first sequence of one
     bits removed.  Return success if there is a single sequence of ones.  */
  first_one = val & -val;
  tmp = val & (val + first_one);

  if (tmp == 0)
    return true;

  /* Find the next set bit and compute the difference in bit position.  */
  next_one = tmp & -tmp;
  bits = clz_hwi (first_one) - clz_hwi (next_one);
  mask = val ^ tmp;

  /* Check the bit position difference is a power of 2, and that the first
     sequence of one bits fits within 'bits' bits.  */
  if ((mask >> bits) != 0 || bits != (bits & -bits))
    return false;

  /* Check the sequence of one bits is repeated 64/bits times.  */
  return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
}
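
/* Worked example: 0x00ff00ff00ff00ff is accepted.  It starts with a one
   bit, so it is first inverted to 0xff00ff00ff00ff00; the first run of
   ones is then bits 8-15 and the next run starts at bit 24, giving
   bits == 16 and mask == 0xff00, and the final test confirms
   0xff00 * 0x0001000100010001 == 0xff00ff00ff00ff00.  By contrast,
   0x00ff00ff00ff00fe is rejected because the distance between its runs of
   ones is 15, which is not a power of two.

   The fragment below is a minimal standalone sketch of the same test for
   a 64-bit value, using only standard C; it is illustrative only (hence
   not compiled) and the helper name is made up for the example.  */
#if 0
#include <stdbool.h>
#include <stdint.h>

static bool
bitmask_imm_sketch (uint64_t val)
{
  uint64_t tmp = val + (val & -val);
  if ((tmp & (tmp - 1)) == 0)		/* Single run of ones, or 0/~0.  */
    return val + 1 > 1;
  if (val & 1)
    val = ~val;				/* Start the search at a zero bit.  */
  uint64_t first_one = val & -val;
  tmp = val & (val + first_one);	/* Strip the first run of ones.  */
  if (tmp == 0)
    return true;
  uint64_t next_one = tmp & -tmp;
  int bits = __builtin_ctzll (next_one) - __builtin_ctzll (first_one);
  uint64_t mask = val ^ tmp;
  if ((mask >> bits) != 0 || (bits & (bits - 1)) != 0)
    return false;
  /* The first run must repeat every BITS bits across all 64 bits.  */
  return val == mask * (~(uint64_t) 0 / ((((uint64_t) 1) << bits) - 1));
}
#endif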
/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
   Assumed precondition: VAL_IN is not zero.  */

unsigned HOST_WIDE_INT
aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
{
  int lowest_bit_set = ctz_hwi (val_in);
  int highest_bit_set = floor_log2 (val_in);
  gcc_assert (val_in != 0);

  return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
	  (HOST_WIDE_INT_1U << lowest_bit_set));
}

/* Create constant where bits outside of lowest bit set to highest bit set
   are set to 1.  */

unsigned HOST_WIDE_INT
aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
{
  return val_in | ~aarch64_and_split_imm1 (val_in);
}
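
/* Worked example for the two helpers above: for VAL_IN == 0x0000f000000f0000
   aarch64_and_split_imm1 returns 0x0000ffffffff0000 (ones covering bits
   16..47) and aarch64_and_split_imm2 returns 0xfffff000000fffff.  Their
   bitwise AND reproduces VAL_IN, and both halves are valid bitmask
   immediates, so aarch64_and_bitmask_imm below reports that an AND with
   this constant can be split into two AND instructions.  */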
/* Return true if VAL_IN is a valid 'and' bitmask immediate.  */

bool
aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
{
  scalar_int_mode int_mode;
  if (!is_a <scalar_int_mode> (mode, &int_mode))
    return false;

  if (aarch64_bitmask_imm (val_in, int_mode))
    return false;

  if (aarch64_move_imm (val_in, int_mode))
    return false;

  unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);

  return aarch64_bitmask_imm (imm2, int_mode);
}

/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
{
  scalar_int_mode int_mode;
  if (!is_a <scalar_int_mode> (mode, &int_mode))
    return false;

  if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
    return 1;
  return aarch64_bitmask_imm (val, int_mode);
}
static bool
aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    {
      if (aarch64_classify_symbol (base, offset)
	  != SYMBOL_FORCE_TO_MEM)
	return true;
      else
	/* Avoid generating a 64-bit relocation in ILP32; leave
	   to aarch64_expand_mov_immediate to handle it properly.  */
	return mode != ptr_mode;
    }

  return aarch64_tls_referenced_p (x);
}
/* Implement TARGET_CASE_VALUES_THRESHOLD.
   The expansion for a table switch is quite expensive due to the number
   of instructions, the table lookup and hard to predict indirect jump.
   When optimizing for speed, and -O3 enabled, use the per-core tuning if
   set, otherwise use tables for > 16 cases as a tradeoff between size and
   performance.  When optimizing for size, use the default setting.  */

static unsigned int
aarch64_case_values_threshold (void)
{
  /* Use the specified limit for the number of cases before using jump
     tables at higher optimization levels.  */
  if (optimize > 2
      && selected_cpu->tune->max_case_values != 0)
    return selected_cpu->tune->max_case_values;
  else
    return optimize_size ? default_case_values_threshold () : 17;
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}
/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
	  || regno == SP_REGNUM
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p
      && GET_CODE (x) == SUBREG
      && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (x))])
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
/* Return true if address offset is a valid index.  If it is, fill in INFO
   appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_classify_index (struct aarch64_address_info *info, rtx x,
			machine_mode mode, bool strict_p)
{
  enum aarch64_address_type type;
  rtx index;
  int shift;

  /* (reg:P) */
  if ((REG_P (x) || GET_CODE (x) == SUBREG)
      && GET_MODE (x) == Pmode)
    {
      type = ADDRESS_REG_REG;
      index = x;
      shift = 0;
    }
  /* (sign_extend:DI (reg:SI)) */
  else if ((GET_CODE (x) == SIGN_EXTEND
	    || GET_CODE (x) == ZERO_EXTEND)
	   && GET_MODE (x) == DImode
	   && GET_MODE (XEXP (x, 0)) == SImode)
    {
      type = (GET_CODE (x) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (x, 0);
      shift = 0;
    }
  /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
  else if (GET_CODE (x) == MULT
	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	   && GET_MODE (XEXP (x, 0)) == DImode
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	   && GET_MODE (XEXP (x, 0)) == DImode
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (x, 1));
    }
  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
	    || GET_CODE (x) == ZERO_EXTRACT)
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == MULT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
	  || INTVAL (XEXP (x, 2)) != 0)
	shift = -1;
    }
  /* (and:DI (mult:DI (reg:DI) (const_int scale))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == MULT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
	shift = -1;
    }
  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
	    || GET_CODE (x) == ZERO_EXTRACT)
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == ASHIFT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
	  || INTVAL (XEXP (x, 2)) != 0)
	shift = -1;
    }
  /* (and:DI (ashift:DI (reg:DI) (const_int shift))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == ASHIFT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
	shift = -1;
    }
  /* (mult:P (reg:P) (const_int scale)) */
  else if (GET_CODE (x) == MULT
	   && GET_MODE (x) == Pmode
	   && GET_MODE (XEXP (x, 0)) == Pmode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:P (reg:P) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
	   && GET_MODE (x) == Pmode
	   && GET_MODE (XEXP (x, 0)) == Pmode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = INTVAL (XEXP (x, 1));
    }
  else
    return false;

  if (!strict_p
      && GET_CODE (index) == SUBREG
      && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
    index = SUBREG_REG (index);

  if ((shift == 0 ||
       (shift > 0 && shift <= 3
	&& (1 << shift) == GET_MODE_SIZE (mode)))
      && REG_P (index)
      && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
    {
      info->type = type;
      info->offset = index;
      info->shift = shift;
      return true;
    }

  return false;
}
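/* The accepted index forms correspond to the register-offset addressing
   modes of LDR/STR.  For example, for a 4-byte (SImode) access the shift
   must be 0 or 2, matching:
     ldr      w0, [x1, x2]             base + Xm
     ldr      w0, [x1, x2, lsl #2]     base + (Xm << 2)
     ldr      w0, [x1, w2, sxtw #2]    base + (sign-extended Wm << 2)
     ldr      w0, [x1, w2, uxtw #2]    base + (zero-extended Wm << 2)
   A shift of, say, 3 would be rejected for SImode because 1 << 3 does not
   equal the access size.  */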
/* Return true if MODE is one of the modes for which we
   support LDP/STP operations.  */

static bool
aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
{
  return mode == SImode || mode == DImode
	 || mode == SFmode || mode == DFmode
	 || (aarch64_vector_mode_supported_p (mode)
	     && GET_MODE_SIZE (mode) == 8);
}
/* Return true if REGNO is a virtual pointer register, or an eliminable
   "soft" frame register.  Like REGNO_PTR_FRAME_P except that we don't
   include stack_pointer or hard_frame_pointer.  */
static bool
virt_or_elim_regno_p (unsigned regno)
{
  return ((regno >= FIRST_VIRTUAL_REGISTER
	   && regno <= LAST_VIRTUAL_POINTER_REGISTER)
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
4437 /* Return true if X is a valid address for machine mode MODE. If it is,
4438 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
4439 effect. OUTER_CODE is PARALLEL for a load/store pair. */
4442 aarch64_classify_address (struct aarch64_address_info
*info
,
4443 rtx x
, machine_mode mode
,
4444 RTX_CODE outer_code
, bool strict_p
)
4446 enum rtx_code code
= GET_CODE (x
);
4449 /* On BE, we use load/store pair for all large int mode load/stores.
4450 TI/TFmode may also use a load/store pair. */
4451 bool load_store_pair_p
= (outer_code
== PARALLEL
4454 || (BYTES_BIG_ENDIAN
4455 && aarch64_vect_struct_mode_p (mode
)));
4457 bool allow_reg_index_p
=
4459 && (GET_MODE_SIZE (mode
) != 16 || aarch64_vector_mode_supported_p (mode
))
4460 && !aarch64_vect_struct_mode_p (mode
);
4462 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
4464 if (aarch64_vect_struct_mode_p (mode
) && !BYTES_BIG_ENDIAN
4465 && (code
!= POST_INC
&& code
!= REG
))
4472 info
->type
= ADDRESS_REG_IMM
;
4474 info
->offset
= const0_rtx
;
4475 return aarch64_base_register_rtx_p (x
, strict_p
);
4483 && virt_or_elim_regno_p (REGNO (op0
))
4484 && CONST_INT_P (op1
))
4486 info
->type
= ADDRESS_REG_IMM
;
4493 if (GET_MODE_SIZE (mode
) != 0
4494 && CONST_INT_P (op1
)
4495 && aarch64_base_register_rtx_p (op0
, strict_p
))
4497 HOST_WIDE_INT offset
= INTVAL (op1
);
4499 info
->type
= ADDRESS_REG_IMM
;
4503 /* TImode and TFmode values are allowed in both pairs of X
4504 registers and individual Q registers. The available
4506 X,X: 7-bit signed scaled offset
4507 Q: 9-bit signed offset
4508 We conservatively require an offset representable in either mode.
4509 When performing the check for pairs of X registers i.e. LDP/STP
4510 pass down DImode since that is the natural size of the LDP/STP
4511 instruction memory accesses. */
4512 if (mode
== TImode
|| mode
== TFmode
)
4513 return (aarch64_offset_7bit_signed_scaled_p (DImode
, offset
)
4514 && (offset_9bit_signed_unscaled_p (mode
, offset
)
4515 || offset_12bit_unsigned_scaled_p (mode
, offset
)));
4517 /* A 7bit offset check because OImode will emit a ldp/stp
4518 instruction (only big endian will get here).
4519 For ldp/stp instructions, the offset is scaled for the size of a
4520 single element of the pair. */
4522 return aarch64_offset_7bit_signed_scaled_p (TImode
, offset
);
4524 /* Three 9/12 bit offsets checks because CImode will emit three
4525 ldr/str instructions (only big endian will get here). */
4527 return (aarch64_offset_7bit_signed_scaled_p (TImode
, offset
)
4528 && (offset_9bit_signed_unscaled_p (V16QImode
, offset
+ 32)
4529 || offset_12bit_unsigned_scaled_p (V16QImode
,
4532 /* Two 7bit offsets checks because XImode will emit two ldp/stp
4533 instructions (only big endian will get here). */
4535 return (aarch64_offset_7bit_signed_scaled_p (TImode
, offset
)
4536 && aarch64_offset_7bit_signed_scaled_p (TImode
,
4539 if (load_store_pair_p
)
4540 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
4541 && aarch64_offset_7bit_signed_scaled_p (mode
, offset
));
4543 return (offset_9bit_signed_unscaled_p (mode
, offset
)
4544 || offset_12bit_unsigned_scaled_p (mode
, offset
));
4547 if (allow_reg_index_p
)
4549 /* Look for base + (scaled/extended) index register. */
4550 if (aarch64_base_register_rtx_p (op0
, strict_p
)
4551 && aarch64_classify_index (info
, op1
, mode
, strict_p
))
4556 if (aarch64_base_register_rtx_p (op1
, strict_p
)
4557 && aarch64_classify_index (info
, op0
, mode
, strict_p
))
4570 info
->type
= ADDRESS_REG_WB
;
4571 info
->base
= XEXP (x
, 0);
4572 info
->offset
= NULL_RTX
;
4573 return aarch64_base_register_rtx_p (info
->base
, strict_p
);
4577 info
->type
= ADDRESS_REG_WB
;
4578 info
->base
= XEXP (x
, 0);
4579 if (GET_CODE (XEXP (x
, 1)) == PLUS
4580 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
4581 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), info
->base
)
4582 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
4584 HOST_WIDE_INT offset
;
4585 info
->offset
= XEXP (XEXP (x
, 1), 1);
4586 offset
= INTVAL (info
->offset
);
4588 /* TImode and TFmode values are allowed in both pairs of X
4589 registers and individual Q registers. The available
4591 X,X: 7-bit signed scaled offset
4592 Q: 9-bit signed offset
4593 We conservatively require an offset representable in either mode.
4595 if (mode
== TImode
|| mode
== TFmode
)
4596 return (aarch64_offset_7bit_signed_scaled_p (mode
, offset
)
4597 && offset_9bit_signed_unscaled_p (mode
, offset
));
4599 if (load_store_pair_p
)
4600 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
4601 && aarch64_offset_7bit_signed_scaled_p (mode
, offset
));
4603 return offset_9bit_signed_unscaled_p (mode
, offset
);
4610 /* load literal: pc-relative constant pool entry. Only supported
4611 for SI mode or larger. */
4612 info
->type
= ADDRESS_SYMBOLIC
;
4614 if (!load_store_pair_p
&& GET_MODE_SIZE (mode
) >= 4)
4618 split_const (x
, &sym
, &addend
);
4619 return ((GET_CODE (sym
) == LABEL_REF
4620 || (GET_CODE (sym
) == SYMBOL_REF
4621 && CONSTANT_POOL_ADDRESS_P (sym
)
4622 && aarch64_pcrelative_literal_loads
)));
4627 info
->type
= ADDRESS_LO_SUM
;
4628 info
->base
= XEXP (x
, 0);
4629 info
->offset
= XEXP (x
, 1);
4630 if (allow_reg_index_p
4631 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
4634 split_const (info
->offset
, &sym
, &offs
);
4635 if (GET_CODE (sym
) == SYMBOL_REF
4636 && (aarch64_classify_symbol (sym
, offs
) == SYMBOL_SMALL_ABSOLUTE
))
4638 /* The symbol and offset must be aligned to the access size. */
4640 unsigned int ref_size
;
4642 if (CONSTANT_POOL_ADDRESS_P (sym
))
4643 align
= GET_MODE_ALIGNMENT (get_pool_mode (sym
));
4644 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym
))
4646 tree exp
= SYMBOL_REF_DECL (sym
);
4647 align
= TYPE_ALIGN (TREE_TYPE (exp
));
4648 align
= aarch64_constant_alignment (exp
, align
);
4650 else if (SYMBOL_REF_DECL (sym
))
4651 align
= DECL_ALIGN (SYMBOL_REF_DECL (sym
));
4652 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym
)
4653 && SYMBOL_REF_BLOCK (sym
) != NULL
)
4654 align
= SYMBOL_REF_BLOCK (sym
)->alignment
;
4656 align
= BITS_PER_UNIT
;
4658 ref_size
= GET_MODE_SIZE (mode
);
4660 ref_size
= GET_MODE_SIZE (DImode
);
4662 return ((INTVAL (offs
) & (ref_size
- 1)) == 0
4663 && ((align
/ BITS_PER_UNIT
) & (ref_size
- 1)) == 0);
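/* For reference, the immediate-offset ranges checked in the address
   classification above correspond to the AArch64 load/store encodings,
   e.g. for an 8-byte (DImode) access:
     ldur     x0, [x1, #-256] ... [x1, #255]       9-bit signed, unscaled
     ldr      x0, [x1, #0] ... [x1, #32760]        12-bit unsigned, scaled by 8
     ldp      x0, x1, [x2, #-512] ... [x2, #504]   7-bit signed, scaled by 8
   An offset outside all of the applicable ranges makes the address invalid
   and forces a separate address computation.  */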
/* Return true if the address X is valid for a PRFM instruction.
   STRICT_P is true if we should do strict checking with
   aarch64_classify_address.  */

bool
aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
{
  struct aarch64_address_info addr;

  /* PRFM accepts the same addresses as DImode...  */
  bool res = aarch64_classify_address (&addr, x, DImode, MEM, strict_p);
  if (!res)
    return false;

  /* ... except writeback forms.  */
  return addr.type != ADDRESS_REG_WB;
}
/* Return true if X is a symbolic constant, possibly with an offset.  */

bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}

/* Classify the base of symbolic expression X.  */

enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, offset);
}
/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (machine_mode mode, rtx x,
			      RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}
/* Split an out-of-range address displacement into a base and offset.
   Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
   to increase opportunities for sharing the base address of different sizes.
   Unaligned accesses use the signed 9-bit range, TImode/TFmode use
   the intersection of signed scaled 7-bit and signed 9-bit offset.  */
static bool
aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
{
  HOST_WIDE_INT offset = INTVAL (*disp);
  HOST_WIDE_INT base;

  if (mode == TImode || mode == TFmode)
    base = (offset + 0x100) & ~0x1f8;
  else if ((offset & (GET_MODE_SIZE (mode) - 1)) != 0)
    base = (offset + 0x100) & ~0x1ff;
  else
    base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);

  *off = GEN_INT (base);
  *disp = GEN_INT (offset - base);
  return true;
}
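/* For example, with an aligned SImode access at an out-of-range offset of
   0x12344, the split above produces an anchor of 0x10000 and a residual
   displacement of 0x2344, which fits the scaled unsigned 12-bit form
   (up to 0x3ffc for 4-byte accesses), so the anchor can be shared by
   neighbouring accesses off the same base.  */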
4757 /* Return the binary representation of floating point constant VALUE in INTVAL.
4758 If the value cannot be converted, return false without setting INTVAL.
4759 The conversion is done in the given MODE. */
4761 aarch64_reinterpret_float_as_int (rtx value
, unsigned HOST_WIDE_INT
*intval
)
4764 /* We make a general exception for 0. */
4765 if (aarch64_float_const_zero_rtx_p (value
))
4771 machine_mode mode
= GET_MODE (value
);
4772 if (GET_CODE (value
) != CONST_DOUBLE
4773 || !SCALAR_FLOAT_MODE_P (mode
)
4774 || GET_MODE_BITSIZE (mode
) > HOST_BITS_PER_WIDE_INT
4775 /* Only support up to DF mode. */
4776 || GET_MODE_BITSIZE (mode
) > GET_MODE_BITSIZE (DFmode
))
4779 unsigned HOST_WIDE_INT ival
= 0;
4782 real_to_target (res
,
4783 CONST_DOUBLE_REAL_VALUE (value
),
4784 REAL_MODE_FORMAT (mode
));
4788 int order
= BYTES_BIG_ENDIAN
? 1 : 0;
4789 ival
= zext_hwi (res
[order
], 32);
4790 ival
|= (zext_hwi (res
[1 - order
], 32) << 32);
4793 ival
= zext_hwi (res
[0], 32);
4799 /* Return TRUE if rtx X is an immediate constant that can be moved using a
4800 single MOV(+MOVK) followed by an FMOV. */
4802 aarch64_float_const_rtx_p (rtx x
)
4804 machine_mode mode
= GET_MODE (x
);
4805 if (mode
== VOIDmode
)
4808 /* Determine whether it's cheaper to write float constants as
4809 mov/movk pairs over ldr/adrp pairs. */
4810 unsigned HOST_WIDE_INT ival
;
4812 if (GET_CODE (x
) == CONST_DOUBLE
4813 && SCALAR_FLOAT_MODE_P (mode
)
4814 && aarch64_reinterpret_float_as_int (x
, &ival
))
4816 scalar_int_mode imode
= (mode
== HFmode
4818 : int_mode_for_mode (mode
).require ());
4819 int num_instr
= aarch64_internal_mov_immediate
4820 (NULL_RTX
, gen_int_mode (ival
, imode
), false, imode
);
4821 return num_instr
< 3;
/* Return TRUE if rtx X is immediate constant 0.0  */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  if (GET_MODE (x) == VOIDmode)
    return false;

  if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
}
4839 /* Return TRUE if rtx X is immediate constant that fits in a single
4840 MOVI immediate operation. */
4842 aarch64_can_const_movi_rtx_p (rtx x
, machine_mode mode
)
4848 scalar_int_mode imode
;
4849 unsigned HOST_WIDE_INT ival
;
4851 if (GET_CODE (x
) == CONST_DOUBLE
4852 && SCALAR_FLOAT_MODE_P (mode
))
4854 if (!aarch64_reinterpret_float_as_int (x
, &ival
))
4857 /* We make a general exception for 0. */
4858 if (aarch64_float_const_zero_rtx_p (x
))
4861 imode
= int_mode_for_mode (mode
).require ();
4863 else if (GET_CODE (x
) == CONST_INT
4864 && is_a
<scalar_int_mode
> (mode
, &imode
))
4869 /* use a 64 bit mode for everything except for DI/DF mode, where we use
4870 a 128 bit vector mode. */
4871 int width
= GET_MODE_BITSIZE (imode
) == 64 ? 128 : 64;
4873 vmode
= aarch64_simd_container_mode (imode
, width
);
4874 rtx v_op
= aarch64_simd_gen_const_vector_dup (vmode
, ival
);
4876 return aarch64_simd_valid_immediate (v_op
, vmode
, false, NULL
);
/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
4890 /* This function is used by the call expanders of the machine description.
4891 RESULT is the register in which the result is returned. It's NULL for
4892 "call" and "sibcall".
4893 MEM is the location of the function call.
4894 SIBCALL indicates whether this function call is normal call or sibling call.
4895 It will generate different pattern accordingly. */
4898 aarch64_expand_call (rtx result
, rtx mem
, bool sibcall
)
4900 rtx call
, callee
, tmp
;
4904 gcc_assert (MEM_P (mem
));
4905 callee
= XEXP (mem
, 0);
4906 mode
= GET_MODE (callee
);
4907 gcc_assert (mode
== Pmode
);
4909 /* Decide if we should generate indirect calls by loading the
4910 address of the callee into a register before performing
4911 the branch-and-link. */
4912 if (SYMBOL_REF_P (callee
)
4913 ? (aarch64_is_long_call_p (callee
)
4914 || aarch64_is_noplt_call_p (callee
))
4916 XEXP (mem
, 0) = force_reg (mode
, callee
);
4918 call
= gen_rtx_CALL (VOIDmode
, mem
, const0_rtx
);
4920 if (result
!= NULL_RTX
)
4921 call
= gen_rtx_SET (result
, call
);
4926 tmp
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, LR_REGNUM
));
4928 vec
= gen_rtvec (2, call
, tmp
);
4929 call
= gen_rtx_PARALLEL (VOIDmode
, vec
);
4931 aarch64_emit_call_insn (call
);
4934 /* Emit call insn with PAT and do aarch64-specific handling. */
4937 aarch64_emit_call_insn (rtx pat
)
4939 rtx insn
= emit_call_insn (pat
);
4941 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
4942 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP0_REGNUM
));
4943 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP1_REGNUM
));
4947 aarch64_select_cc_mode (RTX_CODE code
, rtx x
, rtx y
)
4949 /* All floating point compares return CCFP if it is an equality
4950 comparison, and CCFPE otherwise. */
4951 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
4978 /* Equality comparisons of short modes against zero can be performed
4979 using the TST instruction with the appropriate bitmask. */
4980 if (y
== const0_rtx
&& REG_P (x
)
4981 && (code
== EQ
|| code
== NE
)
4982 && (GET_MODE (x
) == HImode
|| GET_MODE (x
) == QImode
))
4985 /* Similarly, comparisons of zero_extends from shorter modes can
4986 be performed using an ANDS with an immediate mask. */
4987 if (y
== const0_rtx
&& GET_CODE (x
) == ZERO_EXTEND
4988 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
4989 && (GET_MODE (XEXP (x
, 0)) == HImode
|| GET_MODE (XEXP (x
, 0)) == QImode
)
4990 && (code
== EQ
|| code
== NE
))
4993 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
4995 && (code
== EQ
|| code
== NE
|| code
== LT
|| code
== GE
)
4996 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
|| GET_CODE (x
) == AND
4997 || GET_CODE (x
) == NEG
4998 || (GET_CODE (x
) == ZERO_EXTRACT
&& CONST_INT_P (XEXP (x
, 1))
4999 && CONST_INT_P (XEXP (x
, 2)))))
5002 /* A compare with a shifted operand. Because of canonicalization,
5003 the comparison will have to be swapped when we emit the assembly
5005 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
5006 && (REG_P (y
) || GET_CODE (y
) == SUBREG
|| y
== const0_rtx
)
5007 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
5008 || GET_CODE (x
) == LSHIFTRT
5009 || GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
))
5012 /* Similarly for a negated operand, but we can only do this for
5014 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
5015 && (REG_P (y
) || GET_CODE (y
) == SUBREG
)
5016 && (code
== EQ
|| code
== NE
)
5017 && GET_CODE (x
) == NEG
)
5020 /* A test for unsigned overflow. */
5021 if ((GET_MODE (x
) == DImode
|| GET_MODE (x
) == TImode
)
5023 && GET_CODE (x
) == PLUS
5024 && GET_CODE (y
) == ZERO_EXTEND
)
5027 /* For everything else, return CCmode. */
5032 aarch64_get_condition_code_1 (machine_mode
, enum rtx_code
);
5035 aarch64_get_condition_code (rtx x
)
5037 machine_mode mode
= GET_MODE (XEXP (x
, 0));
5038 enum rtx_code comp_code
= GET_CODE (x
);
5040 if (GET_MODE_CLASS (mode
) != MODE_CC
)
5041 mode
= SELECT_CC_MODE (comp_code
, XEXP (x
, 0), XEXP (x
, 1));
5042 return aarch64_get_condition_code_1 (mode
, comp_code
);
5046 aarch64_get_condition_code_1 (machine_mode mode
, enum rtx_code comp_code
)
5054 case GE
: return AARCH64_GE
;
5055 case GT
: return AARCH64_GT
;
5056 case LE
: return AARCH64_LS
;
5057 case LT
: return AARCH64_MI
;
5058 case NE
: return AARCH64_NE
;
5059 case EQ
: return AARCH64_EQ
;
5060 case ORDERED
: return AARCH64_VC
;
5061 case UNORDERED
: return AARCH64_VS
;
5062 case UNLT
: return AARCH64_LT
;
5063 case UNLE
: return AARCH64_LE
;
5064 case UNGT
: return AARCH64_HI
;
5065 case UNGE
: return AARCH64_PL
;
5073 case NE
: return AARCH64_NE
;
5074 case EQ
: return AARCH64_EQ
;
5075 case GE
: return AARCH64_GE
;
5076 case GT
: return AARCH64_GT
;
5077 case LE
: return AARCH64_LE
;
5078 case LT
: return AARCH64_LT
;
5079 case GEU
: return AARCH64_CS
;
5080 case GTU
: return AARCH64_HI
;
5081 case LEU
: return AARCH64_LS
;
5082 case LTU
: return AARCH64_CC
;
5090 case NE
: return AARCH64_NE
;
5091 case EQ
: return AARCH64_EQ
;
5092 case GE
: return AARCH64_LE
;
5093 case GT
: return AARCH64_LT
;
5094 case LE
: return AARCH64_GE
;
5095 case LT
: return AARCH64_GT
;
5096 case GEU
: return AARCH64_LS
;
5097 case GTU
: return AARCH64_CC
;
5098 case LEU
: return AARCH64_CS
;
5099 case LTU
: return AARCH64_HI
;
5107 case NE
: return AARCH64_NE
;
5108 case EQ
: return AARCH64_EQ
;
5109 case GE
: return AARCH64_PL
;
5110 case LT
: return AARCH64_MI
;
5118 case NE
: return AARCH64_NE
;
5119 case EQ
: return AARCH64_EQ
;
5127 case NE
: return AARCH64_CS
;
5128 case EQ
: return AARCH64_CC
;
/* Return true if X is a CONST_VECTOR of integers whose elements are all
   equal and lie in the range [MINVAL, MAXVAL].  */

static bool
aarch64_const_vec_all_same_in_range_p (rtx x,
				       HOST_WIDE_INT minval,
				       HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}

static bool
aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
{
  return aarch64_const_vec_all_same_in_range_p (x, val, val);
}
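/* E.g. a V4SI CONST_VECTOR whose four elements are all (const_int 5)
   satisfies aarch64_const_vec_all_same_int_p (x, 5), while a vector
   mixing 5 and 6 does not.  */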
5172 #define AARCH64_CC_V 1
5173 #define AARCH64_CC_C (1 << 1)
5174 #define AARCH64_CC_Z (1 << 2)
5175 #define AARCH64_CC_N (1 << 3)
5177 /* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
5178 static const int aarch64_nzcv_codes
[] =
5180 0, /* EQ, Z == 1. */
5181 AARCH64_CC_Z
, /* NE, Z == 0. */
5182 0, /* CS, C == 1. */
5183 AARCH64_CC_C
, /* CC, C == 0. */
5184 0, /* MI, N == 1. */
5185 AARCH64_CC_N
, /* PL, N == 0. */
5186 0, /* VS, V == 1. */
5187 AARCH64_CC_V
, /* VC, V == 0. */
5188 0, /* HI, C ==1 && Z == 0. */
5189 AARCH64_CC_C
, /* LS, !(C == 1 && Z == 0). */
5190 AARCH64_CC_V
, /* GE, N == V. */
5191 0, /* LT, N != V. */
5192 AARCH64_CC_Z
, /* GT, Z == 0 && N == V. */
5193 0, /* LE, !(Z == 0 && N == V). */
5198 /* Print operand X to file F in a target specific manner according to CODE.
5199 The acceptable formatting commands given by CODE are:
5200 'c': An integer or symbol address without a preceding #
5202 'e': Print the sign/zero-extend size as a character 8->b,
5204 'p': Prints N such that 2^N == X (X must be power of 2 and
5206 'P': Print the number of non-zero bits in X (a const_int).
5207 'H': Print the higher numbered register of a pair (TImode)
5209 'm': Print a condition (eq, ne, etc).
5210 'M': Same as 'm', but invert condition.
5211 'b/h/s/d/q': Print a scalar FP/SIMD register name.
5212 'S/T/U/V': Print a FP/SIMD register name for a register list.
5213 The register printed is the FP/SIMD register name
5214 of X + 0/1/2/3 for S/T/U/V.
5215 'R': Print a scalar FP/SIMD register name + 1.
5216 'X': Print bottom 16 bits of integer constant in hex.
5217 'w/x': Print a general register name or the zero register
5219 '0': Print a normal operand, if it's a general register,
5220 then we assume DImode.
5221 'k': Print NZCV for conditional compare instructions.
5222 'A': Output address constant representing the first
5223 argument of X, specifying a relocation offset
5225 'L': Output constant address specified by X
5226 with a relocation offset if appropriate.
5227 'G': Prints address of X, specifying a PC relative
5228 relocation mode if appropriate. */
5231 aarch64_print_operand (FILE *f
, rtx x
, int code
)
5236 switch (GET_CODE (x
))
5239 fprintf (f
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5243 output_addr_const (f
, x
);
5247 if (GET_CODE (XEXP (x
, 0)) == PLUS
5248 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
5250 output_addr_const (f
, x
);
5256 output_operand_lossage ("Unsupported operand for code '%c'", code
);
5264 if (!CONST_INT_P (x
)
5265 || (n
= exact_log2 (INTVAL (x
) & ~7)) <= 0)
5267 output_operand_lossage ("invalid operand for '%%%c'", code
);
5283 output_operand_lossage ("invalid operand for '%%%c'", code
);
5293 if (!CONST_INT_P (x
) || (n
= exact_log2 (INTVAL (x
))) < 0)
5295 output_operand_lossage ("invalid operand for '%%%c'", code
);
5299 asm_fprintf (f
, "%d", n
);
5304 if (!CONST_INT_P (x
))
5306 output_operand_lossage ("invalid operand for '%%%c'", code
);
5310 asm_fprintf (f
, "%u", popcount_hwi (INTVAL (x
)));
5314 if (!REG_P (x
) || !GP_REGNUM_P (REGNO (x
) + 1))
5316 output_operand_lossage ("invalid operand for '%%%c'", code
);
5320 asm_fprintf (f
, "%s", reg_names
[REGNO (x
) + 1]);
5327 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
5328 if (x
== const_true_rtx
)
5335 if (!COMPARISON_P (x
))
5337 output_operand_lossage ("invalid operand for '%%%c'", code
);
5341 cond_code
= aarch64_get_condition_code (x
);
5342 gcc_assert (cond_code
>= 0);
5344 cond_code
= AARCH64_INVERSE_CONDITION_CODE (cond_code
);
5345 fputs (aarch64_condition_codes
[cond_code
], f
);
5354 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
5356 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
5359 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - V0_REGNUM
);
5366 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
5368 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
5371 asm_fprintf (f
, "v%d", REGNO (x
) - V0_REGNUM
+ (code
- 'S'));
5375 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
5377 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
5380 asm_fprintf (f
, "q%d", REGNO (x
) - V0_REGNUM
+ 1);
5384 if (!CONST_INT_P (x
))
5386 output_operand_lossage ("invalid operand for '%%%c'", code
);
5389 asm_fprintf (f
, "0x%wx", UINTVAL (x
) & 0xffff);
5395 || (CONST_DOUBLE_P (x
) && aarch64_float_const_zero_rtx_p (x
)))
5397 asm_fprintf (f
, "%czr", code
);
5401 if (REG_P (x
) && GP_REGNUM_P (REGNO (x
)))
5403 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - R0_REGNUM
);
5407 if (REG_P (x
) && REGNO (x
) == SP_REGNUM
)
5409 asm_fprintf (f
, "%ssp", code
== 'w' ? "w" : "");
5418 output_operand_lossage ("missing operand");
5422 switch (GET_CODE (x
))
5425 asm_fprintf (f
, "%s", reg_names
[REGNO (x
)]);
5429 output_address (GET_MODE (x
), XEXP (x
, 0));
5430 /* Check all memory references are Pmode - even with ILP32. */
5431 gcc_assert (GET_MODE (XEXP (x
, 0)) == Pmode
);
5437 output_addr_const (asm_out_file
, x
);
5441 asm_fprintf (f
, "%wd", INTVAL (x
));
5445 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_INT
)
5448 aarch64_const_vec_all_same_in_range_p (x
,
5450 HOST_WIDE_INT_MAX
));
5451 asm_fprintf (f
, "%wd", INTVAL (CONST_VECTOR_ELT (x
, 0)));
5453 else if (aarch64_simd_imm_zero_p (x
, GET_MODE (x
)))
5462 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
5463 be getting CONST_DOUBLEs holding integers. */
5464 gcc_assert (GET_MODE (x
) != VOIDmode
);
5465 if (aarch64_float_const_zero_rtx_p (x
))
5470 else if (aarch64_float_const_representable_p (x
))
5473 char float_buf
[buf_size
] = {'\0'};
5474 real_to_decimal_for_mode (float_buf
,
5475 CONST_DOUBLE_REAL_VALUE (x
),
5478 asm_fprintf (asm_out_file
, "%s", float_buf
);
5482 output_operand_lossage ("invalid constant");
5485 output_operand_lossage ("invalid operand");
5491 if (GET_CODE (x
) == HIGH
)
5494 switch (aarch64_classify_symbolic_expression (x
))
5496 case SYMBOL_SMALL_GOT_4G
:
5497 asm_fprintf (asm_out_file
, ":got:");
5500 case SYMBOL_SMALL_TLSGD
:
5501 asm_fprintf (asm_out_file
, ":tlsgd:");
5504 case SYMBOL_SMALL_TLSDESC
:
5505 asm_fprintf (asm_out_file
, ":tlsdesc:");
5508 case SYMBOL_SMALL_TLSIE
:
5509 asm_fprintf (asm_out_file
, ":gottprel:");
5512 case SYMBOL_TLSLE24
:
5513 asm_fprintf (asm_out_file
, ":tprel:");
5516 case SYMBOL_TINY_GOT
:
5523 output_addr_const (asm_out_file
, x
);
5527 switch (aarch64_classify_symbolic_expression (x
))
5529 case SYMBOL_SMALL_GOT_4G
:
5530 asm_fprintf (asm_out_file
, ":lo12:");
5533 case SYMBOL_SMALL_TLSGD
:
5534 asm_fprintf (asm_out_file
, ":tlsgd_lo12:");
5537 case SYMBOL_SMALL_TLSDESC
:
5538 asm_fprintf (asm_out_file
, ":tlsdesc_lo12:");
5541 case SYMBOL_SMALL_TLSIE
:
5542 asm_fprintf (asm_out_file
, ":gottprel_lo12:");
5545 case SYMBOL_TLSLE12
:
5546 asm_fprintf (asm_out_file
, ":tprel_lo12:");
5549 case SYMBOL_TLSLE24
:
5550 asm_fprintf (asm_out_file
, ":tprel_lo12_nc:");
5553 case SYMBOL_TINY_GOT
:
5554 asm_fprintf (asm_out_file
, ":got:");
5557 case SYMBOL_TINY_TLSIE
:
5558 asm_fprintf (asm_out_file
, ":gottprel:");
5564 output_addr_const (asm_out_file
, x
);
5568 switch (aarch64_classify_symbolic_expression (x
))
5570 case SYMBOL_TLSLE24
:
5571 asm_fprintf (asm_out_file
, ":tprel_hi12:");
5576 output_addr_const (asm_out_file
, x
);
5581 HOST_WIDE_INT cond_code
;
5583 if (!CONST_INT_P (x
))
5585 output_operand_lossage ("invalid operand for '%%%c'", code
);
5589 cond_code
= INTVAL (x
);
5590 gcc_assert (cond_code
>= 0 && cond_code
<= AARCH64_NV
);
5591 asm_fprintf (f
, "%d", aarch64_nzcv_codes
[cond_code
]);
5596 output_operand_lossage ("invalid operand prefix '%%%c'", code
);
5602 aarch64_print_operand_address (FILE *f
, machine_mode mode
, rtx x
)
5604 struct aarch64_address_info addr
;
5606 if (aarch64_classify_address (&addr
, x
, mode
, MEM
, true))
5609 case ADDRESS_REG_IMM
:
5610 if (addr
.offset
== const0_rtx
)
5611 asm_fprintf (f
, "[%s]", reg_names
[REGNO (addr
.base
)]);
5613 asm_fprintf (f
, "[%s, %wd]", reg_names
[REGNO (addr
.base
)],
5614 INTVAL (addr
.offset
));
5617 case ADDRESS_REG_REG
:
5618 if (addr
.shift
== 0)
5619 asm_fprintf (f
, "[%s, %s]", reg_names
[REGNO (addr
.base
)],
5620 reg_names
[REGNO (addr
.offset
)]);
5622 asm_fprintf (f
, "[%s, %s, lsl %u]", reg_names
[REGNO (addr
.base
)],
5623 reg_names
[REGNO (addr
.offset
)], addr
.shift
);
5626 case ADDRESS_REG_UXTW
:
5627 if (addr
.shift
== 0)
5628 asm_fprintf (f
, "[%s, w%d, uxtw]", reg_names
[REGNO (addr
.base
)],
5629 REGNO (addr
.offset
) - R0_REGNUM
);
5631 asm_fprintf (f
, "[%s, w%d, uxtw %u]", reg_names
[REGNO (addr
.base
)],
5632 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
5635 case ADDRESS_REG_SXTW
:
5636 if (addr
.shift
== 0)
5637 asm_fprintf (f
, "[%s, w%d, sxtw]", reg_names
[REGNO (addr
.base
)],
5638 REGNO (addr
.offset
) - R0_REGNUM
);
5640 asm_fprintf (f
, "[%s, w%d, sxtw %u]", reg_names
[REGNO (addr
.base
)],
5641 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
5644 case ADDRESS_REG_WB
:
5645 switch (GET_CODE (x
))
5648 asm_fprintf (f
, "[%s, %d]!", reg_names
[REGNO (addr
.base
)],
5649 GET_MODE_SIZE (mode
));
5652 asm_fprintf (f
, "[%s], %d", reg_names
[REGNO (addr
.base
)],
5653 GET_MODE_SIZE (mode
));
5656 asm_fprintf (f
, "[%s, -%d]!", reg_names
[REGNO (addr
.base
)],
5657 GET_MODE_SIZE (mode
));
5660 asm_fprintf (f
, "[%s], -%d", reg_names
[REGNO (addr
.base
)],
5661 GET_MODE_SIZE (mode
));
5664 asm_fprintf (f
, "[%s, %wd]!", reg_names
[REGNO (addr
.base
)],
5665 INTVAL (addr
.offset
));
5668 asm_fprintf (f
, "[%s], %wd", reg_names
[REGNO (addr
.base
)],
5669 INTVAL (addr
.offset
));
5676 case ADDRESS_LO_SUM
:
5677 asm_fprintf (f
, "[%s, #:lo12:", reg_names
[REGNO (addr
.base
)]);
5678 output_addr_const (f
, addr
.offset
);
5679 asm_fprintf (f
, "]");
5682 case ADDRESS_SYMBOLIC
:
5686 output_addr_const (f
, x
);
5690 aarch64_label_mentioned_p (rtx x
)
5695 if (GET_CODE (x
) == LABEL_REF
)
5698 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
5699 referencing instruction, but they are constant offsets, not
5701 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
5704 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
5705 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
5711 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
5712 if (aarch64_label_mentioned_p (XVECEXP (x
, i
, j
)))
5715 else if (fmt
[i
] == 'e' && aarch64_label_mentioned_p (XEXP (x
, i
)))
/* Implement REGNO_REG_CLASS.  */

enum reg_class
aarch64_regno_regclass (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return GENERAL_REGS;

  if (regno == SP_REGNUM)
    return STACK_REG;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return POINTER_REGS;

  if (FP_REGNUM_P (regno))
    return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;

  return NO_REGS;
}
static rtx
aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
{
  /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
     where mask is selected by alignment and size of the offset.
     We try to pick as large a range for the offset as possible to
     maximize the chance of a CSE.  However, for aligned addresses
     we limit the range to 4k so that structures with different sized
     elements are likely to use the same base.  We need to be careful
     not to split a CONST for some forms of address expression, otherwise
     it will generate sub-optimal code.  */

  if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
    {
      rtx base = XEXP (x, 0);
      rtx offset_rtx = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (offset_rtx);

      if (GET_CODE (base) == PLUS)
	{
	  rtx op0 = XEXP (base, 0);
	  rtx op1 = XEXP (base, 1);

	  /* Force any scaling into a temp for CSE.  */
	  op0 = force_reg (Pmode, op0);
	  op1 = force_reg (Pmode, op1);

	  /* Let the pointer register be in op0.  */
	  if (REG_POINTER (op1))
	    std::swap (op0, op1);

	  /* If the pointer is virtual or frame related, then we know that
	     virtual register instantiation or register elimination is going
	     to apply a second constant.  We want the two constants folded
	     together easily.  Therefore, emit as (OP0 + CONST) + OP1.  */
	  if (virt_or_elim_regno_p (REGNO (op0)))
	    {
	      base = expand_binop (Pmode, add_optab, op0, offset_rtx,
				   NULL_RTX, true, OPTAB_DIRECT);
	      return gen_rtx_PLUS (Pmode, base, op1);
	    }

	  /* Otherwise, in order to encourage CSE (and thence loop strength
	     reduce) scaled addresses, emit as (OP0 + OP1) + CONST.  */
	  base = expand_binop (Pmode, add_optab, op0, op1,
			       NULL_RTX, true, OPTAB_DIRECT);
	  x = gen_rtx_PLUS (Pmode, base, offset_rtx);
	}

      /* Does it look like we'll need a 16-byte load/store-pair operation?  */
      HOST_WIDE_INT base_offset;
      if (GET_MODE_SIZE (mode) > 16)
	base_offset = (offset + 0x400) & ~0x7f0;
      /* For offsets that aren't a multiple of the access size, the limit is
	 -256...255.  */
      else if (offset & (GET_MODE_SIZE (mode) - 1))
	{
	  base_offset = (offset + 0x100) & ~0x1ff;

	  /* BLKmode typically uses LDP of X-registers.  */
	  if (mode == BLKmode)
	    base_offset = (offset + 512) & ~0x3ff;
	}
      /* Small negative offsets are supported.  */
      else if (IN_RANGE (offset, -256, 0))
	base_offset = 0;
      else if (mode == TImode || mode == TFmode)
	base_offset = (offset + 0x100) & ~0x1ff;
      /* Use 12-bit offset by access size.  */
      else
	base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));

      if (base_offset != 0)
	{
	  base = plus_constant (Pmode, base, base_offset);
	  base = force_operand (base, NULL_RTX);
	  return plus_constant (Pmode, base, offset - base_offset);
	}
    }

  return x;
}
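/* For example, for a DFmode access at (plus (reg X) (const_int 0x1f010))
   the code above rewrites the address as
     tmp = X + 0x18000
     mem (plus tmp 0x7010)
   since 0x7010 is a multiple of 8 within the scaled 12-bit range
   (0 ... 0x7ff8 for 8-byte accesses), allowing the 0x18000 anchor to be
   CSEd across neighbouring accesses.  */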
5826 /* Return the reload icode required for a constant pool in mode. */
5827 static enum insn_code
5828 aarch64_constant_pool_reload_icode (machine_mode mode
)
5833 return CODE_FOR_aarch64_reload_movcpsfdi
;
5836 return CODE_FOR_aarch64_reload_movcpdfdi
;
5839 return CODE_FOR_aarch64_reload_movcptfdi
;
5842 return CODE_FOR_aarch64_reload_movcpv8qidi
;
5845 return CODE_FOR_aarch64_reload_movcpv16qidi
;
5848 return CODE_FOR_aarch64_reload_movcpv4hidi
;
5851 return CODE_FOR_aarch64_reload_movcpv8hidi
;
5854 return CODE_FOR_aarch64_reload_movcpv2sidi
;
5857 return CODE_FOR_aarch64_reload_movcpv4sidi
;
5860 return CODE_FOR_aarch64_reload_movcpv2didi
;
5863 return CODE_FOR_aarch64_reload_movcpv2dfdi
;
5872 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED
, rtx x
,
5875 secondary_reload_info
*sri
)
5878 /* If we have to disable direct literal pool loads and stores because the
5879 function is too big, then we need a scratch register. */
5880 if (MEM_P (x
) && GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
)
5881 && (SCALAR_FLOAT_MODE_P (GET_MODE (x
))
5882 || targetm
.vector_mode_supported_p (GET_MODE (x
)))
5883 && !aarch64_pcrelative_literal_loads
)
5885 sri
->icode
= aarch64_constant_pool_reload_icode (mode
);
5889 /* Without the TARGET_SIMD instructions we cannot move a Q register
5890 to a Q register directly. We need a scratch. */
5891 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
5892 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
5893 && reg_class_subset_p (rclass
, FP_REGS
))
5896 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
5897 else if (mode
== TImode
)
5898 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
5902 /* A TFmode or TImode memory access should be handled via an FP_REGS
5903 because AArch64 has richer addressing modes for LDR/STR instructions
5904 than LDP/STP instructions. */
5905 if (TARGET_FLOAT
&& rclass
== GENERAL_REGS
5906 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
5909 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
5910 return GENERAL_REGS
;
5916 aarch64_can_eliminate (const int from
, const int to
)
5918 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
5919 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
5921 if (frame_pointer_needed
)
5923 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5925 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
5927 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
5928 && !cfun
->calls_alloca
)
5930 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5940 aarch64_initial_elimination_offset (unsigned from
, unsigned to
)
5942 aarch64_layout_frame ();
5944 if (to
== HARD_FRAME_POINTER_REGNUM
)
5946 if (from
== ARG_POINTER_REGNUM
)
5947 return cfun
->machine
->frame
.hard_fp_offset
;
5949 if (from
== FRAME_POINTER_REGNUM
)
5950 return cfun
->machine
->frame
.hard_fp_offset
5951 - cfun
->machine
->frame
.locals_offset
;
5954 if (to
== STACK_POINTER_REGNUM
)
5956 if (from
== FRAME_POINTER_REGNUM
)
5957 return cfun
->machine
->frame
.frame_size
5958 - cfun
->machine
->frame
.locals_offset
;
5961 return cfun
->machine
->frame
.frame_size
;
5964 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
5968 aarch64_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
5972 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
5977 aarch64_asm_trampoline_template (FILE *f
)
5981 asm_fprintf (f
, "\tldr\tw%d, .+16\n", IP1_REGNUM
- R0_REGNUM
);
5982 asm_fprintf (f
, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM
- R0_REGNUM
);
5986 asm_fprintf (f
, "\tldr\t%s, .+16\n", reg_names
[IP1_REGNUM
]);
5987 asm_fprintf (f
, "\tldr\t%s, .+20\n", reg_names
[STATIC_CHAIN_REGNUM
]);
5989 asm_fprintf (f
, "\tbr\t%s\n", reg_names
[IP1_REGNUM
]);
5990 assemble_aligned_integer (4, const0_rtx
);
5991 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
5992 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
5996 aarch64_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
5998 rtx fnaddr
, mem
, a_tramp
;
5999 const int tramp_code_sz
= 16;
6001 /* Don't need to copy the trailing D-words, we fill those in below. */
6002 emit_block_move (m_tramp
, assemble_trampoline_template (),
6003 GEN_INT (tramp_code_sz
), BLOCK_OP_NORMAL
);
6004 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
);
6005 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
6006 if (GET_MODE (fnaddr
) != ptr_mode
)
6007 fnaddr
= convert_memory_address (ptr_mode
, fnaddr
);
6008 emit_move_insn (mem
, fnaddr
);
6010 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
+ POINTER_BYTES
);
6011 emit_move_insn (mem
, chain_value
);
6013 /* XXX We should really define a "clear_cache" pattern and use
6014 gen_clear_cache(). */
6015 a_tramp
= XEXP (m_tramp
, 0);
6016 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
6017 LCT_NORMAL
, VOIDmode
, a_tramp
, ptr_mode
,
6018 plus_constant (ptr_mode
, a_tramp
, TRAMPOLINE_SIZE
),
6022 static unsigned char
6023 aarch64_class_max_nregs (reg_class_t regclass
, machine_mode mode
)
6027 case CALLER_SAVE_REGS
:
6031 case POINTER_AND_FP_REGS
:
6035 aarch64_vector_mode_p (mode
)
6036 ? (GET_MODE_SIZE (mode
) + UNITS_PER_VREG
- 1) / UNITS_PER_VREG
6037 : (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6051 aarch64_preferred_reload_class (rtx x
, reg_class_t regclass
)
6053 if (regclass
== POINTER_REGS
)
6054 return GENERAL_REGS
;
6056 if (regclass
== STACK_REG
)
6059 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x
)), POINTER_REGS
))
/* Register elimination can result in a request for
6066 SP+constant->FP_REGS. We cannot support such operations which
6067 use SP as source and an FP_REG as destination, so reject out
6069 if (! reg_class_subset_p (regclass
, GENERAL_REGS
) && GET_CODE (x
) == PLUS
)
6071 rtx lhs
= XEXP (x
, 0);
6073 /* Look through a possible SUBREG introduced by ILP32. */
6074 if (GET_CODE (lhs
) == SUBREG
)
6075 lhs
= SUBREG_REG (lhs
);
6077 gcc_assert (REG_P (lhs
));
6078 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs
)),
6087 aarch64_asm_output_labelref (FILE* f
, const char *name
)
6089 asm_fprintf (f
, "%U%s", name
);
6093 aarch64_elf_asm_constructor (rtx symbol
, int priority
)
6095 if (priority
== DEFAULT_INIT_PRIORITY
)
6096 default_ctor_section_asm_out_constructor (symbol
, priority
);
6100 /* While priority is known to be in range [0, 65535], so 18 bytes
6101 would be enough, the compiler might not know that. To avoid
6102 -Wformat-truncation false positive, use a larger size. */
6104 snprintf (buf
, sizeof (buf
), ".init_array.%.5u", priority
);
6105 s
= get_section (buf
, SECTION_WRITE
| SECTION_NOTYPE
, NULL
);
6106 switch_to_section (s
);
6107 assemble_align (POINTER_SIZE
);
6108 assemble_aligned_integer (POINTER_BYTES
, symbol
);
6113 aarch64_elf_asm_destructor (rtx symbol
, int priority
)
6115 if (priority
== DEFAULT_INIT_PRIORITY
)
6116 default_dtor_section_asm_out_destructor (symbol
, priority
);
6120 /* While priority is known to be in range [0, 65535], so 18 bytes
6121 would be enough, the compiler might not know that. To avoid
6122 -Wformat-truncation false positive, use a larger size. */
6124 snprintf (buf
, sizeof (buf
), ".fini_array.%.5u", priority
);
6125 s
= get_section (buf
, SECTION_WRITE
| SECTION_NOTYPE
, NULL
);
6126 switch_to_section (s
);
6127 assemble_align (POINTER_SIZE
);
6128 assemble_aligned_integer (POINTER_BYTES
, symbol
);
6133 aarch64_output_casesi (rtx
*operands
)
6137 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
6139 static const char *const patterns
[4][2] =
6142 "ldrb\t%w3, [%0,%w1,uxtw]",
6143 "add\t%3, %4, %w3, sxtb #2"
6146 "ldrh\t%w3, [%0,%w1,uxtw #1]",
6147 "add\t%3, %4, %w3, sxth #2"
6150 "ldr\t%w3, [%0,%w1,uxtw #2]",
6151 "add\t%3, %4, %w3, sxtw #2"
6153 /* We assume that DImode is only generated when not optimizing and
6154 that we don't really need 64-bit address offsets. That would
6155 imply an object file with 8GB of code in a single function! */
6157 "ldr\t%w3, [%0,%w1,uxtw #2]",
6158 "add\t%3, %4, %w3, sxtw #2"
6162 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
6164 scalar_int_mode mode
= as_a
<scalar_int_mode
> (GET_MODE (diff_vec
));
6165 index
= exact_log2 (GET_MODE_SIZE (mode
));
6167 gcc_assert (index
>= 0 && index
<= 3);
/* Need to implement table size reduction, by changing the code below. */
6170 output_asm_insn (patterns
[index
][0], operands
);
6171 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
6172 snprintf (buf
, sizeof (buf
),
6173 "adr\t%%4, %s", targetm
.strip_name_encoding (label
));
6174 output_asm_insn (buf
, operands
);
6175 output_asm_insn (patterns
[index
][1], operands
);
6176 output_asm_insn ("br\t%3", operands
);
6177 assemble_label (asm_out_file
, label
);
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */
int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
	{
	  HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
	  if (mask == bits << shift)
	    return size;
	}
    }
  return 0;
}
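/* For instance, aarch64_uxt_size (2, 0x3fc) is 8 (0xff << 2, i.e. a UXTB
   scaled by 4) and aarch64_uxt_size (1, 0x1fffe) is 16 (0xffff << 1, a
   UXTH scaled by 2); a mask that is not a contiguous 8/16/32-bit field
   at the given shift yields 0.  */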
6202 /* Constant pools are per function only when PC relative
6203 literal loads are true or we are in the large memory
6207 aarch64_can_use_per_function_literal_pools_p (void)
6209 return (aarch64_pcrelative_literal_loads
6210 || aarch64_cmodel
== AARCH64_CMODEL_LARGE
);
6214 aarch64_use_blocks_for_constant_p (machine_mode
, const_rtx
)
6216 /* Fixme:: In an ideal world this would work similar
6217 to the logic in aarch64_select_rtx_section but this
6218 breaks bootstrap in gcc go. For now we workaround
6219 this by returning false here. */
6223 /* Select appropriate section for constants depending
6224 on where we place literal pools. */
6227 aarch64_select_rtx_section (machine_mode mode
,
6229 unsigned HOST_WIDE_INT align
)
6231 if (aarch64_can_use_per_function_literal_pools_p ())
6232 return function_section (current_function_decl
);
6234 return default_elf_select_rtx_section (mode
, x
, align
);
6237 /* Implement ASM_OUTPUT_POOL_EPILOGUE. */
6239 aarch64_asm_output_pool_epilogue (FILE *f
, const char *, tree
,
6240 HOST_WIDE_INT offset
)
6242 /* When using per-function literal pools, we must ensure that any code
6243 section is aligned to the minimal instruction length, lest we get
6244 errors from the assembler re "unaligned instructions". */
6245 if ((offset
& 3) && aarch64_can_use_per_function_literal_pools_p ())
6246 ASM_OUTPUT_ALIGN (f
, 2);
6251 /* Helper function for rtx cost calculation. Strip a shift expression
6252 from X. Returns the inner operand if successful, or the original
6253 expression on failure. */
6255 aarch64_strip_shift (rtx x
)
6259 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
6260 we can convert both to ROR during final output. */
6261 if ((GET_CODE (op
) == ASHIFT
6262 || GET_CODE (op
) == ASHIFTRT
6263 || GET_CODE (op
) == LSHIFTRT
6264 || GET_CODE (op
) == ROTATERT
6265 || GET_CODE (op
) == ROTATE
)
6266 && CONST_INT_P (XEXP (op
, 1)))
6267 return XEXP (op
, 0);
6269 if (GET_CODE (op
) == MULT
6270 && CONST_INT_P (XEXP (op
, 1))
6271 && ((unsigned) exact_log2 (INTVAL (XEXP (op
, 1)))) < 64)
6272 return XEXP (op
, 0);
6277 /* Helper function for rtx cost calculation. Strip an extend
6278 expression from X. Returns the inner operand if successful, or the
6279 original expression on failure. We deal with a number of possible
6280 canonicalization variations here. If STRIP_SHIFT is true, then
6281 we can strip off a shift also. */
6283 aarch64_strip_extend (rtx x
, bool strip_shift
)
6285 scalar_int_mode mode
;
6288 if (!is_a
<scalar_int_mode
> (GET_MODE (op
), &mode
))
6291 /* Zero and sign extraction of a widened value. */
6292 if ((GET_CODE (op
) == ZERO_EXTRACT
|| GET_CODE (op
) == SIGN_EXTRACT
)
6293 && XEXP (op
, 2) == const0_rtx
6294 && GET_CODE (XEXP (op
, 0)) == MULT
6295 && aarch64_is_extend_from_extract (mode
, XEXP (XEXP (op
, 0), 1),
6297 return XEXP (XEXP (op
, 0), 0);
6299 /* It can also be represented (for zero-extend) as an AND with an
6301 if (GET_CODE (op
) == AND
6302 && GET_CODE (XEXP (op
, 0)) == MULT
6303 && CONST_INT_P (XEXP (XEXP (op
, 0), 1))
6304 && CONST_INT_P (XEXP (op
, 1))
6305 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op
, 0), 1))),
6306 INTVAL (XEXP (op
, 1))) != 0)
6307 return XEXP (XEXP (op
, 0), 0);
6309 /* Now handle extended register, as this may also have an optional
6310 left shift by 1..4. */
6312 && GET_CODE (op
) == ASHIFT
6313 && CONST_INT_P (XEXP (op
, 1))
6314 && ((unsigned HOST_WIDE_INT
) INTVAL (XEXP (op
, 1))) <= 4)
6317 if (GET_CODE (op
) == ZERO_EXTEND
6318 || GET_CODE (op
) == SIGN_EXTEND
)
/* Return true iff CODE is a shift supported in combination
   with arithmetic instructions.  */

static bool
aarch64_shift_p (enum rtx_code code)
{
  return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
}
6337 /* Return true iff X is a cheap shift without a sign extend. */
6340 aarch64_cheap_mult_shift_p (rtx x
)
6347 if (!(aarch64_tune_params
.extra_tuning_flags
6348 & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
))
6351 if (GET_CODE (op0
) == SIGN_EXTEND
)
6354 if (GET_CODE (x
) == ASHIFT
&& CONST_INT_P (op1
)
6355 && UINTVAL (op1
) <= 4)
6358 if (GET_CODE (x
) != MULT
|| !CONST_INT_P (op1
))
6361 HOST_WIDE_INT l2
= exact_log2 (INTVAL (op1
));
6363 if (l2
> 0 && l2
<= 4)
6369 /* Helper function for rtx cost calculation. Calculate the cost of
6370 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
6371 Return the calculated cost of the expression, recursing manually in to
6372 operands where needed. */
6375 aarch64_rtx_mult_cost (rtx x
, enum rtx_code code
, int outer
, bool speed
)
6378 const struct cpu_cost_table
*extra_cost
6379 = aarch64_tune_params
.insn_extra_cost
;
6381 bool compound_p
= (outer
== PLUS
|| outer
== MINUS
);
6382 machine_mode mode
= GET_MODE (x
);
6384 gcc_checking_assert (code
== MULT
);
6389 if (VECTOR_MODE_P (mode
))
6390 mode
= GET_MODE_INNER (mode
);
6392 /* Integer multiply/fma. */
6393 if (GET_MODE_CLASS (mode
) == MODE_INT
)
6395 /* The multiply will be canonicalized as a shift, cost it as such. */
6396 if (aarch64_shift_p (GET_CODE (x
))
6397 || (CONST_INT_P (op1
)
6398 && exact_log2 (INTVAL (op1
)) > 0))
6400 bool is_extend
= GET_CODE (op0
) == ZERO_EXTEND
6401 || GET_CODE (op0
) == SIGN_EXTEND
;
6406 /* If the shift is considered cheap,
6407 then don't add any cost. */
6408 if (aarch64_cheap_mult_shift_p (x
))
6410 else if (REG_P (op1
))
6411 /* ARITH + shift-by-register. */
6412 cost
+= extra_cost
->alu
.arith_shift_reg
;
6414 /* ARITH + extended register. We don't have a cost field
6415 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
6416 cost
+= extra_cost
->alu
.extend_arith
;
6418 /* ARITH + shift-by-immediate. */
6419 cost
+= extra_cost
->alu
.arith_shift
;
6422 /* LSL (immediate). */
6423 cost
+= extra_cost
->alu
.shift
;
6426 /* Strip extends as we will have costed them in the case above. */
6428 op0
= aarch64_strip_extend (op0
, true);
6430 cost
+= rtx_cost (op0
, VOIDmode
, code
, 0, speed
);
6435 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
6436 compound and let the below cases handle it. After all, MNEG is a
6437 special-case alias of MSUB. */
6438 if (GET_CODE (op0
) == NEG
)
6440 op0
= XEXP (op0
, 0);
6444 /* Integer multiplies or FMAs have zero/sign extending variants. */
6445 if ((GET_CODE (op0
) == ZERO_EXTEND
6446 && GET_CODE (op1
) == ZERO_EXTEND
)
6447 || (GET_CODE (op0
) == SIGN_EXTEND
6448 && GET_CODE (op1
) == SIGN_EXTEND
))
6450 cost
+= rtx_cost (XEXP (op0
, 0), VOIDmode
, MULT
, 0, speed
);
6451 cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, MULT
, 1, speed
);
6456 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
6457 cost
+= extra_cost
->mult
[0].extend_add
;
6459 /* MUL/SMULL/UMULL. */
6460 cost
+= extra_cost
->mult
[0].extend
;
6466 /* This is either an integer multiply or a MADD. In both cases
6467 we want to recurse and cost the operands. */
6468 cost
+= rtx_cost (op0
, mode
, MULT
, 0, speed
);
6469 cost
+= rtx_cost (op1
, mode
, MULT
, 1, speed
);
6475 cost
+= extra_cost
->mult
[mode
== DImode
].add
;
6478 cost
+= extra_cost
->mult
[mode
== DImode
].simple
;
6487 /* Floating-point FMA/FMUL can also support negations of the
6488 operands, unless the rounding mode is upward or downward in
6489 which case FNMUL is different than FMUL with operand negation. */
6490 bool neg0
= GET_CODE (op0
) == NEG
;
6491 bool neg1
= GET_CODE (op1
) == NEG
;
6492 if (compound_p
|| !flag_rounding_math
|| (neg0
&& neg1
))
6495 op0
= XEXP (op0
, 0);
6497 op1
= XEXP (op1
, 0);
6501 /* FMADD/FNMADD/FNMSUB/FMSUB. */
6502 cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
6505 cost
+= extra_cost
->fp
[mode
== DFmode
].mult
;
6508 cost
+= rtx_cost (op0
, mode
, MULT
, 0, speed
);
6509 cost
+= rtx_cost (op1
, mode
, MULT
, 1, speed
);
static int
aarch64_address_cost (rtx x,
                      machine_mode mode,
                      addr_space_t as ATTRIBUTE_UNUSED,
                      bool speed)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
  struct aarch64_address_info info;
  int cost = 0;

  if (!aarch64_classify_address (&info, x, mode, c, false))
    {
      if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
        {
          /* This is a CONST or SYMBOL ref which will be split
             in a different way depending on the code model in use.
             Cost it through the generic infrastructure.  */
          int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
          /* Divide through by the cost of one instruction to
             bring it to the same units as the address costs.  */
          cost_symbol_ref /= COSTS_N_INSNS (1);
          /* The cost is then the cost of preparing the address,
             followed by an immediate (possibly 0) offset.  */
          return cost_symbol_ref + addr_cost->imm_offset;
        }
      else
        {
          /* This is most likely a jump table from a case
             statement.  */
          return addr_cost->register_offset;
        }
    }

  switch (info.type)
    {
      case ADDRESS_LO_SUM:
      case ADDRESS_SYMBOLIC:
      case ADDRESS_REG_IMM:
        cost += addr_cost->imm_offset;
        break;

      case ADDRESS_REG_WB:
        if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
          cost += addr_cost->pre_modify;
        else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
          cost += addr_cost->post_modify;
        else
          gcc_unreachable ();
        break;

      case ADDRESS_REG_REG:
        cost += addr_cost->register_offset;
        break;

      case ADDRESS_REG_SXTW:
        cost += addr_cost->register_sextend;
        break;

      case ADDRESS_REG_UXTW:
        cost += addr_cost->register_zextend;
        break;

      default:
        gcc_unreachable ();
    }

  if (info.shift > 0)
    {
      /* For the sake of calculating the cost of the shifted register
         component, we can treat same sized modes in the same way.  */
      switch (GET_MODE_BITSIZE (mode))
        {
          case 16:
            cost += addr_cost->addr_scale_costs.hi;
            break;

          case 32:
            cost += addr_cost->addr_scale_costs.si;
            break;

          case 64:
            cost += addr_cost->addr_scale_costs.di;
            break;

          /* We can't tell, or this is a 128-bit vector.  */
          default:
            cost += addr_cost->addr_scale_costs.ti;
            break;
        }
    }

  return cost;
}
/* Return the cost of a branch.  If SPEED_P is true then the compiler is
   optimizing for speed.  If PREDICTABLE_P is true then the branch is predicted
   to be taken.  */

static int
aarch64_branch_cost (bool speed_p, bool predictable_p)
{
  /* When optimizing for speed, use the cost of unpredictable branches.  */
  const struct cpu_branch_cost *branch_costs =
    aarch64_tune_params.branch_costs;

  if (!speed_p || predictable_p)
    return branch_costs->predictable;
  else
    return branch_costs->unpredictable;
}
/* Return true if the RTX X in mode MODE is a zero or sign extract
   usable in an ADD or SUB (extended register) instruction.  */
static bool
aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
{
  /* Catch add with a sign extract.
     This is add_<optab><mode>_multp2.  */
  if (GET_CODE (x) == SIGN_EXTRACT
      || GET_CODE (x) == ZERO_EXTRACT)
    {
      rtx op0 = XEXP (x, 0);
      rtx op1 = XEXP (x, 1);
      rtx op2 = XEXP (x, 2);

      if (GET_CODE (op0) == MULT
          && CONST_INT_P (op1)
          && op2 == const0_rtx
          && CONST_INT_P (XEXP (op0, 1))
          && aarch64_is_extend_from_extract (mode,
                                             XEXP (op0, 1),
                                             op1))
        return true;
    }
  /* The simple case <ARITH>, XD, XN, XM, [us]xt.
     No shift.  */
  else if (GET_CODE (x) == SIGN_EXTEND
           || GET_CODE (x) == ZERO_EXTEND)
    return REG_P (XEXP (x, 0));

  return false;
}
aarch64_frint_unspec_p (unsigned int u)
/* Return true iff X is an rtx that will match an extr instruction
   i.e. as described in the *extr<mode>5_insn family of patterns.
   OP0 and OP1 will be set to the operands of the shifts involved
   on success and will be NULL_RTX otherwise.  */

static bool
aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
{
  rtx op0, op1;
  scalar_int_mode mode;
  if (!is_a <scalar_int_mode> (GET_MODE (x), &mode))
    return false;

  *res_op0 = NULL_RTX;
  *res_op1 = NULL_RTX;

  if (GET_CODE (x) != IOR)
    return false;

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
      || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
    {
      /* Canonicalise locally to ashift in op0, lshiftrt in op1.  */
      if (GET_CODE (op1) == ASHIFT)
        std::swap (op0, op1);

      if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
        return false;

      unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
      unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));

      if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
          && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
        {
          *res_op0 = XEXP (op0, 0);
          *res_op1 = XEXP (op1, 0);
          return true;
        }
    }

  return false;
}
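/* Illustrative sketch only, kept out of the build with #if 0: the predicate
   above relies on the identity that, for an N-bit mode, combining a left
   shift by S of one operand with a right shift by N - S of a second operand
   selects N contiguous bits from the concatenation of the two values, which
   is what the EXTR instruction computes.  The helpers below restate that
   condition on plain 32-bit integers; the names are invented for the example
   and are not part of the port.  */
#if 0
#include <stdint.h>
#include <stdbool.h>

/* Shift amounts S0 (ashift) and S1 (lshiftrt) of a 32-bit IOR match the
   EXTR form only when they sum to the mode width.  */
static bool
example_extr_shift_amounts_ok (unsigned s0, unsigned s1)
{
  return s0 < 32 && s0 + s1 == 32;
}

/* Reference model of the value EXTR would produce for 1 <= S0 <= 31.  */
static uint32_t
example_extr (uint32_t a, uint32_t b, unsigned s0)
{
  return (a << s0) | (b >> (32 - s0));
}
#endif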
/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
   storing it in *COST.  Result is true if the total cost of the operation
   has now been calculated.  */
static bool
aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
{
  rtx inner;
  rtx comparator;
  enum rtx_code cmpcode;

  if (COMPARISON_P (op0))
    {
      inner = XEXP (op0, 0);
      comparator = XEXP (op0, 1);
      cmpcode = GET_CODE (op0);
    }
  else
    {
      inner = op0;
      comparator = const0_rtx;
      cmpcode = NE;
    }

  if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
    {
      /* Conditional branch.  */
      if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
        return true;
      else
        {
          if (cmpcode == NE || cmpcode == EQ)
            {
              if (comparator == const0_rtx)
                {
                  /* TBZ/TBNZ/CBZ/CBNZ.  */
                  if (GET_CODE (inner) == ZERO_EXTRACT)
                    /* TBZ/TBNZ.  */
                    *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
                                       ZERO_EXTRACT, 0, speed);
                  else
                    /* CBZ/CBNZ.  */
                    *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);

                  return true;
                }
            }
          else if (cmpcode == LT || cmpcode == GE)
            {
              /* TBZ/TBNZ.  */
              if (comparator == const0_rtx)
                return true;
            }
        }
    }
  else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
    {
      if (GET_CODE (op1) == COMPARE)
        {
          /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0.  */
          if (XEXP (op1, 1) == const0_rtx)
            *cost += 1;
          if (speed)
            {
              machine_mode mode = GET_MODE (XEXP (op1, 0));
              const struct cpu_cost_table *extra_cost
                = aarch64_tune_params.insn_extra_cost;

              if (GET_MODE_CLASS (mode) == MODE_INT)
                *cost += extra_cost->alu.arith;
              else
                *cost += extra_cost->fp[mode == DFmode].compare;
            }
          return true;
        }

      /* It's a conditional operation based on the status flags,
         so it must be some flavor of CSEL.  */

      /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
      if (GET_CODE (op1) == NEG
          || GET_CODE (op1) == NOT
          || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
        op1 = XEXP (op1, 0);
      else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
        {
          /* CSEL with zero-extension (*cmovdi_insn_uxtw).  */
          op1 = XEXP (op1, 0);
          op2 = XEXP (op2, 0);
        }

      *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
      *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
      return true;
    }

  /* We don't know what this is, cost all operands.  */
  return false;
}
/* Check whether X is a bitfield operation of the form shift + extend that
   maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction.  If so, return the
   operand to which the bitfield operation is applied.  Otherwise return
   NULL_RTX.  */

static rtx
aarch64_extend_bitfield_pattern_p (rtx x)
{
  rtx_code outer_code = GET_CODE (x);
  machine_mode outer_mode = GET_MODE (x);

  if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
      && outer_mode != SImode && outer_mode != DImode)
    return NULL_RTX;

  rtx inner = XEXP (x, 0);
  rtx_code inner_code = GET_CODE (inner);
  machine_mode inner_mode = GET_MODE (inner);
  rtx op = NULL_RTX;

  switch (inner_code)
    {
      case ASHIFT:
        if (CONST_INT_P (XEXP (inner, 1))
            && (inner_mode == QImode || inner_mode == HImode))
          op = XEXP (inner, 0);
        break;
      case LSHIFTRT:
        if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
            && (inner_mode == QImode || inner_mode == HImode))
          op = XEXP (inner, 0);
        break;
      case ASHIFTRT:
        if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
            && (inner_mode == QImode || inner_mode == HImode))
          op = XEXP (inner, 0);
        break;
      default:
        break;
    }

  return op;
}
/* Return true if the mask and a shift amount from an RTX of the form
   (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
   mode MODE.  See the *andim_ashift<mode>_bfiz pattern.  */

static bool
aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
                                    rtx shft_amnt)
{
  return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
         && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
         && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
         && (INTVAL (mask) & ((1 << INTVAL (shft_amnt)) - 1)) == 0;
}
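/* Illustrative sketch only, kept out of the build with #if 0: the predicate
   above accepts (x << SHIFT) & MASK when MASK shifted right by SHIFT is a
   contiguous run of set bits starting at bit 0 and MASK has no bits set
   below SHIFT.  For example MASK = 0x0ff0 and SHIFT = 4 qualify, because
   0x0ff0 >> 4 = 0xff and 0xff + 1 is a power of two.  The helper below is a
   plain-integer restatement; the name is invented and it is not part of the
   port.  */
#if 0
#include <stdint.h>
#include <stdbool.h>

static bool
example_ubfiz_mask_and_shift_ok (uint64_t mask, unsigned shift,
                                 unsigned bitsize)
{
  uint64_t low_bits = ((uint64_t) 1 << shift) - 1;
  uint64_t run = (mask >> shift) + 1;
  return shift < bitsize
         && run != 0
         && (run & (run - 1)) == 0   /* (mask >> shift) + 1 is a power of 2.  */
         && (mask & low_bits) == 0;  /* No mask bits below the shift.  */
}
#endif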
6887 /* Calculate the cost of calculating X, storing it in *COST. Result
6888 is true if the total cost of the operation has now been calculated. */
6890 aarch64_rtx_costs (rtx x
, machine_mode mode
, int outer ATTRIBUTE_UNUSED
,
6891 int param ATTRIBUTE_UNUSED
, int *cost
, bool speed
)
6894 const struct cpu_cost_table
*extra_cost
6895 = aarch64_tune_params
.insn_extra_cost
;
6896 int code
= GET_CODE (x
);
6897 scalar_int_mode int_mode
;
6899 /* By default, assume that everything has equivalent cost to the
6900 cheapest instruction. Any additional costs are applied as a delta
6901 above this default. */
6902 *cost
= COSTS_N_INSNS (1);
6907 /* The cost depends entirely on the operands to SET. */
6912 switch (GET_CODE (op0
))
6917 rtx address
= XEXP (op0
, 0);
6918 if (VECTOR_MODE_P (mode
))
6919 *cost
+= extra_cost
->ldst
.storev
;
6920 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
6921 *cost
+= extra_cost
->ldst
.store
;
6922 else if (mode
== SFmode
)
6923 *cost
+= extra_cost
->ldst
.storef
;
6924 else if (mode
== DFmode
)
6925 *cost
+= extra_cost
->ldst
.stored
;
6928 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
6932 *cost
+= rtx_cost (op1
, mode
, SET
, 1, speed
);
6936 if (! REG_P (SUBREG_REG (op0
)))
6937 *cost
+= rtx_cost (SUBREG_REG (op0
), VOIDmode
, SET
, 0, speed
);
6941 /* The cost is one per vector-register copied. */
6942 if (VECTOR_MODE_P (GET_MODE (op0
)) && REG_P (op1
))
6944 int nregs
= aarch64_hard_regno_nregs (V0_REGNUM
, GET_MODE (op0
));
6945 *cost
= COSTS_N_INSNS (nregs
);
6947 /* const0_rtx is in general free, but we will use an
6948 instruction to set a register to 0. */
6949 else if (REG_P (op1
) || op1
== const0_rtx
)
6951 /* The cost is 1 per register copied. */
6952 int nregs
= aarch64_hard_regno_nregs (R0_REGNUM
, GET_MODE (op0
));
6953 *cost
= COSTS_N_INSNS (nregs
);
6956 /* Cost is just the cost of the RHS of the set. */
6957 *cost
+= rtx_cost (op1
, mode
, SET
, 1, speed
);
6962 /* Bit-field insertion. Strip any redundant widening of
6963 the RHS to meet the width of the target. */
6964 if (GET_CODE (op1
) == SUBREG
)
6965 op1
= SUBREG_REG (op1
);
6966 if ((GET_CODE (op1
) == ZERO_EXTEND
6967 || GET_CODE (op1
) == SIGN_EXTEND
)
6968 && CONST_INT_P (XEXP (op0
, 1))
6969 && is_a
<scalar_int_mode
> (GET_MODE (XEXP (op1
, 0)), &int_mode
)
6970 && GET_MODE_BITSIZE (int_mode
) >= INTVAL (XEXP (op0
, 1)))
6971 op1
= XEXP (op1
, 0);
6973 if (CONST_INT_P (op1
))
6975 /* MOV immediate is assumed to always be cheap. */
6976 *cost
= COSTS_N_INSNS (1);
6982 *cost
+= extra_cost
->alu
.bfi
;
6983 *cost
+= rtx_cost (op1
, VOIDmode
, (enum rtx_code
) code
, 1, speed
);
6989 /* We can't make sense of this, assume default cost. */
6990 *cost
= COSTS_N_INSNS (1);
6996 /* If an instruction can incorporate a constant within the
6997 instruction, the instruction's expression avoids calling
6998 rtx_cost() on the constant. If rtx_cost() is called on a
6999 constant, then it is usually because the constant must be
7000 moved into a register by one or more instructions.
7002 The exception is constant 0, which can be expressed
7003 as XZR/WZR and is therefore free. The exception to this is
7004 if we have (set (reg) (const0_rtx)) in which case we must cost
7005 the move. However, we can catch that when we cost the SET, so
7006 we don't need to consider that here. */
7007 if (x
== const0_rtx
)
7011 /* To an approximation, building any other constant is
7012 proportionally expensive to the number of instructions
7013 required to build that constant. This is true whether we
7014 are compiling for SPEED or otherwise. */
7015 if (!is_a
<scalar_int_mode
> (mode
, &int_mode
))
7016 int_mode
= word_mode
;
7017 *cost
= COSTS_N_INSNS (aarch64_internal_mov_immediate
7018 (NULL_RTX
, x
, false, int_mode
));
7024 /* First determine number of instructions to do the move
7025 as an integer constant. */
7026 if (!aarch64_float_const_representable_p (x
)
7027 && !aarch64_can_const_movi_rtx_p (x
, mode
)
7028 && aarch64_float_const_rtx_p (x
))
7030 unsigned HOST_WIDE_INT ival
;
7031 bool succeed
= aarch64_reinterpret_float_as_int (x
, &ival
);
7032 gcc_assert (succeed
);
7034 scalar_int_mode imode
= (mode
== HFmode
7036 : int_mode_for_mode (mode
).require ());
7037 int ncost
= aarch64_internal_mov_immediate
7038 (NULL_RTX
, gen_int_mode (ival
, imode
), false, imode
);
7039 *cost
+= COSTS_N_INSNS (ncost
);
7045 /* mov[df,sf]_aarch64. */
7046 if (aarch64_float_const_representable_p (x
))
7047 /* FMOV (scalar immediate). */
7048 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
7049 else if (!aarch64_float_const_zero_rtx_p (x
))
7051 /* This will be a load from memory. */
7053 *cost
+= extra_cost
->ldst
.loadd
;
7055 *cost
+= extra_cost
->ldst
.loadf
;
7058 /* Otherwise this is +0.0. We get this using MOVI d0, #0
7059 or MOV v0.s[0], wzr - neither of which are modeled by the
7060 cost tables. Just use the default cost. */
7070 /* For loads we want the base cost of a load, plus an
7071 approximation for the additional cost of the addressing
7073 rtx address
= XEXP (x
, 0);
7074 if (VECTOR_MODE_P (mode
))
7075 *cost
+= extra_cost
->ldst
.loadv
;
7076 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
7077 *cost
+= extra_cost
->ldst
.load
;
7078 else if (mode
== SFmode
)
7079 *cost
+= extra_cost
->ldst
.loadf
;
7080 else if (mode
== DFmode
)
7081 *cost
+= extra_cost
->ldst
.loadd
;
7084 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
7093 if (VECTOR_MODE_P (mode
))
7098 *cost
+= extra_cost
->vect
.alu
;
7103 if (GET_MODE_CLASS (mode
) == MODE_INT
)
7105 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
7106 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
7109 *cost
+= rtx_cost (XEXP (op0
, 0), VOIDmode
, NEG
, 0, speed
);
7113 /* Cost this as SUB wzr, X. */
7114 op0
= CONST0_RTX (mode
);
7119 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7121 /* Support (neg(fma...)) as a single instruction only if
7122 sign of zeros is unimportant. This matches the decision
7123 making in aarch64.md. */
7124 if (GET_CODE (op0
) == FMA
&& !HONOR_SIGNED_ZEROS (GET_MODE (op0
)))
7127 *cost
= rtx_cost (op0
, mode
, NEG
, 0, speed
);
7130 if (GET_CODE (op0
) == MULT
)
7133 *cost
= rtx_cost (op0
, mode
, NEG
, 0, speed
);
7138 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
7148 if (VECTOR_MODE_P (mode
))
7149 *cost
+= extra_cost
->vect
.alu
;
7151 *cost
+= extra_cost
->alu
.clz
;
7160 if (op1
== const0_rtx
7161 && GET_CODE (op0
) == AND
)
7164 mode
= GET_MODE (op0
);
7168 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
7170 /* TODO: A write to the CC flags possibly costs extra, this
7171 needs encoding in the cost tables. */
7173 mode
= GET_MODE (op0
);
7175 if (GET_CODE (op0
) == AND
)
7181 if (GET_CODE (op0
) == PLUS
)
7183 /* ADDS (and CMN alias). */
7188 if (GET_CODE (op0
) == MINUS
)
7195 if (GET_CODE (op0
) == ZERO_EXTRACT
&& op1
== const0_rtx
7196 && GET_MODE (x
) == CC_NZmode
&& CONST_INT_P (XEXP (op0
, 1))
7197 && CONST_INT_P (XEXP (op0
, 2)))
7199 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
7200 Handle it here directly rather than going to cost_logic
7201 since we know the immediate generated for the TST is valid
7202 so we can avoid creating an intermediate rtx for it only
7203 for costing purposes. */
7205 *cost
+= extra_cost
->alu
.logical
;
7207 *cost
+= rtx_cost (XEXP (op0
, 0), GET_MODE (op0
),
7208 ZERO_EXTRACT
, 0, speed
);
7212 if (GET_CODE (op1
) == NEG
)
7216 *cost
+= extra_cost
->alu
.arith
;
7218 *cost
+= rtx_cost (op0
, mode
, COMPARE
, 0, speed
);
7219 *cost
+= rtx_cost (XEXP (op1
, 0), mode
, NEG
, 1, speed
);
7225 Compare can freely swap the order of operands, and
7226 canonicalization puts the more complex operation first.
7227 But the integer MINUS logic expects the shift/extend
7228 operation in op1. */
7230 || (GET_CODE (op0
) == SUBREG
&& REG_P (SUBREG_REG (op0
)))))
7238 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
7242 *cost
+= extra_cost
->fp
[mode
== DFmode
].compare
;
7244 if (CONST_DOUBLE_P (op1
) && aarch64_float_const_zero_rtx_p (op1
))
7246 *cost
+= rtx_cost (op0
, VOIDmode
, COMPARE
, 0, speed
);
7247 /* FCMP supports constant 0.0 for no extra cost. */
7253 if (VECTOR_MODE_P (mode
))
7255 /* Vector compare. */
7257 *cost
+= extra_cost
->vect
.alu
;
7259 if (aarch64_float_const_zero_rtx_p (op1
))
7261 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
7275 *cost
+= rtx_cost (op0
, mode
, MINUS
, 0, speed
);
7277 /* Detect valid immediates. */
7278 if ((GET_MODE_CLASS (mode
) == MODE_INT
7279 || (GET_MODE_CLASS (mode
) == MODE_CC
7280 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
))
7281 && CONST_INT_P (op1
)
7282 && aarch64_uimm12_shift (INTVAL (op1
)))
7285 /* SUB(S) (immediate). */
7286 *cost
+= extra_cost
->alu
.arith
;
7290 /* Look for SUB (extended register). */
7291 if (is_a
<scalar_int_mode
> (mode
, &int_mode
)
7292 && aarch64_rtx_arith_op_extract_p (op1
, int_mode
))
7295 *cost
+= extra_cost
->alu
.extend_arith
;
7297 op1
= aarch64_strip_extend (op1
, true);
7298 *cost
+= rtx_cost (op1
, VOIDmode
,
7299 (enum rtx_code
) GET_CODE (op1
), 0, speed
);
7303 rtx new_op1
= aarch64_strip_extend (op1
, false);
7305 /* Cost this as an FMA-alike operation. */
7306 if ((GET_CODE (new_op1
) == MULT
7307 || aarch64_shift_p (GET_CODE (new_op1
)))
7310 *cost
+= aarch64_rtx_mult_cost (new_op1
, MULT
,
7311 (enum rtx_code
) code
,
7316 *cost
+= rtx_cost (new_op1
, VOIDmode
, MINUS
, 1, speed
);
7320 if (VECTOR_MODE_P (mode
))
7323 *cost
+= extra_cost
->vect
.alu
;
7325 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
7328 *cost
+= extra_cost
->alu
.arith
;
7330 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7333 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
7347 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
7348 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
7351 *cost
+= rtx_cost (XEXP (op0
, 0), mode
, PLUS
, 0, speed
);
7352 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed
);
7356 if (GET_MODE_CLASS (mode
) == MODE_INT
7357 && CONST_INT_P (op1
)
7358 && aarch64_uimm12_shift (INTVAL (op1
)))
7360 *cost
+= rtx_cost (op0
, mode
, PLUS
, 0, speed
);
7363 /* ADD (immediate). */
7364 *cost
+= extra_cost
->alu
.arith
;
7368 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed
);
7370 /* Look for ADD (extended register). */
7371 if (is_a
<scalar_int_mode
> (mode
, &int_mode
)
7372 && aarch64_rtx_arith_op_extract_p (op0
, int_mode
))
7375 *cost
+= extra_cost
->alu
.extend_arith
;
7377 op0
= aarch64_strip_extend (op0
, true);
7378 *cost
+= rtx_cost (op0
, VOIDmode
,
7379 (enum rtx_code
) GET_CODE (op0
), 0, speed
);
7383 /* Strip any extend, leave shifts behind as we will
7384 cost them through mult_cost. */
7385 new_op0
= aarch64_strip_extend (op0
, false);
7387 if (GET_CODE (new_op0
) == MULT
7388 || aarch64_shift_p (GET_CODE (new_op0
)))
7390 *cost
+= aarch64_rtx_mult_cost (new_op0
, MULT
, PLUS
,
7395 *cost
+= rtx_cost (new_op0
, VOIDmode
, PLUS
, 0, speed
);
7399 if (VECTOR_MODE_P (mode
))
7402 *cost
+= extra_cost
->vect
.alu
;
7404 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
7407 *cost
+= extra_cost
->alu
.arith
;
7409 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7412 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
7419 *cost
= COSTS_N_INSNS (1);
7423 if (VECTOR_MODE_P (mode
))
7424 *cost
+= extra_cost
->vect
.alu
;
7426 *cost
+= extra_cost
->alu
.rev
;
7431 if (aarch_rev16_p (x
))
7433 *cost
= COSTS_N_INSNS (1);
7437 if (VECTOR_MODE_P (mode
))
7438 *cost
+= extra_cost
->vect
.alu
;
7440 *cost
+= extra_cost
->alu
.rev
;
7445 if (aarch64_extr_rtx_p (x
, &op0
, &op1
))
7447 *cost
+= rtx_cost (op0
, mode
, IOR
, 0, speed
);
7448 *cost
+= rtx_cost (op1
, mode
, IOR
, 1, speed
);
7450 *cost
+= extra_cost
->alu
.shift
;
7461 if (VECTOR_MODE_P (mode
))
7464 *cost
+= extra_cost
->vect
.alu
;
7469 && GET_CODE (op0
) == MULT
7470 && CONST_INT_P (XEXP (op0
, 1))
7471 && CONST_INT_P (op1
)
7472 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0
, 1))),
7475 /* This is a UBFM/SBFM. */
7476 *cost
+= rtx_cost (XEXP (op0
, 0), mode
, ZERO_EXTRACT
, 0, speed
);
7478 *cost
+= extra_cost
->alu
.bfx
;
7482 if (is_int_mode (mode
, &int_mode
))
7484 if (CONST_INT_P (op1
))
7486 /* We have a mask + shift version of a UBFIZ
7487 i.e. the *andim_ashift<mode>_bfiz pattern. */
7488 if (GET_CODE (op0
) == ASHIFT
7489 && aarch64_mask_and_shift_for_ubfiz_p (int_mode
, op1
,
7492 *cost
+= rtx_cost (XEXP (op0
, 0), int_mode
,
7493 (enum rtx_code
) code
, 0, speed
);
7495 *cost
+= extra_cost
->alu
.bfx
;
7499 else if (aarch64_bitmask_imm (INTVAL (op1
), int_mode
))
7501 /* We possibly get the immediate for free, this is not
7503 *cost
+= rtx_cost (op0
, int_mode
,
7504 (enum rtx_code
) code
, 0, speed
);
7506 *cost
+= extra_cost
->alu
.logical
;
7515 /* Handle ORN, EON, or BIC. */
7516 if (GET_CODE (op0
) == NOT
)
7517 op0
= XEXP (op0
, 0);
7519 new_op0
= aarch64_strip_shift (op0
);
7521 /* If we had a shift on op0 then this is a logical-shift-
7522 by-register/immediate operation. Otherwise, this is just
7523 a logical operation. */
7528 /* Shift by immediate. */
7529 if (CONST_INT_P (XEXP (op0
, 1)))
7530 *cost
+= extra_cost
->alu
.log_shift
;
7532 *cost
+= extra_cost
->alu
.log_shift_reg
;
7535 *cost
+= extra_cost
->alu
.logical
;
7538 /* In both cases we want to cost both operands. */
7539 *cost
+= rtx_cost (new_op0
, int_mode
, (enum rtx_code
) code
,
7541 *cost
+= rtx_cost (op1
, int_mode
, (enum rtx_code
) code
,
7551 op0
= aarch64_strip_shift (x
);
7553 if (VECTOR_MODE_P (mode
))
7556 *cost
+= extra_cost
->vect
.alu
;
7560 /* MVN-shifted-reg. */
7563 *cost
+= rtx_cost (op0
, mode
, (enum rtx_code
) code
, 0, speed
);
7566 *cost
+= extra_cost
->alu
.log_shift
;
7570 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
7571 Handle the second form here taking care that 'a' in the above can
7573 else if (GET_CODE (op0
) == XOR
)
7575 rtx newop0
= XEXP (op0
, 0);
7576 rtx newop1
= XEXP (op0
, 1);
7577 rtx op0_stripped
= aarch64_strip_shift (newop0
);
7579 *cost
+= rtx_cost (newop1
, mode
, (enum rtx_code
) code
, 1, speed
);
7580 *cost
+= rtx_cost (op0_stripped
, mode
, XOR
, 0, speed
);
7584 if (op0_stripped
!= newop0
)
7585 *cost
+= extra_cost
->alu
.log_shift
;
7587 *cost
+= extra_cost
->alu
.logical
;
7594 *cost
+= extra_cost
->alu
.logical
;
7601 /* If a value is written in SI mode, then zero extended to DI
7602 mode, the operation will in general be free as a write to
7603 a 'w' register implicitly zeroes the upper bits of an 'x'
7604 register. However, if this is
7606 (set (reg) (zero_extend (reg)))
7608 we must cost the explicit register move. */
7610 && GET_MODE (op0
) == SImode
7613 int op_cost
= rtx_cost (op0
, VOIDmode
, ZERO_EXTEND
, 0, speed
);
7615 /* If OP_COST is non-zero, then the cost of the zero extend
7616 is effectively the cost of the inner operation. Otherwise
7617 we have a MOV instruction and we take the cost from the MOV
7618 itself. This is true independently of whether we are
7619 optimizing for space or time. */
7625 else if (MEM_P (op0
))
7627 /* All loads can zero extend to any size for free. */
7628 *cost
= rtx_cost (op0
, VOIDmode
, ZERO_EXTEND
, param
, speed
);
7632 op0
= aarch64_extend_bitfield_pattern_p (x
);
7635 *cost
+= rtx_cost (op0
, mode
, ZERO_EXTEND
, 0, speed
);
7637 *cost
+= extra_cost
->alu
.bfx
;
7643 if (VECTOR_MODE_P (mode
))
7646 *cost
+= extra_cost
->vect
.alu
;
7650 /* We generate an AND instead of UXTB/UXTH. */
7651 *cost
+= extra_cost
->alu
.logical
;
7657 if (MEM_P (XEXP (x
, 0)))
7662 rtx address
= XEXP (XEXP (x
, 0), 0);
7663 *cost
+= extra_cost
->ldst
.load_sign_extend
;
7666 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
7672 op0
= aarch64_extend_bitfield_pattern_p (x
);
7675 *cost
+= rtx_cost (op0
, mode
, SIGN_EXTEND
, 0, speed
);
7677 *cost
+= extra_cost
->alu
.bfx
;
7683 if (VECTOR_MODE_P (mode
))
7684 *cost
+= extra_cost
->vect
.alu
;
7686 *cost
+= extra_cost
->alu
.extend
;
7694 if (CONST_INT_P (op1
))
7698 if (VECTOR_MODE_P (mode
))
7700 /* Vector shift (immediate). */
7701 *cost
+= extra_cost
->vect
.alu
;
7705 /* LSL (immediate), UBMF, UBFIZ and friends. These are all
7707 *cost
+= extra_cost
->alu
.shift
;
7711 /* We can incorporate zero/sign extend for free. */
7712 if (GET_CODE (op0
) == ZERO_EXTEND
7713 || GET_CODE (op0
) == SIGN_EXTEND
)
7714 op0
= XEXP (op0
, 0);
7716 *cost
+= rtx_cost (op0
, VOIDmode
, ASHIFT
, 0, speed
);
7721 if (VECTOR_MODE_P (mode
))
7724 /* Vector shift (register). */
7725 *cost
+= extra_cost
->vect
.alu
;
7731 *cost
+= extra_cost
->alu
.shift_reg
;
7733 if (GET_CODE (op1
) == AND
&& REG_P (XEXP (op1
, 0))
7734 && CONST_INT_P (XEXP (op1
, 1))
7735 && INTVAL (XEXP (op1
, 1)) == GET_MODE_BITSIZE (mode
) - 1)
7737 *cost
+= rtx_cost (op0
, mode
, (rtx_code
) code
, 0, speed
);
7738 /* We already demanded XEXP (op1, 0) to be REG_P, so
7739 don't recurse into it. */
7743 return false; /* All arguments need to be in registers. */
7753 if (CONST_INT_P (op1
))
7755 /* ASR (immediate) and friends. */
7758 if (VECTOR_MODE_P (mode
))
7759 *cost
+= extra_cost
->vect
.alu
;
7761 *cost
+= extra_cost
->alu
.shift
;
7764 *cost
+= rtx_cost (op0
, mode
, (enum rtx_code
) code
, 0, speed
);
7769 if (VECTOR_MODE_P (mode
))
7772 /* Vector shift (register). */
7773 *cost
+= extra_cost
->vect
.alu
;
7778 /* ASR (register) and friends. */
7779 *cost
+= extra_cost
->alu
.shift_reg
;
7781 if (GET_CODE (op1
) == AND
&& REG_P (XEXP (op1
, 0))
7782 && CONST_INT_P (XEXP (op1
, 1))
7783 && INTVAL (XEXP (op1
, 1)) == GET_MODE_BITSIZE (mode
) - 1)
7785 *cost
+= rtx_cost (op0
, mode
, (rtx_code
) code
, 0, speed
);
7786 /* We already demanded XEXP (op1, 0) to be REG_P, so
7787 don't recurse into it. */
7791 return false; /* All arguments need to be in registers. */
7796 if (aarch64_cmodel
== AARCH64_CMODEL_LARGE
7797 || aarch64_cmodel
== AARCH64_CMODEL_SMALL_SPIC
)
7801 *cost
+= extra_cost
->ldst
.load
;
7803 else if (aarch64_cmodel
== AARCH64_CMODEL_SMALL
7804 || aarch64_cmodel
== AARCH64_CMODEL_SMALL_PIC
)
7806 /* ADRP, followed by ADD. */
7807 *cost
+= COSTS_N_INSNS (1);
7809 *cost
+= 2 * extra_cost
->alu
.arith
;
7811 else if (aarch64_cmodel
== AARCH64_CMODEL_TINY
7812 || aarch64_cmodel
== AARCH64_CMODEL_TINY_PIC
)
7816 *cost
+= extra_cost
->alu
.arith
;
7821 /* One extra load instruction, after accessing the GOT. */
7822 *cost
+= COSTS_N_INSNS (1);
7824 *cost
+= extra_cost
->ldst
.load
;
7830 /* ADRP/ADD (immediate). */
7832 *cost
+= extra_cost
->alu
.arith
;
7840 if (VECTOR_MODE_P (mode
))
7841 *cost
+= extra_cost
->vect
.alu
;
7843 *cost
+= extra_cost
->alu
.bfx
;
7846 /* We can trust that the immediates used will be correct (there
7847 are no by-register forms), so we need only cost op0. */
7848 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, (enum rtx_code
) code
, 0, speed
);
7852 *cost
+= aarch64_rtx_mult_cost (x
, MULT
, 0, speed
);
7853 /* aarch64_rtx_mult_cost always handles recursion to its
7858 /* We can expand signed mod by power of 2 using a NEGS, two parallel
7859 ANDs and a CSNEG. Assume here that CSNEG is the same as the cost of
7860 an unconditional negate. This case should only ever be reached through
7861 the set_smod_pow2_cheap check in expmed.c. */
7862 if (CONST_INT_P (XEXP (x
, 1))
7863 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
7864 && (mode
== SImode
|| mode
== DImode
))
7866 /* We expand to 4 instructions. Reset the baseline. */
7867 *cost
= COSTS_N_INSNS (4);
7870 *cost
+= 2 * extra_cost
->alu
.logical
7871 + 2 * extra_cost
->alu
.arith
;
7880 /* Slightly prefer UMOD over SMOD. */
7881 if (VECTOR_MODE_P (mode
))
7882 *cost
+= extra_cost
->vect
.alu
;
7883 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
7884 *cost
+= (extra_cost
->mult
[mode
== DImode
].add
7885 + extra_cost
->mult
[mode
== DImode
].idiv
7886 + (code
== MOD
? 1 : 0));
7888 return false; /* All arguments need to be in registers. */
7895 if (VECTOR_MODE_P (mode
))
7896 *cost
+= extra_cost
->vect
.alu
;
7897 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
7898 /* There is no integer SQRT, so only DIV and UDIV can get
7900 *cost
+= (extra_cost
->mult
[mode
== DImode
].idiv
7901 /* Slightly prefer UDIV over SDIV. */
7902 + (code
== DIV
? 1 : 0));
7904 *cost
+= extra_cost
->fp
[mode
== DFmode
].div
;
7906 return false; /* All arguments need to be in registers. */
7909 return aarch64_if_then_else_costs (XEXP (x
, 0), XEXP (x
, 1),
7910 XEXP (x
, 2), cost
, speed
);
7923 return false; /* All arguments must be in registers. */
7932 if (VECTOR_MODE_P (mode
))
7933 *cost
+= extra_cost
->vect
.alu
;
7935 *cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
7938 /* FMSUB, FNMADD, and FNMSUB are free. */
7939 if (GET_CODE (op0
) == NEG
)
7940 op0
= XEXP (op0
, 0);
7942 if (GET_CODE (op2
) == NEG
)
7943 op2
= XEXP (op2
, 0);
7945 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
7946 and the by-element operand as operand 0. */
7947 if (GET_CODE (op1
) == NEG
)
7948 op1
= XEXP (op1
, 0);
7950 /* Catch vector-by-element operations. The by-element operand can
7951 either be (vec_duplicate (vec_select (x))) or just
7952 (vec_select (x)), depending on whether we are multiplying by
7953 a vector or a scalar.
7955 Canonicalization is not very good in these cases, FMA4 will put the
7956 by-element operand as operand 0, FNMA4 will have it as operand 1. */
7957 if (GET_CODE (op0
) == VEC_DUPLICATE
)
7958 op0
= XEXP (op0
, 0);
7959 else if (GET_CODE (op1
) == VEC_DUPLICATE
)
7960 op1
= XEXP (op1
, 0);
7962 if (GET_CODE (op0
) == VEC_SELECT
)
7963 op0
= XEXP (op0
, 0);
7964 else if (GET_CODE (op1
) == VEC_SELECT
)
7965 op1
= XEXP (op1
, 0);
7967 /* If the remaining parameters are not registers,
7968 get the cost to put them into registers. */
7969 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed
);
7970 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed
);
7971 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed
);
7975 case UNSIGNED_FLOAT
:
7977 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
7983 if (VECTOR_MODE_P (mode
))
7985 /*Vector truncate. */
7986 *cost
+= extra_cost
->vect
.alu
;
7989 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
7993 case FLOAT_TRUNCATE
:
7996 if (VECTOR_MODE_P (mode
))
7998 /*Vector conversion. */
7999 *cost
+= extra_cost
->vect
.alu
;
8002 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
8009 /* Strip the rounding part. They will all be implemented
8010 by the fcvt* family of instructions anyway. */
8011 if (GET_CODE (x
) == UNSPEC
)
8013 unsigned int uns_code
= XINT (x
, 1);
8015 if (uns_code
== UNSPEC_FRINTA
8016 || uns_code
== UNSPEC_FRINTM
8017 || uns_code
== UNSPEC_FRINTN
8018 || uns_code
== UNSPEC_FRINTP
8019 || uns_code
== UNSPEC_FRINTZ
)
8020 x
= XVECEXP (x
, 0, 0);
8025 if (VECTOR_MODE_P (mode
))
8026 *cost
+= extra_cost
->vect
.alu
;
8028 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].toint
;
8031 /* We can combine fmul by a power of 2 followed by a fcvt into a single
8032 fixed-point fcvt. */
8033 if (GET_CODE (x
) == MULT
8034 && ((VECTOR_MODE_P (mode
)
8035 && aarch64_vec_fpconst_pow_of_2 (XEXP (x
, 1)) > 0)
8036 || aarch64_fpconst_pow_of_2 (XEXP (x
, 1)) > 0))
8038 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, (rtx_code
) code
,
8043 *cost
+= rtx_cost (x
, VOIDmode
, (enum rtx_code
) code
, 0, speed
);
8047 if (VECTOR_MODE_P (mode
))
8051 *cost
+= extra_cost
->vect
.alu
;
8053 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8057 /* FABD, which is analogous to FADD. */
8058 if (GET_CODE (op0
) == MINUS
)
8060 *cost
+= rtx_cost (XEXP (op0
, 0), mode
, MINUS
, 0, speed
);
8061 *cost
+= rtx_cost (XEXP (op0
, 1), mode
, MINUS
, 1, speed
);
8063 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
8067 /* Simple FABS is analogous to FNEG. */
8069 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
8073 /* Integer ABS will either be split to
8074 two arithmetic instructions, or will be an ABS
8075 (scalar), which we don't model. */
8076 *cost
= COSTS_N_INSNS (2);
8078 *cost
+= 2 * extra_cost
->alu
.arith
;
8086 if (VECTOR_MODE_P (mode
))
8087 *cost
+= extra_cost
->vect
.alu
;
8090 /* FMAXNM/FMINNM/FMAX/FMIN.
8091 TODO: This may not be accurate for all implementations, but
8092 we do not model this in the cost tables. */
8093 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
8099 /* The floating point round to integer frint* instructions. */
8100 if (aarch64_frint_unspec_p (XINT (x
, 1)))
8103 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
8108 if (XINT (x
, 1) == UNSPEC_RBIT
)
8111 *cost
+= extra_cost
->alu
.rev
;
8119 /* Decompose <su>muldi3_highpart. */
8120 if (/* (truncate:DI */
8123 && GET_MODE (XEXP (x
, 0)) == TImode
8124 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
8126 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
8127 /* (ANY_EXTEND:TI (reg:DI))
8128 (ANY_EXTEND:TI (reg:DI))) */
8129 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
8130 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == ZERO_EXTEND
)
8131 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
8132 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
))
8133 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0)) == DImode
8134 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0)) == DImode
8135 /* (const_int 64) */
8136 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
8137 && UINTVAL (XEXP (XEXP (x
, 0), 1)) == 64)
8141 *cost
+= extra_cost
->mult
[mode
== DImode
].extend
;
8142 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0),
8143 mode
, MULT
, 0, speed
);
8144 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0),
8145 mode
, MULT
, 1, speed
);
8155 && flag_aarch64_verbose_cost
)
8157 "\nFailed to cost RTX. Assuming default cost.\n");
/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
   calculated for X.  This cost is stored in *COST.  Returns true
   if the total cost of X was calculated.  */
static bool
aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
                           int param, int *cost, bool speed)
{
  bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);

  if (dump_file
      && flag_aarch64_verbose_cost)
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n",
               speed ? "Hot" : "Cold",
               *cost, result ? "final" : "partial");
    }

  return result;
}
static int
aarch64_register_move_cost (machine_mode mode,
                            reg_class_t from_i, reg_class_t to_i)
{
  enum reg_class from = (enum reg_class) from_i;
  enum reg_class to = (enum reg_class) to_i;
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params.regmove_cost;

  /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
  if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
    to = GENERAL_REGS;

  if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
    from = GENERAL_REGS;

  /* Moving between GPR and stack cost is the same as GP2GP.  */
  if ((from == GENERAL_REGS && to == STACK_REG)
      || (to == GENERAL_REGS && from == STACK_REG))
    return regmove_cost->GP2GP;

  /* To/From the stack register, we move via the gprs.  */
  if (to == STACK_REG || from == STACK_REG)
    return aarch64_register_move_cost (mode, from, GENERAL_REGS)
            + aarch64_register_move_cost (mode, GENERAL_REGS, to);

  if (GET_MODE_SIZE (mode) == 16)
    {
      /* 128-bit operations on general registers require 2 instructions.  */
      if (from == GENERAL_REGS && to == GENERAL_REGS)
        return regmove_cost->GP2GP * 2;
      else if (from == GENERAL_REGS)
        return regmove_cost->GP2FP * 2;
      else if (to == GENERAL_REGS)
        return regmove_cost->FP2GP * 2;

      /* When AdvSIMD instructions are disabled it is not possible to move
         a 128-bit value directly between Q registers.  This is handled in
         secondary reload.  A general register is used as a scratch to move
         the upper DI value and the lower DI value is moved directly,
         hence the cost is the sum of three moves. */
      if (! TARGET_SIMD)
        return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

      return regmove_cost->FP2FP;
    }

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  return regmove_cost->FP2FP;
}
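/* Worked example with invented numbers: for a tuning table of GP2GP = 1,
   GP2FP = 2, FP2GP = 3 and FP2FP = 2, the rules above give an 8-byte
   STACK_REG -> FP_REGS move a cost of 1 + 2 = 3 (via the GPRs), while a
   16-byte GENERAL_REGS -> FP_REGS move costs 2 * 2 = 4 because it needs two
   instructions.  The figures are illustrative only, not taken from any real
   cost table.  */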
static int
aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                          reg_class_t rclass ATTRIBUTE_UNUSED,
                          bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params.memmov_cost;
}

/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
   to optimize 1.0/sqrt.  */

static bool
use_rsqrt_p (machine_mode mode)
{
  return (!flag_trapping_math
          && flag_unsafe_math_optimizations
          && ((aarch64_tune_params.approx_modes->recip_sqrt
               & AARCH64_APPROX_MODE (mode))
              || flag_mrecip_low_precision_sqrt));
}
/* Function to decide when to use the approximate reciprocal square root
   builtin.  */

static tree
aarch64_builtin_reciprocal (tree fndecl)
{
  machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));

  if (!use_rsqrt_p (mode))
    return NULL_TREE;
  return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
}

typedef rtx (*rsqrte_type) (rtx, rtx);

/* Select reciprocal square root initial estimate insn depending on machine
   mode.  */

static rsqrte_type
get_rsqrte_type (machine_mode mode)
{
  switch (mode)
  {
    case E_DFmode:   return gen_aarch64_rsqrtedf;
    case E_SFmode:   return gen_aarch64_rsqrtesf;
    case E_V2DFmode: return gen_aarch64_rsqrtev2df;
    case E_V2SFmode: return gen_aarch64_rsqrtev2sf;
    case E_V4SFmode: return gen_aarch64_rsqrtev4sf;
    default: gcc_unreachable ();
  }
}

typedef rtx (*rsqrts_type) (rtx, rtx, rtx);

/* Select reciprocal square root series step insn depending on machine mode.  */

static rsqrts_type
get_rsqrts_type (machine_mode mode)
{
  switch (mode)
  {
    case E_DFmode:   return gen_aarch64_rsqrtsdf;
    case E_SFmode:   return gen_aarch64_rsqrtssf;
    case E_V2DFmode: return gen_aarch64_rsqrtsv2df;
    case E_V2SFmode: return gen_aarch64_rsqrtsv2sf;
    case E_V4SFmode: return gen_aarch64_rsqrtsv4sf;
    default: gcc_unreachable ();
  }
}
/* Emit instruction sequence to compute either the approximate square root
   or its approximate reciprocal, depending on the flag RECP, and return
   whether the sequence was emitted or not.  */

bool
aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
{
  machine_mode mode = GET_MODE (dst);

  if (GET_MODE_INNER (mode) == HFmode)
    {
      gcc_assert (!recp);
      return false;
    }

  if (!recp)
    {
      if (!(flag_mlow_precision_sqrt
            || (aarch64_tune_params.approx_modes->sqrt
                & AARCH64_APPROX_MODE (mode))))
        return false;

      if (flag_finite_math_only
          || flag_trapping_math
          || !flag_unsafe_math_optimizations
          || optimize_function_for_size_p (cfun))
        return false;
    }
  else
    /* Caller assumes we cannot fail.  */
    gcc_assert (use_rsqrt_p (mode));

  machine_mode mmsk = mode_for_int_vector (mode).require ();
  rtx xmsk = gen_reg_rtx (mmsk);
  if (!recp)
    /* When calculating the approximate square root, compare the
       argument with 0.0 and create a mask.  */
    emit_insn (gen_rtx_SET (xmsk,
                            gen_rtx_NEG (mmsk,
                                         gen_rtx_EQ (mmsk, src,
                                                     CONST0_RTX (mode)))));

  /* Estimate the approximate reciprocal square root.  */
  rtx xdst = gen_reg_rtx (mode);
  emit_insn ((*get_rsqrte_type (mode)) (xdst, src));

  /* Iterate over the series twice for SF and thrice for DF.  */
  int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;

  /* Optionally iterate over the series once less for faster performance
     while sacrificing the accuracy.  */
  if ((recp && flag_mrecip_low_precision_sqrt)
      || (!recp && flag_mlow_precision_sqrt))
    iterations--;

  /* Iterate over the series to calculate the approximate reciprocal square
     root.  */
  rtx x1 = gen_reg_rtx (mode);
  while (iterations--)
    {
      rtx x2 = gen_reg_rtx (mode);
      emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));

      emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2));

      if (iterations > 0)
        emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
    }

  if (!recp)
    {
      /* Qualify the approximate reciprocal square root when the argument is
         0.0 by squashing the intermediary result to 0.0.  */
      rtx xtmp = gen_reg_rtx (mmsk);
      emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
                                        gen_rtx_SUBREG (mmsk, xdst, 0)));
      emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));

      /* Calculate the approximate square root.  */
      emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
    }

  /* Finalize the approximation.  */
  emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));

  return true;
}
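/* Illustrative sketch only, kept out of the build with #if 0: a plain
   floating-point model of the sequence emitted above.  FRSQRTE supplies an
   initial estimate of 1/sqrt(d), each FRSQRTS step computes (3 - d*x*x)/2,
   and multiplying the estimate by that factor performs one Newton-Raphson
   iteration; the square root itself is then recovered as d * (1/sqrt(d)).
   This models the arithmetic, not the exact RTL ordering, and the names are
   invented for the example.  */
#if 0
static double
example_frsqrts (double a, double b)
{
  return (3.0 - a * b) / 2.0;
}

static double
example_approx_sqrt (double d, double initial_estimate, int iterations)
{
  double x = initial_estimate;            /* Plays the role of FRSQRTE.  */
  while (iterations-- > 0)
    x = x * example_frsqrts (d, x * x);   /* One Newton-Raphson step.  */
  return d * x;                           /* sqrt(d) == d * (1/sqrt(d)).  */
}
#endif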
typedef rtx (*recpe_type) (rtx, rtx);

/* Select reciprocal initial estimate insn depending on machine mode.  */

static recpe_type
get_recpe_type (machine_mode mode)
{
  switch (mode)
  {
    case E_SFmode:   return (gen_aarch64_frecpesf);
    case E_V2SFmode: return (gen_aarch64_frecpev2sf);
    case E_V4SFmode: return (gen_aarch64_frecpev4sf);
    case E_DFmode:   return (gen_aarch64_frecpedf);
    case E_V2DFmode: return (gen_aarch64_frecpev2df);
    default:         gcc_unreachable ();
  }
}

typedef rtx (*recps_type) (rtx, rtx, rtx);

/* Select reciprocal series step insn depending on machine mode.  */

static recps_type
get_recps_type (machine_mode mode)
{
  switch (mode)
  {
    case E_SFmode:   return (gen_aarch64_frecpssf);
    case E_V2SFmode: return (gen_aarch64_frecpsv2sf);
    case E_V4SFmode: return (gen_aarch64_frecpsv4sf);
    case E_DFmode:   return (gen_aarch64_frecpsdf);
    case E_V2DFmode: return (gen_aarch64_frecpsv2df);
    default:         gcc_unreachable ();
  }
}

/* Emit the instruction sequence to compute the approximation for the division
   of NUM by DEN in QUO and return whether the sequence was emitted or not.  */

bool
aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
{
  machine_mode mode = GET_MODE (quo);

  if (GET_MODE_INNER (mode) == HFmode)
    return false;

  bool use_approx_division_p = (flag_mlow_precision_div
                                || (aarch64_tune_params.approx_modes->division
                                    & AARCH64_APPROX_MODE (mode)));

  if (!flag_finite_math_only
      || flag_trapping_math
      || !flag_unsafe_math_optimizations
      || optimize_function_for_size_p (cfun)
      || !use_approx_division_p)
    return false;

  /* Estimate the approximate reciprocal.  */
  rtx xrcp = gen_reg_rtx (mode);
  emit_insn ((*get_recpe_type (mode)) (xrcp, den));

  /* Iterate over the series twice for SF and thrice for DF.  */
  int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;

  /* Optionally iterate over the series once less for faster performance,
     while sacrificing the accuracy.  */
  if (flag_mlow_precision_div)
    iterations--;

  /* Iterate over the series to calculate the approximate reciprocal.  */
  rtx xtmp = gen_reg_rtx (mode);
  while (iterations--)
    {
      emit_insn ((*get_recps_type (mode)) (xtmp, xrcp, den));

      if (iterations > 0)
        emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
    }

  if (num != CONST1_RTX (mode))
    {
      /* As the approximate reciprocal of DEN is already calculated, only
         calculate the approximate division when NUM is not 1.0.  */
      rtx xnum = force_reg (mode, num);
      emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
    }

  /* Finalize the approximation.  */
  emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
  return true;
}
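/* Illustrative sketch only, kept out of the build with #if 0: a plain
   floating-point model of the division sequence above.  FRECPE supplies an
   initial estimate of 1/den, each FRECPS step computes (2 - den*x), and
   multiplying the estimate by that factor performs one Newton-Raphson
   refinement; the quotient is then num * (1/den).  This models the
   arithmetic only, and the names are invented for the example.  */
#if 0
static double
example_frecps (double a, double b)
{
  return 2.0 - a * b;
}

static double
example_approx_div (double num, double den, double initial_estimate,
                    int iterations)
{
  double x = initial_estimate;            /* Plays the role of FRECPE.  */
  while (iterations-- > 0)
    x = x * example_frecps (den, x);      /* One Newton-Raphson step.  */
  return num * x;
}
#endif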
/* Return the number of instructions that can be issued per cycle.  */
static int
aarch64_sched_issue_rate (void)
{
  return aarch64_tune_params.issue_rate;
}

static int
aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = aarch64_sched_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
   autopref_multipass_dfa_lookahead_guard from haifa-sched.c.  It only
   has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0.  */

static int
aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
                                                   int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                    tree vectype,
                                    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;
  const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
  bool fp = false;

  if (vectype != NULL)
    fp = FLOAT_TYPE_P (vectype);

  switch (type_of_cost)
    {
      case scalar_stmt:
        return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;

      case scalar_load:
        return costs->scalar_load_cost;

      case scalar_store:
        return costs->scalar_store_cost;

      case vector_stmt:
        return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;

      case vector_load:
        return costs->vec_align_load_cost;

      case vector_store:
        return costs->vec_store_cost;

      case vec_to_scalar:
        return costs->vec_to_scalar_cost;

      case scalar_to_vec:
        return costs->scalar_to_vec_cost;

      case unaligned_load:
      case vector_gather_load:
        return costs->vec_unalign_load_cost;

      case unaligned_store:
      case vector_scatter_store:
        return costs->vec_unalign_store_cost;

      case cond_branch_taken:
        return costs->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return costs->cond_not_taken_branch_cost;

      case vec_perm:
        return costs->vec_permute_cost;

      case vec_promote_demote:
        return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;

      case vec_construct:
        elements = TYPE_VECTOR_SUBPARTS (vectype);
        return elements / 2 + 1;

      default:
        gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                       struct _stmt_vec_info *stmt_info, int misalign,
                       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
            aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        count *= 50; /*  FIXME  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
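/* Worked example with invented numbers: if the per-statement cost returned
   by aarch64_builtin_vectorization_cost is 1 and COUNT is 2, a statement in
   the body of the loop being vectorized adds 2 to cost[vect_body]; the same
   statement inside an inner loop relative to the vectorized loop is weighted
   by 50 and adds 100 instead.  */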
static void initialize_aarch64_code_model (struct gcc_options *);
8620 /* Parse the TO_PARSE string and put the architecture struct that it
8621 selects into RES and the architectural features into ISA_FLAGS.
8622 Return an aarch64_parse_opt_result describing the parse result.
8623 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
8625 static enum aarch64_parse_opt_result
8626 aarch64_parse_arch (const char *to_parse
, const struct processor
**res
,
8627 unsigned long *isa_flags
)
8630 const struct processor
*arch
;
8631 char *str
= (char *) alloca (strlen (to_parse
) + 1);
8634 strcpy (str
, to_parse
);
8636 ext
= strchr (str
, '+');
8644 return AARCH64_PARSE_MISSING_ARG
;
8647 /* Loop through the list of supported ARCHes to find a match. */
8648 for (arch
= all_architectures
; arch
->name
!= NULL
; arch
++)
8650 if (strlen (arch
->name
) == len
&& strncmp (arch
->name
, str
, len
) == 0)
8652 unsigned long isa_temp
= arch
->flags
;
8656 /* TO_PARSE string contains at least one extension. */
8657 enum aarch64_parse_opt_result ext_res
8658 = aarch64_parse_extension (ext
, &isa_temp
);
8660 if (ext_res
!= AARCH64_PARSE_OK
)
8663 /* Extension parsing was successful. Confirm the result
8664 arch and ISA flags. */
8666 *isa_flags
= isa_temp
;
8667 return AARCH64_PARSE_OK
;
8671 /* ARCH name not found in list. */
8672 return AARCH64_PARSE_INVALID_ARG
;
8675 /* Parse the TO_PARSE string and put the result tuning in RES and the
8676 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
8677 describing the parse result. If there is an error parsing, RES and
8678 ISA_FLAGS are left unchanged. */
8680 static enum aarch64_parse_opt_result
8681 aarch64_parse_cpu (const char *to_parse
, const struct processor
**res
,
8682 unsigned long *isa_flags
)
8685 const struct processor
*cpu
;
8686 char *str
= (char *) alloca (strlen (to_parse
) + 1);
8689 strcpy (str
, to_parse
);
8691 ext
= strchr (str
, '+');
8699 return AARCH64_PARSE_MISSING_ARG
;
8702 /* Loop through the list of supported CPUs to find a match. */
8703 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
8705 if (strlen (cpu
->name
) == len
&& strncmp (cpu
->name
, str
, len
) == 0)
8707 unsigned long isa_temp
= cpu
->flags
;
8712 /* TO_PARSE string contains at least one extension. */
8713 enum aarch64_parse_opt_result ext_res
8714 = aarch64_parse_extension (ext
, &isa_temp
);
8716 if (ext_res
!= AARCH64_PARSE_OK
)
8719 /* Extension parsing was successful. Confirm the result
8720 cpu and ISA flags. */
8722 *isa_flags
= isa_temp
;
8723 return AARCH64_PARSE_OK
;
8727 /* CPU name not found in list. */
8728 return AARCH64_PARSE_INVALID_ARG
;
/* Parse the TO_PARSE string and put the cpu it selects into RES.
   Return an aarch64_parse_opt_result describing the parse result.
   If the parsing fails the RES does not change.  */

static enum aarch64_parse_opt_result
aarch64_parse_tune (const char *to_parse, const struct processor **res)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (to_parse) + 1);

  strcpy (str, to_parse);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
        {
          *res = cpu;
          return AARCH64_PARSE_OK;
        }
    }

  /* CPU name not found in list.  */
  return AARCH64_PARSE_INVALID_ARG;
}
/* Parse TOKEN, which has length LENGTH to see if it is an option
   described in FLAG.  If it is, return the index bit for that fusion type.
   If not, error (printing OPTION_NAME) and return zero.  */

static unsigned int
aarch64_parse_one_option_token (const char *token,
                                size_t length,
                                const struct aarch64_flag_desc *flag,
                                const char *option_name)
{
  for (; flag->name != NULL; flag++)
    {
      if (length == strlen (flag->name)
          && !strncmp (flag->name, token, length))
        return flag->flag;
    }

  error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
  return 0;
}
8778 /* Parse OPTION which is a comma-separated list of flags to enable.
8779 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
8780 default state we inherit from the CPU tuning structures. OPTION_NAME
8781 gives the top-level option we are parsing in the -moverride string,
8782 for use in error messages. */
8785 aarch64_parse_boolean_options (const char *option
,
8786 const struct aarch64_flag_desc
*flags
,
8787 unsigned int initial_state
,
8788 const char *option_name
)
8790 const char separator
= '.';
8791 const char* specs
= option
;
8792 const char* ntoken
= option
;
8793 unsigned int found_flags
= initial_state
;
8795 while ((ntoken
= strchr (specs
, separator
)))
8797 size_t token_length
= ntoken
- specs
;
8798 unsigned token_ops
= aarch64_parse_one_option_token (specs
,
8802 /* If we find "none" (or, for simplicity's sake, an error) anywhere
8803 in the token stream, reset the supported operations. So:
8805 adrp+add.cmp+branch.none.adrp+add
8807 would have the result of turning on only adrp+add fusion. */
8811 found_flags
|= token_ops
;
8815 /* We ended with a comma, print something. */
8818 error ("%s string ill-formed\n", option_name
);
8822 /* We still have one more token to parse. */
8823 size_t token_length
= strlen (specs
);
8824 unsigned token_ops
= aarch64_parse_one_option_token (specs
,
8831 found_flags
|= token_ops
;
8835 /* Support for overriding instruction fusion. */
8838 aarch64_parse_fuse_string (const char *fuse_string
,
8839 struct tune_params
*tune
)
8841 tune
->fusible_ops
= aarch64_parse_boolean_options (fuse_string
,
8842 aarch64_fusible_pairs
,
8847 /* Support for overriding other tuning flags. */
8850 aarch64_parse_tune_string (const char *tune_string
,
8851 struct tune_params
*tune
)
8853 tune
->extra_tuning_flags
8854 = aarch64_parse_boolean_options (tune_string
,
8855 aarch64_tuning_flags
,
8856 tune
->extra_tuning_flags
,
8860 /* Parse TOKEN, which has length LENGTH to see if it is a tuning option
8861 we understand. If it is, extract the option string and handoff to
8862 the appropriate function. */
8865 aarch64_parse_one_override_token (const char* token
,
8867 struct tune_params
*tune
)
8869 const struct aarch64_tuning_override_function
*fn
8870 = aarch64_tuning_override_functions
;
8872 const char *option_part
= strchr (token
, '=');
8875 error ("tuning string missing in option (%s)", token
);
8879 /* Get the length of the option name. */
8880 length
= option_part
- token
;
8881 /* Skip the '=' to get to the option string. */
8884 for (; fn
->name
!= NULL
; fn
++)
8886 if (!strncmp (fn
->name
, token
, length
))
8888 fn
->parse_override (option_part
, tune
);
8893 error ("unknown tuning option (%s)",token
);
/* A checking mechanism for the implementation of the tls size.  */

static void
initialize_aarch64_tls_size (struct gcc_options *opts)
{
  if (aarch64_tls_size == 0)
    aarch64_tls_size = 24;

  switch (opts->x_aarch64_cmodel_var)
    {
    case AARCH64_CMODEL_TINY:
      /* Both the default and maximum TLS size allowed under tiny is 1M which
         needs two instructions to address, so we clamp the size to 24.  */
      if (aarch64_tls_size > 24)
        aarch64_tls_size = 24;
      break;
    case AARCH64_CMODEL_SMALL:
      /* The maximum TLS size allowed under small is 4G.  */
      if (aarch64_tls_size > 32)
        aarch64_tls_size = 32;
      break;
    case AARCH64_CMODEL_LARGE:
      /* The maximum TLS size allowed under large is 16E.
         FIXME: 16E should be 64bit, we only support 48bit offset now.  */
      if (aarch64_tls_size > 48)
        aarch64_tls_size = 48;
      break;
    default:
      gcc_unreachable ();
    }
}
8931 /* Parse STRING looking for options in the format:
8932 string :: option:string
8933 option :: name=substring
8935 substring :: defined by option. */
8938 aarch64_parse_override_string (const char* input_string
,
8939 struct tune_params
* tune
)
8941 const char separator
= ':';
8942 size_t string_length
= strlen (input_string
) + 1;
8943 char *string_root
= (char *) xmalloc (sizeof (*string_root
) * string_length
);
8944 char *string
= string_root
;
8945 strncpy (string
, input_string
, string_length
);
8946 string
[string_length
- 1] = '\0';
8948 char* ntoken
= string
;
8950 while ((ntoken
= strchr (string
, separator
)))
8952 size_t token_length
= ntoken
- string
;
8953 /* Make this substring look like a string. */
8955 aarch64_parse_one_override_token (string
, token_length
, tune
);
8959 /* One last option to parse. */
8960 aarch64_parse_one_override_token (string
, strlen (string
), tune
);
8966 aarch64_override_options_after_change_1 (struct gcc_options
*opts
)
8968 /* PR 70044: We have to be careful about being called multiple times for the
8969 same function. This means all changes should be repeatable. */
8971 /* If the frame pointer is enabled, set it to a special value that behaves
8972 similar to frame pointer omission. If we don't do this all leaf functions
8973 will get a frame pointer even if flag_omit_leaf_frame_pointer is set.
8974 If flag_omit_frame_pointer has this special value, we must force the
8975 frame pointer if not in a leaf function. We also need to force it in a
8976 leaf function if flag_omit_frame_pointer is not set or if LR is used. */
8977 if (opts
->x_flag_omit_frame_pointer
== 0)
8978 opts
->x_flag_omit_frame_pointer
= 2;
8980 /* If not optimizing for size, set the default
8981 alignment to what the target wants. */
8982 if (!opts
->x_optimize_size
)
8984 if (opts
->x_align_loops
<= 0)
8985 opts
->x_align_loops
= aarch64_tune_params
.loop_align
;
8986 if (opts
->x_align_jumps
<= 0)
8987 opts
->x_align_jumps
= aarch64_tune_params
.jump_align
;
8988 if (opts
->x_align_functions
<= 0)
8989 opts
->x_align_functions
= aarch64_tune_params
.function_align
;
8992 /* We default to no pc-relative literal loads. */
8994 aarch64_pcrelative_literal_loads
= false;
8996 /* If -mpc-relative-literal-loads is set on the command line, this
8997 implies that the user asked for PC relative literal loads. */
8998 if (opts
->x_pcrelative_literal_loads
== 1)
8999 aarch64_pcrelative_literal_loads
= true;
9001 /* In the tiny memory model it makes no sense to disallow PC relative
9002 literal pool loads. */
9003 if (aarch64_cmodel
== AARCH64_CMODEL_TINY
9004 || aarch64_cmodel
== AARCH64_CMODEL_TINY_PIC
)
9005 aarch64_pcrelative_literal_loads
= true;
9007 /* When enabling the lower precision Newton series for the square root, also
9008 enable it for the reciprocal square root, since the latter is an
9009 intermediary step for the former. */
9010 if (flag_mlow_precision_sqrt
)
9011 flag_mrecip_low_precision_sqrt
= true;
/* 'Unpack' the internal tuning structs and update the options
   in OPTS.  The caller must have set up selected_tune and selected_arch
   as all the other target-specific codegen decisions are
   derived from them.  */

void
aarch64_override_options_internal (struct gcc_options *opts)
{
  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->sched_core;
  /* Make a copy of the tuning parameters attached to the core, which
     we may later overwrite.  */
  aarch64_tune_params = *(selected_tune->tune);
  aarch64_architecture_version = selected_arch->architecture_version;

  if (opts->x_aarch64_override_tune_string)
    aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
				   &aarch64_tune_params);

  /* This target defaults to strict volatile bitfields.  */
  if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    opts->x_flag_strict_volatile_bitfields = 1;

  initialize_aarch64_code_model (opts);
  initialize_aarch64_tls_size (opts);

  int queue_depth = 0;
  switch (aarch64_tune_params.autoprefetcher_model)
    {
      case tune_params::AUTOPREFETCHER_OFF:
	queue_depth = -1;
	break;
      case tune_params::AUTOPREFETCHER_WEAK:
	queue_depth = 0;
	break;
      case tune_params::AUTOPREFETCHER_STRONG:
	queue_depth = max_insn_queue_index + 1;
	break;
      default:
	gcc_unreachable ();
    }

  /* We don't mind passing in global_options_set here as we don't use
     the *options_set structs anyway.  */
  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
			 queue_depth,
			 opts->x_param_values,
			 global_options_set.x_param_values);

  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (aarch64_tune_params.prefetch->num_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			   aarch64_tune_params.prefetch->num_slots,
			   opts->x_param_values,
			   global_options_set.x_param_values);
  if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			   aarch64_tune_params.prefetch->l1_cache_size,
			   opts->x_param_values,
			   global_options_set.x_param_values);
  if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			   aarch64_tune_params.prefetch->l1_cache_line_size,
			   opts->x_param_values,
			   global_options_set.x_param_values);
  if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
    maybe_set_param_value (PARAM_L2_CACHE_SIZE,
			   aarch64_tune_params.prefetch->l2_cache_size,
			   opts->x_param_values,
			   global_options_set.x_param_values);

  /* Use the alternative scheduling-pressure algorithm by default.  */
  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
			 opts->x_param_values,
			 global_options_set.x_param_values);

  /* Enable sw prefetching at specified optimization level for
     CPUS that have prefetch.  Lower optimization level threshold by 1
     when profiling is enabled.  */
  if (opts->x_flag_prefetch_loop_arrays < 0
      && !opts->x_optimize_size
      && aarch64_tune_params.prefetch->default_opt_level >= 0
      && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
    opts->x_flag_prefetch_loop_arrays = 1;

  aarch64_override_options_after_change_1 (opts);
}
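/* Illustrative note: maybe_set_param_value only installs the tuning value
   when the parameter was not already set explicitly by the user, so an
   explicit --param on the command line wins over the per-core defaults
   supplied above, e.g. (hypothetical invocation):

     gcc -O2 -mcpu=... --param l1-cache-size=32 ...
     // keeps 32 even if the selected tuning would have chosen another value.  */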
/* Print a hint with a suggestion for a core or architecture name that
   most closely resembles what the user passed in STR.  ARCH is true if
   the user is asking for an architecture name.  ARCH is false if the user
   is asking for a core name.  */

static void
aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
{
  auto_vec<const char *> candidates;
  const struct processor *entry = arch ? all_architectures : all_cores;
  for (; entry->name != NULL; entry++)
    candidates.safe_push (entry->name);

  char *s;
  const char *hint = candidates_list_and_hint (str, s, candidates);
  if (hint)
    inform (input_location, "valid arguments are: %s;"
			     " did you mean %qs?", s, hint);
  else
    inform (input_location, "valid arguments are: %s", s);
  XDELETEVEC (s);
}

/* Print a hint with a suggestion for a core name that most closely resembles
   what the user passed in STR.  */

inline static void
aarch64_print_hint_for_core (const char *str)
{
  aarch64_print_hint_for_core_or_arch (str, false);
}

/* Print a hint with a suggestion for an architecture name that most closely
   resembles what the user passed in STR.  */

inline static void
aarch64_print_hint_for_arch (const char *str)
{
  aarch64_print_hint_for_core_or_arch (str, true);
}
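/* Example of the resulting diagnostics (core names and the exact candidate
   list are illustrative; the error itself comes from aarch64_validate_mcpu
   below):

     $ gcc -mcpu=cortex-a54 ...
     error: unknown value 'cortex-a54' for -mcpu
     note: valid arguments are: cortex-a35 cortex-a53 ...; did you mean 'cortex-a53'?  */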
/* Validate a command-line -mcpu option.  Parse the cpu and extensions (if any)
   specified in STR and throw errors if appropriate.  Put the results if
   they are valid in RES and ISA_FLAGS.  Return whether the option is
   valid.  */

static bool
aarch64_validate_mcpu (const char *str, const struct processor **res,
		       unsigned long *isa_flags)
{
  enum aarch64_parse_opt_result parse_res
    = aarch64_parse_cpu (str, res, isa_flags);

  if (parse_res == AARCH64_PARSE_OK)
    return true;

  switch (parse_res)
    {
      case AARCH64_PARSE_MISSING_ARG:
	error ("missing cpu name in %<-mcpu=%s%>", str);
	break;
      case AARCH64_PARSE_INVALID_ARG:
	error ("unknown value %qs for -mcpu", str);
	aarch64_print_hint_for_core (str);
	break;
      case AARCH64_PARSE_INVALID_FEATURE:
	error ("invalid feature modifier in %<-mcpu=%s%>", str);
	break;
      default:
	gcc_unreachable ();
    }

  return false;
}

/* Validate a command-line -march option.  Parse the arch and extensions
   (if any) specified in STR and throw errors if appropriate.  Put the
   results, if they are valid, in RES and ISA_FLAGS.  Return whether the
   option is valid.  */

static bool
aarch64_validate_march (const char *str, const struct processor **res,
			unsigned long *isa_flags)
{
  enum aarch64_parse_opt_result parse_res
    = aarch64_parse_arch (str, res, isa_flags);

  if (parse_res == AARCH64_PARSE_OK)
    return true;

  switch (parse_res)
    {
      case AARCH64_PARSE_MISSING_ARG:
	error ("missing arch name in %<-march=%s%>", str);
	break;
      case AARCH64_PARSE_INVALID_ARG:
	error ("unknown value %qs for -march", str);
	aarch64_print_hint_for_arch (str);
	break;
      case AARCH64_PARSE_INVALID_FEATURE:
	error ("invalid feature modifier in %<-march=%s%>", str);
	break;
      default:
	gcc_unreachable ();
    }

  return false;
}
/* Validate a command-line -mtune option.  Parse the cpu
   specified in STR and throw errors if appropriate.  Put the
   result, if it is valid, in RES.  Return whether the option is
   valid.  */

static bool
aarch64_validate_mtune (const char *str, const struct processor **res)
{
  enum aarch64_parse_opt_result parse_res
    = aarch64_parse_tune (str, res);

  if (parse_res == AARCH64_PARSE_OK)
    return true;

  switch (parse_res)
    {
      case AARCH64_PARSE_MISSING_ARG:
	error ("missing cpu name in %<-mtune=%s%>", str);
	break;
      case AARCH64_PARSE_INVALID_ARG:
	error ("unknown value %qs for -mtune", str);
	aarch64_print_hint_for_core (str);
	break;
      default:
	gcc_unreachable ();
    }
  return false;
}
/* Return the CPU corresponding to the enum CPU.
   If it doesn't specify a cpu, return the default.  */

static const struct processor *
aarch64_get_tune_cpu (enum aarch64_processor cpu)
{
  if (cpu != aarch64_none)
    return &all_cores[cpu];

  /* The & 0x3f is to extract the bottom 6 bits that encode the
     default cpu as selected by the --with-cpu GCC configure option.
     ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
     flags mechanism should be reworked to make it more sane.  */
  return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
}

/* Return the architecture corresponding to the enum ARCH.
   If it doesn't specify a valid architecture, return the default.  */

static const struct processor *
aarch64_get_arch (enum aarch64_arch arch)
{
  if (arch != aarch64_no_arch)
    return &all_architectures[arch];

  const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];

  return &all_architectures[cpu->arch];
}
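/* Layout assumed by the two helpers above (and by the >> 6 in
   aarch64_override_options below): TARGET_CPU_DEFAULT packs the
   configure-time default as

     bits 0-5 : enum aarch64_processor of the default core
     bits 6.. : its default ISA flag bits

   so a sketch of the decoding is:

     enum aarch64_processor cpu
       = (enum aarch64_processor) (TARGET_CPU_DEFAULT & 0x3f);
     unsigned long default_isa = TARGET_CPU_DEFAULT >> 6;  */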
/* Implement TARGET_OPTION_OVERRIDE.  This is called once in the beginning
   and is used to parse the -m{cpu,tune,arch} strings and setup the initial
   tuning structs.  In particular it must set selected_tune and
   aarch64_isa_flags that define the available ISA features and tuning
   decisions.  It must also set selected_arch as this will be used to
   output the .arch asm tags for each function.  */

static void
aarch64_override_options (void)
{
  unsigned long cpu_isa = 0;
  unsigned long arch_isa = 0;
  aarch64_isa_flags = 0;

  bool valid_cpu = true;
  bool valid_tune = true;
  bool valid_arch = true;

  selected_cpu = NULL;
  selected_arch = NULL;
  selected_tune = NULL;

  /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
     If either of -march or -mtune is given, they override their
     respective component of -mcpu.  */
  if (aarch64_cpu_string)
    valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
				       &cpu_isa);

  if (aarch64_arch_string)
    valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
					 &arch_isa);

  if (aarch64_tune_string)
    valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "generic".  */
  if (!selected_cpu)
    {
      if (selected_arch)
	{
	  selected_cpu = &all_cores[selected_arch->ident];
	  aarch64_isa_flags = arch_isa;
	  explicit_arch = selected_arch->arch;
	}
      else
	{
	  /* Get default configure-time CPU.  */
	  selected_cpu = aarch64_get_tune_cpu (aarch64_none);
	  aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
	}

      if (selected_tune)
	explicit_tune_core = selected_tune->ident;
    }
  /* If both -mcpu and -march are specified check that they are architecturally
     compatible, warn if they're not and prefer the -march ISA flags.  */
  else if (selected_arch)
    {
      if (selected_arch->arch != selected_cpu->arch)
	{
	  warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		   all_architectures[selected_cpu->arch].name,
		   selected_arch->name);
	}
      aarch64_isa_flags = arch_isa;
      explicit_arch = selected_arch->arch;
      explicit_tune_core = selected_tune ? selected_tune->ident
					 : selected_cpu->ident;
    }
  else
    {
      /* -mcpu but no -march.  */
      aarch64_isa_flags = cpu_isa;
      explicit_tune_core = selected_tune ? selected_tune->ident
					 : selected_cpu->ident;
      gcc_assert (selected_cpu);
      selected_arch = &all_architectures[selected_cpu->arch];
      explicit_arch = selected_arch->arch;
    }

  /* Set the arch as well as we will need it when outputting
     the .arch directive in assembly.  */
  if (!selected_arch)
    {
      gcc_assert (selected_cpu);
      selected_arch = &all_architectures[selected_cpu->arch];
    }

  if (!selected_tune)
    selected_tune = selected_cpu;

#ifndef HAVE_AS_MABI_OPTION
  /* The compiler may have been configured with 2.23.* binutils, which does
     not have support for ILP32.  */
  if (TARGET_ILP32)
    error ("Assembler does not support -mabi=ilp32");
#endif

  if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
    sorry ("Return address signing is only supported for -mabi=lp64");

  /* Make sure we properly set up the explicit options.  */
  if ((aarch64_cpu_string && valid_cpu)
       || (aarch64_tune_string && valid_tune))
    gcc_assert (explicit_tune_core != aarch64_none);

  if ((aarch64_cpu_string && valid_cpu)
       || (aarch64_arch_string && valid_arch))
    gcc_assert (explicit_arch != aarch64_no_arch);

  aarch64_override_options_internal (&global_options);

  /* Save these options as the default ones in case we push and pop them later
     while processing functions with potential target attributes.  */
  target_option_default_node = target_option_current_node
      = build_target_option_node (&global_options);
}
/* Implement targetm.override_options_after_change.  */

static void
aarch64_override_options_after_change (void)
{
  aarch64_override_options_after_change_1 (&global_options);
}

static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */

static void
initialize_aarch64_code_model (struct gcc_options *opts)
{
  if (opts->x_flag_pic)
    {
      switch (opts->x_aarch64_cmodel_var)
	{
	case AARCH64_CMODEL_TINY:
	  aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
	  break;
	case AARCH64_CMODEL_SMALL:
#ifdef HAVE_AS_SMALL_PIC_RELOCS
	  aarch64_cmodel = (flag_pic == 2
			    ? AARCH64_CMODEL_SMALL_PIC
			    : AARCH64_CMODEL_SMALL_SPIC);
#else
	  aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
#endif
	  break;
	case AARCH64_CMODEL_LARGE:
	  sorry ("code model %qs with -f%s", "large",
		 opts->x_flag_pic > 1 ? "PIC" : "pic");
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    aarch64_cmodel = opts->x_aarch64_cmodel_var;
}
/* Implement TARGET_OPTION_SAVE.  */

static void
aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
{
  ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
}

/* Implements TARGET_OPTION_RESTORE.  Restore the backend codegen decisions
   using the information saved in PTR.  */

static void
aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
{
  opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
  selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
  opts->x_explicit_arch = ptr->x_explicit_arch;
  selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
  opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;

  aarch64_override_options_internal (opts);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  const struct processor *cpu
    = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
  unsigned long isa_flags = ptr->x_aarch64_isa_flags;
  const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
  std::string extension
    = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);

  fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
  fprintf (file, "%*sselected arch = %s%s\n", indent, "",
	   arch->name, extension.c_str ());
}

static GTY(()) tree aarch64_previous_fndecl;

void
aarch64_reset_previous_fndecl (void)
{
  aarch64_previous_fndecl = NULL;
}
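/* Sample output of aarch64_option_print (the names are illustrative); the
   format strings above produce something like:

     selected tune = cortex-a57
     selected arch = armv8-a+crc  */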
/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
   Used by aarch64_set_current_function and aarch64_pragma_target_parse to
   make sure optab availability predicates are recomputed when necessary.  */

void
aarch64_save_restore_target_globals (tree new_tree)
{
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
}

/* Implement TARGET_SET_CURRENT_FUNCTION.  Unpack the codegen decisions
   like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
   of the function, if such exists.  This function may be called multiple
   times on a single function so use aarch64_previous_fndecl to avoid
   setting up identical state.  */

static void
aarch64_set_current_function (tree fndecl)
{
  if (!fndecl || fndecl == aarch64_previous_fndecl)
    return;

  tree old_tree = (aarch64_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If current function has no attributes but the previous one did,
     use the default node.  */
  if (!new_tree && old_tree)
    new_tree = target_option_default_node;

  /* If nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
     the default have been handled by aarch64_save_restore_target_globals from
     aarch64_pragma_target_parse.  */
  if (old_tree == new_tree)
    return;

  aarch64_previous_fndecl = fndecl;

  /* First set the target options.  */
  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));

  aarch64_save_restore_target_globals (new_tree);
}
/* Enum describing the various ways we can handle attributes.
   In many cases we can reuse the generic option handling machinery.  */

enum aarch64_attr_opt_type
{
  aarch64_attr_mask,	/* Attribute should set a bit in target_flags.  */
  aarch64_attr_bool,	/* Attribute sets or unsets a boolean variable.  */
  aarch64_attr_enum,	/* Attribute sets an enum variable.  */
  aarch64_attr_custom	/* Attribute requires a custom handling function.  */
};

/* All the information needed to handle a target attribute.
   NAME is the name of the attribute.
   ATTR_TYPE specifies the type of behavior of the attribute as described
   in the definition of enum aarch64_attr_opt_type.
   ALLOW_NEG is true if the attribute supports a "no-" form.
   HANDLER is the function that takes the attribute string as an argument.
   It is needed only when the ATTR_TYPE is aarch64_attr_custom.
   OPT_NUM is the enum specifying the option that the attribute modifies.
   This is needed for attributes that mirror the behavior of a command-line
   option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
   aarch64_attr_enum.  */

struct aarch64_attribute_info
{
  const char *name;
  enum aarch64_attr_opt_type attr_type;
  bool allow_neg;
  bool (*handler) (const char *);
  enum opt_code opt_num;
};
9577 /* Handle the ARCH_STR argument to the arch= target attribute. */
9580 aarch64_handle_attr_arch (const char *str
)
9582 const struct processor
*tmp_arch
= NULL
;
9583 enum aarch64_parse_opt_result parse_res
9584 = aarch64_parse_arch (str
, &tmp_arch
, &aarch64_isa_flags
);
9586 if (parse_res
== AARCH64_PARSE_OK
)
9588 gcc_assert (tmp_arch
);
9589 selected_arch
= tmp_arch
;
9590 explicit_arch
= selected_arch
->arch
;
9596 case AARCH64_PARSE_MISSING_ARG
:
9597 error ("missing name in %<target(\"arch=\")%> pragma or attribute");
9599 case AARCH64_PARSE_INVALID_ARG
:
9600 error ("invalid name (\"%s\") in %<target(\"arch=\")%> pragma or attribute", str
);
9601 aarch64_print_hint_for_arch (str
);
9603 case AARCH64_PARSE_INVALID_FEATURE
:
9604 error ("invalid value (\"%s\") in %<target()%> pragma or attribute", str
);
9613 /* Handle the argument CPU_STR to the cpu= target attribute. */
9616 aarch64_handle_attr_cpu (const char *str
)
9618 const struct processor
*tmp_cpu
= NULL
;
9619 enum aarch64_parse_opt_result parse_res
9620 = aarch64_parse_cpu (str
, &tmp_cpu
, &aarch64_isa_flags
);
9622 if (parse_res
== AARCH64_PARSE_OK
)
9624 gcc_assert (tmp_cpu
);
9625 selected_tune
= tmp_cpu
;
9626 explicit_tune_core
= selected_tune
->ident
;
9628 selected_arch
= &all_architectures
[tmp_cpu
->arch
];
9629 explicit_arch
= selected_arch
->arch
;
9635 case AARCH64_PARSE_MISSING_ARG
:
9636 error ("missing name in %<target(\"cpu=\")%> pragma or attribute");
9638 case AARCH64_PARSE_INVALID_ARG
:
9639 error ("invalid name (\"%s\") in %<target(\"cpu=\")%> pragma or attribute", str
);
9640 aarch64_print_hint_for_core (str
);
9642 case AARCH64_PARSE_INVALID_FEATURE
:
9643 error ("invalid value (\"%s\") in %<target()%> pragma or attribute", str
);
9652 /* Handle the argument STR to the tune= target attribute. */
9655 aarch64_handle_attr_tune (const char *str
)
9657 const struct processor
*tmp_tune
= NULL
;
9658 enum aarch64_parse_opt_result parse_res
9659 = aarch64_parse_tune (str
, &tmp_tune
);
9661 if (parse_res
== AARCH64_PARSE_OK
)
9663 gcc_assert (tmp_tune
);
9664 selected_tune
= tmp_tune
;
9665 explicit_tune_core
= selected_tune
->ident
;
9671 case AARCH64_PARSE_INVALID_ARG
:
9672 error ("invalid name (\"%s\") in %<target(\"tune=\")%> pragma or attribute", str
);
9673 aarch64_print_hint_for_core (str
);
9682 /* Parse an architecture extensions target attribute string specified in STR.
9683 For example "+fp+nosimd". Show any errors if needed. Return TRUE
9684 if successful. Update aarch64_isa_flags to reflect the ISA features
9688 aarch64_handle_attr_isa_flags (char *str
)
9690 enum aarch64_parse_opt_result parse_res
;
9691 unsigned long isa_flags
= aarch64_isa_flags
;
9693 /* We allow "+nothing" in the beginning to clear out all architectural
9694 features if the user wants to handpick specific features. */
9695 if (strncmp ("+nothing", str
, 8) == 0)
9701 parse_res
= aarch64_parse_extension (str
, &isa_flags
);
9703 if (parse_res
== AARCH64_PARSE_OK
)
9705 aarch64_isa_flags
= isa_flags
;
9711 case AARCH64_PARSE_MISSING_ARG
:
9712 error ("missing value in %<target()%> pragma or attribute");
9715 case AARCH64_PARSE_INVALID_FEATURE
:
9716 error ("invalid value (\"%s\") in %<target()%> pragma or attribute", str
);
/* The target attributes that we support.  On top of these we also support just
   ISA extensions, like  __attribute__ ((target ("+crc"))), but that case is
   handled explicitly in aarch64_process_one_target_attr.  */

static const struct aarch64_attribute_info aarch64_attributes[] =
{
  { "general-regs-only", aarch64_attr_mask, false, NULL,
     OPT_mgeneral_regs_only },
  { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
     OPT_mfix_cortex_a53_835769 },
  { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
     OPT_mfix_cortex_a53_843419 },
  { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
  { "strict-align", aarch64_attr_mask, false, NULL, OPT_mstrict_align },
  { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
     OPT_momit_leaf_frame_pointer },
  { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
  { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
     OPT_march_ },
  { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
  { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
     OPT_mtune_ },
  { "sign-return-address", aarch64_attr_enum, false, NULL,
     OPT_msign_return_address_ },
  { NULL, aarch64_attr_custom, false, NULL, OPT____ }
};
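/* For example (illustrative only), a declaration such as

     int f (void) __attribute__ ((target ("arch=armv8-a+crc,strict-align")));

   is split on ',' by aarch64_process_target_attr below; "arch=armv8-a+crc"
   reaches aarch64_handle_attr_arch through the custom handler slot of the
   table above, while "strict-align" is routed through the generic
   aarch64_attr_mask path and toggles a bit in target_flags.  */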
9753 /* Parse ARG_STR which contains the definition of one target attribute.
9754 Show appropriate errors if any or return true if the attribute is valid. */
9757 aarch64_process_one_target_attr (char *arg_str
)
9759 bool invert
= false;
9761 size_t len
= strlen (arg_str
);
9765 error ("malformed %<target()%> pragma or attribute");
9769 char *str_to_check
= (char *) alloca (len
+ 1);
9770 strcpy (str_to_check
, arg_str
);
9772 /* Skip leading whitespace. */
9773 while (*str_to_check
== ' ' || *str_to_check
== '\t')
9776 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
9777 It is easier to detect and handle it explicitly here rather than going
9778 through the machinery for the rest of the target attributes in this
9780 if (*str_to_check
== '+')
9781 return aarch64_handle_attr_isa_flags (str_to_check
);
9783 if (len
> 3 && strncmp (str_to_check
, "no-", 3) == 0)
9788 char *arg
= strchr (str_to_check
, '=');
9790 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
9791 and point ARG to "foo". */
9797 const struct aarch64_attribute_info
*p_attr
;
9799 for (p_attr
= aarch64_attributes
; p_attr
->name
; p_attr
++)
9801 /* If the names don't match up, or the user has given an argument
9802 to an attribute that doesn't accept one, or didn't give an argument
9803 to an attribute that expects one, fail to match. */
9804 if (strcmp (str_to_check
, p_attr
->name
) != 0)
9808 bool attr_need_arg_p
= p_attr
->attr_type
== aarch64_attr_custom
9809 || p_attr
->attr_type
== aarch64_attr_enum
;
9811 if (attr_need_arg_p
^ (arg
!= NULL
))
9813 error ("pragma or attribute %<target(\"%s\")%> does not accept an argument", str_to_check
);
9817 /* If the name matches but the attribute does not allow "no-" versions
9818 then we can't match. */
9819 if (invert
&& !p_attr
->allow_neg
)
9821 error ("pragma or attribute %<target(\"%s\")%> does not allow a negated form", str_to_check
);
9825 switch (p_attr
->attr_type
)
9827 /* Has a custom handler registered.
9828 For example, cpu=, arch=, tune=. */
9829 case aarch64_attr_custom
:
9830 gcc_assert (p_attr
->handler
);
9831 if (!p_attr
->handler (arg
))
9835 /* Either set or unset a boolean option. */
9836 case aarch64_attr_bool
:
9838 struct cl_decoded_option decoded
;
9840 generate_option (p_attr
->opt_num
, NULL
, !invert
,
9841 CL_TARGET
, &decoded
);
9842 aarch64_handle_option (&global_options
, &global_options_set
,
9843 &decoded
, input_location
);
9846 /* Set or unset a bit in the target_flags. aarch64_handle_option
9847 should know what mask to apply given the option number. */
9848 case aarch64_attr_mask
:
9850 struct cl_decoded_option decoded
;
9851 /* We only need to specify the option number.
9852 aarch64_handle_option will know which mask to apply. */
9853 decoded
.opt_index
= p_attr
->opt_num
;
9854 decoded
.value
= !invert
;
9855 aarch64_handle_option (&global_options
, &global_options_set
,
9856 &decoded
, input_location
);
9859 /* Use the option setting machinery to set an option to an enum. */
9860 case aarch64_attr_enum
:
9865 valid
= opt_enum_arg_to_value (p_attr
->opt_num
, arg
,
9869 set_option (&global_options
, NULL
, p_attr
->opt_num
, value
,
9870 NULL
, DK_UNSPECIFIED
, input_location
,
9875 error ("pragma or attribute %<target(\"%s=%s\")%> is not valid", str_to_check
, arg
);
9884 /* If we reached here we either have found an attribute and validated
9885 it or didn't match any. If we matched an attribute but its arguments
9886 were malformed we will have returned false already. */
/* Count how many times the character C appears in
   NULL-terminated string STR.  */

static unsigned int
num_occurences_in_str (char c, char *str)
{
  unsigned int res = 0;
  while (*str != '\0')
    {
      if (*str == c)
	res++;

      str++;
    }

  return res;
}
9908 /* Parse the tree in ARGS that contains the target attribute information
9909 and update the global target options space. */
9912 aarch64_process_target_attr (tree args
)
9914 if (TREE_CODE (args
) == TREE_LIST
)
9918 tree head
= TREE_VALUE (args
);
9921 if (!aarch64_process_target_attr (head
))
9924 args
= TREE_CHAIN (args
);
9930 if (TREE_CODE (args
) != STRING_CST
)
9932 error ("attribute %<target%> argument not a string");
9936 size_t len
= strlen (TREE_STRING_POINTER (args
));
9937 char *str_to_check
= (char *) alloca (len
+ 1);
9938 strcpy (str_to_check
, TREE_STRING_POINTER (args
));
9942 error ("malformed %<target()%> pragma or attribute");
9946 /* Used to catch empty spaces between commas i.e.
9947 attribute ((target ("attr1,,attr2"))). */
9948 unsigned int num_commas
= num_occurences_in_str (',', str_to_check
);
9950 /* Handle multiple target attributes separated by ','. */
9951 char *token
= strtok (str_to_check
, ",");
9953 unsigned int num_attrs
= 0;
9957 if (!aarch64_process_one_target_attr (token
))
9959 error ("pragma or attribute %<target(\"%s\")%> is not valid", token
);
9963 token
= strtok (NULL
, ",");
9966 if (num_attrs
!= num_commas
+ 1)
9968 error ("malformed %<target(\"%s\")%> pragma or attribute", TREE_STRING_POINTER (args
));
9975 /* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
9976 process attribute ((target ("..."))). */
9979 aarch64_option_valid_attribute_p (tree fndecl
, tree
, tree args
, int)
9981 struct cl_target_option cur_target
;
9984 tree new_target
, new_optimize
;
9985 tree existing_target
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
9987 /* If what we're processing is the current pragma string then the
9988 target option node is already stored in target_option_current_node
9989 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
9990 having to re-parse the string. This is especially useful to keep
9991 arm_neon.h compile times down since that header contains a lot
9992 of intrinsics enclosed in pragmas. */
9993 if (!existing_target
&& args
== current_target_pragma
)
9995 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = target_option_current_node
;
9998 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
10000 old_optimize
= build_optimization_node (&global_options
);
10001 func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
10003 /* If the function changed the optimization levels as well as setting
10004 target options, start with the optimizations specified. */
10005 if (func_optimize
&& func_optimize
!= old_optimize
)
10006 cl_optimization_restore (&global_options
,
10007 TREE_OPTIMIZATION (func_optimize
));
10009 /* Save the current target options to restore at the end. */
10010 cl_target_option_save (&cur_target
, &global_options
);
10012 /* If fndecl already has some target attributes applied to it, unpack
10013 them so that we add this attribute on top of them, rather than
10014 overwriting them. */
10015 if (existing_target
)
10017 struct cl_target_option
*existing_options
10018 = TREE_TARGET_OPTION (existing_target
);
10020 if (existing_options
)
10021 cl_target_option_restore (&global_options
, existing_options
);
10024 cl_target_option_restore (&global_options
,
10025 TREE_TARGET_OPTION (target_option_current_node
));
10027 ret
= aarch64_process_target_attr (args
);
10029 /* Set up any additional state. */
10032 aarch64_override_options_internal (&global_options
);
10033 /* Initialize SIMD builtins if we haven't already.
10034 Set current_target_pragma to NULL for the duration so that
10035 the builtin initialization code doesn't try to tag the functions
10036 being built with the attributes specified by any current pragma, thus
10037 going into an infinite recursion. */
10040 tree saved_current_target_pragma
= current_target_pragma
;
10041 current_target_pragma
= NULL
;
10042 aarch64_init_simd_builtins ();
10043 current_target_pragma
= saved_current_target_pragma
;
10045 new_target
= build_target_option_node (&global_options
);
10050 new_optimize
= build_optimization_node (&global_options
);
10054 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
10056 if (old_optimize
!= new_optimize
)
10057 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
10060 cl_target_option_restore (&global_options
, &cur_target
);
10062 if (old_optimize
!= new_optimize
)
10063 cl_optimization_restore (&global_options
,
10064 TREE_OPTIMIZATION (old_optimize
));
/* Helper for aarch64_can_inline_p.  In the case where CALLER and CALLEE are
   tri-bool options (yes, no, don't care) and the default value is
   DEF, determine whether to reject inlining.  */

static bool
aarch64_tribools_ok_for_inlining_p (int caller, int callee,
				    int dont_care, int def)
{
  /* If the callee doesn't care, always allow inlining.  */
  if (callee == dont_care)
    return true;

  /* If the caller doesn't care, always allow inlining.  */
  if (caller == dont_care)
    return true;

  /* Otherwise, allow inlining if either the callee and caller values
     agree, or if the callee is using the default value.  */
  return (callee == caller || callee == def);
}
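/* Truth sketch for the helper above with DONT_CARE == 2 and, purely for
   illustration, DEF == 0:

     aarch64_tribools_ok_for_inlining_p (1, 2, 2, 0) -> true   (callee doesn't care)
     aarch64_tribools_ok_for_inlining_p (1, 0, 2, 0) -> true   (callee uses default)
     aarch64_tribools_ok_for_inlining_p (0, 1, 2, 0) -> false  (explicit mismatch)  */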
10089 /* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
10090 to inline CALLEE into CALLER based on target-specific info.
10091 Make sure that the caller and callee have compatible architectural
10092 features. Then go through the other possible target attributes
10093 and see if they can block inlining. Try not to reject always_inline
10094 callees unless they are incompatible architecturally. */
10097 aarch64_can_inline_p (tree caller
, tree callee
)
10099 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
10100 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
10102 /* If callee has no option attributes, then it is ok to inline. */
10106 struct cl_target_option
*caller_opts
10107 = TREE_TARGET_OPTION (caller_tree
? caller_tree
10108 : target_option_default_node
);
10110 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
10113 /* Callee's ISA flags should be a subset of the caller's. */
10114 if ((caller_opts
->x_aarch64_isa_flags
& callee_opts
->x_aarch64_isa_flags
)
10115 != callee_opts
->x_aarch64_isa_flags
)
10118 /* Allow non-strict aligned functions inlining into strict
10120 if ((TARGET_STRICT_ALIGN_P (caller_opts
->x_target_flags
)
10121 != TARGET_STRICT_ALIGN_P (callee_opts
->x_target_flags
))
10122 && !(!TARGET_STRICT_ALIGN_P (callee_opts
->x_target_flags
)
10123 && TARGET_STRICT_ALIGN_P (caller_opts
->x_target_flags
)))
10126 bool always_inline
= lookup_attribute ("always_inline",
10127 DECL_ATTRIBUTES (callee
));
10129 /* If the architectural features match up and the callee is always_inline
10130 then the other attributes don't matter. */
10134 if (caller_opts
->x_aarch64_cmodel_var
10135 != callee_opts
->x_aarch64_cmodel_var
)
10138 if (caller_opts
->x_aarch64_tls_dialect
10139 != callee_opts
->x_aarch64_tls_dialect
)
10142 /* Honour explicit requests to workaround errata. */
10143 if (!aarch64_tribools_ok_for_inlining_p (
10144 caller_opts
->x_aarch64_fix_a53_err835769
,
10145 callee_opts
->x_aarch64_fix_a53_err835769
,
10146 2, TARGET_FIX_ERR_A53_835769_DEFAULT
))
10149 if (!aarch64_tribools_ok_for_inlining_p (
10150 caller_opts
->x_aarch64_fix_a53_err843419
,
10151 callee_opts
->x_aarch64_fix_a53_err843419
,
10152 2, TARGET_FIX_ERR_A53_843419
))
10155 /* If the user explicitly specified -momit-leaf-frame-pointer for the
10156 caller and calle and they don't match up, reject inlining. */
10157 if (!aarch64_tribools_ok_for_inlining_p (
10158 caller_opts
->x_flag_omit_leaf_frame_pointer
,
10159 callee_opts
->x_flag_omit_leaf_frame_pointer
,
10163 /* If the callee has specific tuning overrides, respect them. */
10164 if (callee_opts
->x_aarch64_override_tune_string
!= NULL
10165 && caller_opts
->x_aarch64_override_tune_string
== NULL
)
10168 /* If the user specified tuning override strings for the
10169 caller and callee and they don't match up, reject inlining.
10170 We just do a string compare here, we don't analyze the meaning
10171 of the string, as it would be too costly for little gain. */
10172 if (callee_opts
->x_aarch64_override_tune_string
10173 && caller_opts
->x_aarch64_override_tune_string
10174 && (strcmp (callee_opts
->x_aarch64_override_tune_string
,
10175 caller_opts
->x_aarch64_override_tune_string
) != 0))
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	  : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}
/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	case AARCH64_CMODEL_TINY_PIC:
	  return SYMBOL_TINY_TLSIE;
	default:
	  return SYMBOL_SMALL_TLSIE;
	}

    case TLS_MODEL_LOCAL_EXEC:
      if (aarch64_tls_size == 12)
	return SYMBOL_TLSLE12;
      else if (aarch64_tls_size == 24)
	return SYMBOL_TLSLE24;
      else if (aarch64_tls_size == 32)
	return SYMBOL_TLSLE32;
      else if (aarch64_tls_size == 48)
	return SYMBOL_TLSLE48;
      else
	gcc_unreachable ();

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
10247 /* Return the method that should be used to access SYMBOL_REF or
10250 enum aarch64_symbol_type
10251 aarch64_classify_symbol (rtx x
, rtx offset
)
10253 if (GET_CODE (x
) == LABEL_REF
)
10255 switch (aarch64_cmodel
)
10257 case AARCH64_CMODEL_LARGE
:
10258 return SYMBOL_FORCE_TO_MEM
;
10260 case AARCH64_CMODEL_TINY_PIC
:
10261 case AARCH64_CMODEL_TINY
:
10262 return SYMBOL_TINY_ABSOLUTE
;
10264 case AARCH64_CMODEL_SMALL_SPIC
:
10265 case AARCH64_CMODEL_SMALL_PIC
:
10266 case AARCH64_CMODEL_SMALL
:
10267 return SYMBOL_SMALL_ABSOLUTE
;
10270 gcc_unreachable ();
10274 if (GET_CODE (x
) == SYMBOL_REF
)
10276 if (aarch64_tls_symbol_p (x
))
10277 return aarch64_classify_tls_symbol (x
);
10279 switch (aarch64_cmodel
)
10281 case AARCH64_CMODEL_TINY
:
10282 /* When we retrieve symbol + offset address, we have to make sure
10283 the offset does not cause overflow of the final address. But
10284 we have no way of knowing the address of symbol at compile time
10285 so we can't accurately say if the distance between the PC and
10286 symbol + offset is outside the addressible range of +/-1M in the
10287 TINY code model. So we rely on images not being greater than
10288 1M and cap the offset at 1M and anything beyond 1M will have to
10289 be loaded using an alternative mechanism. Furthermore if the
10290 symbol is a weak reference to something that isn't known to
10291 resolve to a symbol in this module, then force to memory. */
10292 if ((SYMBOL_REF_WEAK (x
)
10293 && !aarch64_symbol_binds_local_p (x
))
10294 || INTVAL (offset
) < -1048575 || INTVAL (offset
) > 1048575)
10295 return SYMBOL_FORCE_TO_MEM
;
10296 return SYMBOL_TINY_ABSOLUTE
;
10298 case AARCH64_CMODEL_SMALL
:
10299 /* Same reasoning as the tiny code model, but the offset cap here is
10301 if ((SYMBOL_REF_WEAK (x
)
10302 && !aarch64_symbol_binds_local_p (x
))
10303 || !IN_RANGE (INTVAL (offset
), HOST_WIDE_INT_C (-4294967263),
10304 HOST_WIDE_INT_C (4294967264)))
10305 return SYMBOL_FORCE_TO_MEM
;
10306 return SYMBOL_SMALL_ABSOLUTE
;
10308 case AARCH64_CMODEL_TINY_PIC
:
10309 if (!aarch64_symbol_binds_local_p (x
))
10310 return SYMBOL_TINY_GOT
;
10311 return SYMBOL_TINY_ABSOLUTE
;
10313 case AARCH64_CMODEL_SMALL_SPIC
:
10314 case AARCH64_CMODEL_SMALL_PIC
:
10315 if (!aarch64_symbol_binds_local_p (x
))
10316 return (aarch64_cmodel
== AARCH64_CMODEL_SMALL_SPIC
10317 ? SYMBOL_SMALL_GOT_28K
: SYMBOL_SMALL_GOT_4G
);
10318 return SYMBOL_SMALL_ABSOLUTE
;
10320 case AARCH64_CMODEL_LARGE
:
10321 /* This is alright even in PIC code as the constant
10322 pool reference is always PC relative and within
10323 the same translation unit. */
10324 if (!aarch64_pcrelative_literal_loads
&& CONSTANT_POOL_ADDRESS_P (x
))
10325 return SYMBOL_SMALL_ABSOLUTE
;
10327 return SYMBOL_FORCE_TO_MEM
;
10330 gcc_unreachable ();
10334 /* By default push everything into the constant pool. */
10335 return SYMBOL_FORCE_TO_MEM
;
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}
10356 /* Implement TARGET_LEGITIMATE_CONSTANT_P hook. Return true for constants
10357 that should be rematerialized rather than spilled. */
10360 aarch64_legitimate_constant_p (machine_mode mode
, rtx x
)
10362 /* Support CSE and rematerialization of common constants. */
10363 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
10366 /* Do not allow vector struct mode constants. We could support
10367 0 and -1 easily, but they need support in aarch64-simd.md. */
10368 if (aarch64_vect_struct_mode_p (mode
))
10371 /* Do not allow wide int constants - this requires support in movti. */
10372 if (CONST_WIDE_INT_P (x
))
10375 /* Do not allow const (plus (anchor_symbol, const_int)). */
10376 if (GET_CODE (x
) == CONST
)
10380 split_const (x
, &x
, &offset
);
10382 if (SYMBOL_REF_P (x
) && SYMBOL_REF_ANCHOR_P (x
))
10386 if (GET_CODE (x
) == HIGH
)
10389 /* Treat symbols as constants. Avoid TLS symbols as they are complex,
10390 so spilling them is better than rematerialization. */
10391 if (SYMBOL_REF_P (x
) && !SYMBOL_REF_TLS_MODEL (x
))
10394 /* Label references are always constant. */
10395 if (GET_CODE (x
) == LABEL_REF
)
10402 aarch64_load_tp (rtx target
)
10405 || GET_MODE (target
) != Pmode
10406 || !register_operand (target
, Pmode
))
10407 target
= gen_reg_rtx (Pmode
);
10409 /* Can return in any reg. */
10410 emit_insn (gen_aarch64_load_tp_hard (target
));
10414 /* On AAPCS systems, this is the "struct __va_list". */
10415 static GTY(()) tree va_list_type
;
10417 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
10418 Return the type to use as __builtin_va_list.
10420 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
10432 aarch64_build_builtin_va_list (void)
10435 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
10437 /* Create the type. */
10438 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
10439 /* Give it the required name. */
10440 va_list_name
= build_decl (BUILTINS_LOCATION
,
10442 get_identifier ("__va_list"),
10444 DECL_ARTIFICIAL (va_list_name
) = 1;
10445 TYPE_NAME (va_list_type
) = va_list_name
;
10446 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
10448 /* Create the fields. */
10449 f_stack
= build_decl (BUILTINS_LOCATION
,
10450 FIELD_DECL
, get_identifier ("__stack"),
10452 f_grtop
= build_decl (BUILTINS_LOCATION
,
10453 FIELD_DECL
, get_identifier ("__gr_top"),
10455 f_vrtop
= build_decl (BUILTINS_LOCATION
,
10456 FIELD_DECL
, get_identifier ("__vr_top"),
10458 f_groff
= build_decl (BUILTINS_LOCATION
,
10459 FIELD_DECL
, get_identifier ("__gr_offs"),
10460 integer_type_node
);
10461 f_vroff
= build_decl (BUILTINS_LOCATION
,
10462 FIELD_DECL
, get_identifier ("__vr_offs"),
10463 integer_type_node
);
10465 /* Tell tree-stdarg pass about our internal offset fields.
10466 NOTE: va_list_gpr/fpr_counter_field are only used for tree comparision
10467 purpose to identify whether the code is updating va_list internal
10468 offset fields through irregular way. */
10469 va_list_gpr_counter_field
= f_groff
;
10470 va_list_fpr_counter_field
= f_vroff
;
10472 DECL_ARTIFICIAL (f_stack
) = 1;
10473 DECL_ARTIFICIAL (f_grtop
) = 1;
10474 DECL_ARTIFICIAL (f_vrtop
) = 1;
10475 DECL_ARTIFICIAL (f_groff
) = 1;
10476 DECL_ARTIFICIAL (f_vroff
) = 1;
10478 DECL_FIELD_CONTEXT (f_stack
) = va_list_type
;
10479 DECL_FIELD_CONTEXT (f_grtop
) = va_list_type
;
10480 DECL_FIELD_CONTEXT (f_vrtop
) = va_list_type
;
10481 DECL_FIELD_CONTEXT (f_groff
) = va_list_type
;
10482 DECL_FIELD_CONTEXT (f_vroff
) = va_list_type
;
10484 TYPE_FIELDS (va_list_type
) = f_stack
;
10485 DECL_CHAIN (f_stack
) = f_grtop
;
10486 DECL_CHAIN (f_grtop
) = f_vrtop
;
10487 DECL_CHAIN (f_vrtop
) = f_groff
;
10488 DECL_CHAIN (f_groff
) = f_vroff
;
10490 /* Compute its layout. */
10491 layout_type (va_list_type
);
10493 return va_list_type
;
10496 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
10498 aarch64_expand_builtin_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
10500 const CUMULATIVE_ARGS
*cum
;
10501 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
10502 tree stack
, grtop
, vrtop
, groff
, vroff
;
10504 int gr_save_area_size
= cfun
->va_list_gpr_size
;
10505 int vr_save_area_size
= cfun
->va_list_fpr_size
;
10508 cum
= &crtl
->args
.info
;
10509 if (cfun
->va_list_gpr_size
)
10510 gr_save_area_size
= MIN ((NUM_ARG_REGS
- cum
->aapcs_ncrn
) * UNITS_PER_WORD
,
10511 cfun
->va_list_gpr_size
);
10512 if (cfun
->va_list_fpr_size
)
10513 vr_save_area_size
= MIN ((NUM_FP_ARG_REGS
- cum
->aapcs_nvrn
)
10514 * UNITS_PER_VREG
, cfun
->va_list_fpr_size
);
10518 gcc_assert (cum
->aapcs_nvrn
== 0);
10519 vr_save_area_size
= 0;
10522 f_stack
= TYPE_FIELDS (va_list_type_node
);
10523 f_grtop
= DECL_CHAIN (f_stack
);
10524 f_vrtop
= DECL_CHAIN (f_grtop
);
10525 f_groff
= DECL_CHAIN (f_vrtop
);
10526 f_vroff
= DECL_CHAIN (f_groff
);
10528 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), valist
, f_stack
,
10530 grtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
), valist
, f_grtop
,
10532 vrtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
), valist
, f_vrtop
,
10534 groff
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
), valist
, f_groff
,
10536 vroff
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
), valist
, f_vroff
,
10539 /* Emit code to initialize STACK, which points to the next varargs stack
10540 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
10541 by named arguments. STACK is 8-byte aligned. */
10542 t
= make_tree (TREE_TYPE (stack
), virtual_incoming_args_rtx
);
10543 if (cum
->aapcs_stack_size
> 0)
10544 t
= fold_build_pointer_plus_hwi (t
, cum
->aapcs_stack_size
* UNITS_PER_WORD
);
10545 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), stack
, t
);
10546 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
10548 /* Emit code to initialize GRTOP, the top of the GR save area.
10549 virtual_incoming_args_rtx should have been 16 byte aligned. */
10550 t
= make_tree (TREE_TYPE (grtop
), virtual_incoming_args_rtx
);
10551 t
= build2 (MODIFY_EXPR
, TREE_TYPE (grtop
), grtop
, t
);
10552 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
10554 /* Emit code to initialize VRTOP, the top of the VR save area.
10555 This address is gr_save_area_bytes below GRTOP, rounded
10556 down to the next 16-byte boundary. */
10557 t
= make_tree (TREE_TYPE (vrtop
), virtual_incoming_args_rtx
);
10558 vr_offset
= ROUND_UP (gr_save_area_size
,
10559 STACK_BOUNDARY
/ BITS_PER_UNIT
);
10562 t
= fold_build_pointer_plus_hwi (t
, -vr_offset
);
10563 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vrtop
), vrtop
, t
);
10564 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
10566 /* Emit code to initialize GROFF, the offset from GRTOP of the
10567 next GPR argument. */
10568 t
= build2 (MODIFY_EXPR
, TREE_TYPE (groff
), groff
,
10569 build_int_cst (TREE_TYPE (groff
), -gr_save_area_size
));
10570 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
10572 /* Likewise emit code to initialize VROFF, the offset from FTOP
10573 of the next VR argument. */
10574 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vroff
), vroff
,
10575 build_int_cst (TREE_TYPE (vroff
), -vr_save_area_size
));
10576 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
10579 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
10582 aarch64_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
10583 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
10587 bool is_ha
; /* is HFA or HVA. */
10588 bool dw_align
; /* double-word align. */
10589 machine_mode ag_mode
= VOIDmode
;
10593 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
10594 tree stack
, f_top
, f_off
, off
, arg
, roundup
, on_stack
;
10595 HOST_WIDE_INT size
, rsize
, adjust
, align
;
10596 tree t
, u
, cond1
, cond2
;
10598 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
10600 type
= build_pointer_type (type
);
10602 mode
= TYPE_MODE (type
);
10604 f_stack
= TYPE_FIELDS (va_list_type_node
);
10605 f_grtop
= DECL_CHAIN (f_stack
);
10606 f_vrtop
= DECL_CHAIN (f_grtop
);
10607 f_groff
= DECL_CHAIN (f_vrtop
);
10608 f_vroff
= DECL_CHAIN (f_groff
);
10610 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), unshare_expr (valist
),
10611 f_stack
, NULL_TREE
);
10612 size
= int_size_in_bytes (type
);
10613 align
= aarch64_function_arg_alignment (mode
, type
) / BITS_PER_UNIT
;
10617 if (aarch64_vfp_is_call_or_return_candidate (mode
,
10623 /* TYPE passed in fp/simd registers. */
10625 aarch64_err_no_fpadvsimd (mode
, "varargs");
10627 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
),
10628 unshare_expr (valist
), f_vrtop
, NULL_TREE
);
10629 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
),
10630 unshare_expr (valist
), f_vroff
, NULL_TREE
);
10632 rsize
= nregs
* UNITS_PER_VREG
;
10636 if (BYTES_BIG_ENDIAN
&& GET_MODE_SIZE (ag_mode
) < UNITS_PER_VREG
)
10637 adjust
= UNITS_PER_VREG
- GET_MODE_SIZE (ag_mode
);
10639 else if (BLOCK_REG_PADDING (mode
, type
, 1) == PAD_DOWNWARD
10640 && size
< UNITS_PER_VREG
)
10642 adjust
= UNITS_PER_VREG
- size
;
10647 /* TYPE passed in general registers. */
10648 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
),
10649 unshare_expr (valist
), f_grtop
, NULL_TREE
);
10650 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
),
10651 unshare_expr (valist
), f_groff
, NULL_TREE
);
10652 rsize
= ROUND_UP (size
, UNITS_PER_WORD
);
10653 nregs
= rsize
/ UNITS_PER_WORD
;
10658 if (BLOCK_REG_PADDING (mode
, type
, 1) == PAD_DOWNWARD
10659 && size
< UNITS_PER_WORD
)
10661 adjust
= UNITS_PER_WORD
- size
;
10665 /* Get a local temporary for the field value. */
10666 off
= get_initialized_tmp_var (f_off
, pre_p
, NULL
);
10668 /* Emit code to branch if off >= 0. */
10669 t
= build2 (GE_EXPR
, boolean_type_node
, off
,
10670 build_int_cst (TREE_TYPE (off
), 0));
10671 cond1
= build3 (COND_EXPR
, ptr_type_node
, t
, NULL_TREE
, NULL_TREE
);
10675 /* Emit: offs = (offs + 15) & -16. */
10676 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
10677 build_int_cst (TREE_TYPE (off
), 15));
10678 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (off
), t
,
10679 build_int_cst (TREE_TYPE (off
), -16));
10680 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (off
), off
, t
);
10685 /* Update ap.__[g|v]r_offs */
10686 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
10687 build_int_cst (TREE_TYPE (off
), rsize
));
10688 t
= build2 (MODIFY_EXPR
, TREE_TYPE (f_off
), unshare_expr (f_off
), t
);
10692 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
10694 /* [cond2] if (ap.__[g|v]r_offs > 0) */
10695 u
= build2 (GT_EXPR
, boolean_type_node
, unshare_expr (f_off
),
10696 build_int_cst (TREE_TYPE (f_off
), 0));
10697 cond2
= build3 (COND_EXPR
, ptr_type_node
, u
, NULL_TREE
, NULL_TREE
);
10699 /* String up: make sure the assignment happens before the use. */
10700 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (cond2
), t
, cond2
);
10701 COND_EXPR_ELSE (cond1
) = t
;
10703 /* Prepare the trees handling the argument that is passed on the stack;
10704 the top level node will store in ON_STACK. */
10705 arg
= get_initialized_tmp_var (stack
, pre_p
, NULL
);
10708 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
10709 t
= fold_convert (intDI_type_node
, arg
);
10710 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
10711 build_int_cst (TREE_TYPE (t
), 15));
10712 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
10713 build_int_cst (TREE_TYPE (t
), -16));
10714 t
= fold_convert (TREE_TYPE (arg
), t
);
10715 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (arg
), arg
, t
);
10719 /* Advance ap.__stack */
10720 t
= fold_convert (intDI_type_node
, arg
);
10721 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
10722 build_int_cst (TREE_TYPE (t
), size
+ 7));
10723 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
10724 build_int_cst (TREE_TYPE (t
), -8));
10725 t
= fold_convert (TREE_TYPE (arg
), t
);
10726 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), unshare_expr (stack
), t
);
10727 /* String up roundup and advance. */
10729 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
10730 /* String up with arg */
10731 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), t
, arg
);
10732 /* Big-endianness related address adjustment. */
10733 if (BLOCK_REG_PADDING (mode
, type
, 1) == PAD_DOWNWARD
10734 && size
< UNITS_PER_WORD
)
10736 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (arg
), arg
,
10737 size_int (UNITS_PER_WORD
- size
));
10738 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), on_stack
, t
);
10741 COND_EXPR_THEN (cond1
) = unshare_expr (on_stack
);
10742 COND_EXPR_THEN (cond2
) = unshare_expr (on_stack
);
10744 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
10747 t
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (off
), off
,
10748 build_int_cst (TREE_TYPE (off
), adjust
));
10750 t
= fold_convert (sizetype
, t
);
10751 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (f_top
), f_top
, t
);
10755 /* type ha; // treat as "struct {ftype field[n];}"
10756 ... [computing offs]
10757 for (i = 0; i <nregs; ++i, offs += 16)
10758 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
10761 tree tmp_ha
, field_t
, field_ptr_t
;
10763 /* Declare a local variable. */
10764 tmp_ha
= create_tmp_var_raw (type
, "ha");
10765 gimple_add_tmp_var (tmp_ha
);
10767 /* Establish the base type. */
10771 field_t
= float_type_node
;
10772 field_ptr_t
= float_ptr_type_node
;
10775 field_t
= double_type_node
;
10776 field_ptr_t
= double_ptr_type_node
;
10779 field_t
= long_double_type_node
;
10780 field_ptr_t
= long_double_ptr_type_node
;
10783 field_t
= aarch64_fp16_type_node
;
10784 field_ptr_t
= aarch64_fp16_ptr_type_node
;
10789 tree innertype
= make_signed_type (GET_MODE_PRECISION (SImode
));
10790 field_t
= build_vector_type_for_mode (innertype
, ag_mode
);
10791 field_ptr_t
= build_pointer_type (field_t
);
10798 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
10799 tmp_ha
= build1 (ADDR_EXPR
, field_ptr_t
, tmp_ha
);
10801 t
= fold_convert (field_ptr_t
, addr
);
10802 t
= build2 (MODIFY_EXPR
, field_t
,
10803 build1 (INDIRECT_REF
, field_t
, tmp_ha
),
10804 build1 (INDIRECT_REF
, field_t
, t
));
10806 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
10807 for (i
= 1; i
< nregs
; ++i
)
10809 addr
= fold_build_pointer_plus_hwi (addr
, UNITS_PER_VREG
);
10810 u
= fold_convert (field_ptr_t
, addr
);
10811 u
= build2 (MODIFY_EXPR
, field_t
,
10812 build2 (MEM_REF
, field_t
, tmp_ha
,
10813 build_int_cst (field_ptr_t
,
10815 int_size_in_bytes (field_t
)))),
10816 build1 (INDIRECT_REF
, field_t
, u
));
10817 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), t
, u
);
10820 u
= fold_convert (TREE_TYPE (f_top
), tmp_ha
);
10821 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (f_top
), t
, u
);
10824 COND_EXPR_ELSE (cond2
) = t
;
10825 addr
= fold_convert (build_pointer_type (type
), cond1
);
10826 addr
= build_va_arg_indirect_ref (addr
);
10829 addr
= build_va_arg_indirect_ref (addr
);
10834 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
10837 aarch64_setup_incoming_varargs (cumulative_args_t cum_v
, machine_mode mode
,
10838 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
10841 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
10842 CUMULATIVE_ARGS local_cum
;
10843 int gr_saved
= cfun
->va_list_gpr_size
;
10844 int vr_saved
= cfun
->va_list_fpr_size
;
10846 /* The caller has advanced CUM up to, but not beyond, the last named
10847 argument. Advance a local copy of CUM past the last "real" named
10848 argument, to find out how many registers are left over. */
10850 aarch64_function_arg_advance (pack_cumulative_args(&local_cum
), mode
, type
, true);
10852 /* Found out how many registers we need to save.
10853 Honor tree-stdvar analysis results. */
10854 if (cfun
->va_list_gpr_size
)
10855 gr_saved
= MIN (NUM_ARG_REGS
- local_cum
.aapcs_ncrn
,
10856 cfun
->va_list_gpr_size
/ UNITS_PER_WORD
);
10857 if (cfun
->va_list_fpr_size
)
10858 vr_saved
= MIN (NUM_FP_ARG_REGS
- local_cum
.aapcs_nvrn
,
10859 cfun
->va_list_fpr_size
/ UNITS_PER_VREG
);
10863 gcc_assert (local_cum
.aapcs_nvrn
== 0);
10873 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
10874 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
,
10875 - gr_saved
* UNITS_PER_WORD
);
10876 mem
= gen_frame_mem (BLKmode
, ptr
);
10877 set_mem_alias_set (mem
, get_varargs_alias_set ());
10879 move_block_from_reg (local_cum
.aapcs_ncrn
+ R0_REGNUM
,
10884 /* We can't use move_block_from_reg, because it will use
10885 the wrong mode, storing D regs only. */
10886 machine_mode mode
= TImode
;
10887 int off
, i
, vr_start
;
10889 /* Set OFF to the offset from virtual_incoming_args_rtx of
10890 the first vector register. The VR save area lies below
10891 the GR one, and is aligned to 16 bytes. */
10892 off
= -ROUND_UP (gr_saved
* UNITS_PER_WORD
,
10893 STACK_BOUNDARY
/ BITS_PER_UNIT
);
10894 off
-= vr_saved
* UNITS_PER_VREG
;
10896 vr_start
= V0_REGNUM
+ local_cum
.aapcs_nvrn
;
10897 for (i
= 0; i
< vr_saved
; ++i
)
10901 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
, off
);
10902 mem
= gen_frame_mem (mode
, ptr
);
10903 set_mem_alias_set (mem
, get_varargs_alias_set ());
10904 aarch64_emit_move (mem
, gen_rtx_REG (mode
, vr_start
+ i
));
10905 off
+= UNITS_PER_VREG
;
10910 /* We don't save the size into *PRETEND_SIZE because we want to avoid
10911 any complication of having crtl->args.pretend_args_size changed. */
10912 cfun
->machine
->frame
.saved_varargs_size
10913 = (ROUND_UP (gr_saved
* UNITS_PER_WORD
,
10914 STACK_BOUNDARY
/ BITS_PER_UNIT
)
10915 + vr_saved
* UNITS_PER_VREG
);
static void
aarch64_conditional_register_usage (void)
{
  int i;
  if (!TARGET_FLOAT)
    {
      for (i = V0_REGNUM; i <= V31_REGNUM; i++)
	{
	  fixed_regs[i] = 1;
	  call_used_regs[i] = 1;
	}
    }
}
10932 /* Walk down the type tree of TYPE counting consecutive base elements.
10933 If *MODEP is VOIDmode, then set it to the first valid floating point
10934 type. If a non-floating point type is found, or if a floating point
10935 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
10936 otherwise return the count in the sub-tree. */
10938 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
10941 HOST_WIDE_INT size
;
10943 switch (TREE_CODE (type
))
10946 mode
= TYPE_MODE (type
);
10947 if (mode
!= DFmode
&& mode
!= SFmode
10948 && mode
!= TFmode
&& mode
!= HFmode
)
10951 if (*modep
== VOIDmode
)
10954 if (*modep
== mode
)
10960 mode
= TYPE_MODE (TREE_TYPE (type
));
10961 if (mode
!= DFmode
&& mode
!= SFmode
10962 && mode
!= TFmode
&& mode
!= HFmode
)
10965 if (*modep
== VOIDmode
)
10968 if (*modep
== mode
)
10974 /* Use V2SImode and V4SImode as representatives of all 64-bit
10975 and 128-bit vector types. */
10976 size
= int_size_in_bytes (type
);
10989 if (*modep
== VOIDmode
)
10992 /* Vector modes are considered to be opaque: two vectors are
10993 equivalent for the purposes of being homogeneous aggregates
10994 if they are the same size. */
10995 if (*modep
== mode
)
11003 tree index
= TYPE_DOMAIN (type
);
11005 /* Can't handle incomplete types nor sizes that are not
11007 if (!COMPLETE_TYPE_P (type
)
11008 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
11011 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
11014 || !TYPE_MAX_VALUE (index
)
11015 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
11016 || !TYPE_MIN_VALUE (index
)
11017 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
11021 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
11022 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
11024 /* There must be no padding. */
11025 if (wi::to_wide (TYPE_SIZE (type
))
11026 != count
* GET_MODE_BITSIZE (*modep
))
11038 /* Can't handle incomplete types nor sizes that are not
11040 if (!COMPLETE_TYPE_P (type
)
11041 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
11044 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
11046 if (TREE_CODE (field
) != FIELD_DECL
)
11049 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
11052 count
+= sub_count
;
11055 /* There must be no padding. */
11056 if (wi::to_wide (TYPE_SIZE (type
))
11057 != count
* GET_MODE_BITSIZE (*modep
))
11064 case QUAL_UNION_TYPE
:
11066 /* These aren't very interesting except in a degenerate case. */
11071 /* Can't handle incomplete types nor sizes that are not
11073 if (!COMPLETE_TYPE_P (type
)
11074 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
11077 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
11079 if (TREE_CODE (field
) != FIELD_DECL
)
11082 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
11085 count
= count
> sub_count
? count
: sub_count
;
11088 /* There must be no padding. */
11089 if (wi::to_wide (TYPE_SIZE (type
))
11090 != count
* GET_MODE_BITSIZE (*modep
))
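/* Illustrative sketch (not part of GCC): the counting rule implemented by
   aapcs_vfp_sub_candidate above, reduced to plain C over a flat list of
   leaf element sizes.  All leaves must share one floating-point mode
   (modelled here, as a simplification, by their byte size) for the
   aggregate to be homogeneous; otherwise the count is -1, mirroring the
   early returns above.  Helper names are hypothetical.  */
#if 0
#include <stdio.h>

/* Return the element count if every leaf has the same size, else -1.  */
static int
hfa_count (const int *leaf_sizes, int n)
{
  int i, elt = 0;

  for (i = 0; i < n; i++)
    {
      if (elt == 0)
	elt = leaf_sizes[i];            /* First leaf fixes the mode.  */
      else if (leaf_sizes[i] != elt)
	return -1;                      /* Mixed modes: not homogeneous.  */
    }
  return n;
}

int
main (void)
{
  int four_doubles[] = { 8, 8, 8, 8 };  /* struct { double a, b, c, d; }  */
  int mixed[] = { 4, 8 };               /* struct { float f; double d; }  */

  printf ("%d %d\n", hfa_count (four_doubles, 4), hfa_count (mixed, 2));
  return 0;
}
#endif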
/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

static bool
aarch64_short_vector_p (const_tree type,
			machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16);
}
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */

static bool
aarch64_composite_type_p (const_tree type,
			  machine_mode mode)
{
  if (aarch64_short_vector_p (type, mode))
    return false;

  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */

static bool
aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
					 const_tree type,
					 machine_mode *base_mode,
					 int *count,
					 bool *is_ha)
{
  machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      *count = 2;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	{
	  if (is_ha != NULL) *is_ha = true;
	  *count = ag_count;
	}
      else
	return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}
/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
aarch64_vector_mode_supported_p (machine_mode mode)
{
  if (TARGET_SIMD
      && (mode == V4SImode  || mode == V8HImode
	  || mode == V16QImode || mode == V2DImode
	  || mode == V2SImode  || mode == V4HImode
	  || mode == V8QImode || mode == V2SFmode
	  || mode == V4SFmode || mode == V2DFmode
	  || mode == V4HFmode || mode == V8HFmode
	  || mode == V1DFmode))
    return true;

  return false;
}
11236 /* Return appropriate SIMD container
11237 for MODE within a vector of WIDTH bits. */
11238 static machine_mode
11239 aarch64_simd_container_mode (scalar_mode mode
, unsigned width
)
11241 gcc_assert (width
== 64 || width
== 128);
/* Return 128-bit container as the preferred SIMD mode for MODE.  */
static machine_mode
aarch64_preferred_simd_mode (scalar_mode mode)
{
  return aarch64_simd_container_mode (mode, 128);
}
11291 /* Return the bitmask of possible vector sizes for the vectorizer
11292 to iterate over. */
11293 static unsigned int
11294 aarch64_autovectorize_vector_sizes (void)
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Mangle AArch64-specific internal types.  TYPE_NAME is non-NULL_TREE for
     builtin types.  */
  if (TYPE_NAME (type) != NULL)
    return aarch64_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Find the first rtx_insn before insn that will generate an assembly
   instruction.  */

static rtx_insn *
aarch64_prev_real_insn (rtx_insn *insn)
{
  if (!insn)
    return NULL;

  do
    {
      insn = prev_real_insn (insn);
    }
  while (insn && recog_memoized (insn) < 0);

  return insn;
}
static bool
is_madd_op (enum attr_type t1)
{
  unsigned int i;
  /* A number of these may be AArch32 only.  */
  enum attr_type mlatypes[] = {
    TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
    TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
    TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
  };

  for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
    {
      if (t1 == mlatypes[i])
	return true;
    }

  return false;
}
11360 /* Check if there is a register dependency between a load and the insn
11361 for which we hold recog_data. */
11364 dep_between_memop_and_curr (rtx memop
)
11369 gcc_assert (GET_CODE (memop
) == SET
);
11371 if (!REG_P (SET_DEST (memop
)))
11374 load_reg
= SET_DEST (memop
);
11375 for (opno
= 1; opno
< recog_data
.n_operands
; opno
++)
11377 rtx operand
= recog_data
.operand
[opno
];
11378 if (REG_P (operand
)
11379 && reg_overlap_mentioned_p (load_reg
, operand
))
11387 /* When working around the Cortex-A53 erratum 835769,
11388 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
11389 instruction and has a preceding memory instruction such that a NOP
11390 should be inserted between them. */
11393 aarch64_madd_needs_nop (rtx_insn
* insn
)
11395 enum attr_type attr_type
;
11399 if (!TARGET_FIX_ERR_A53_835769
)
11402 if (!INSN_P (insn
) || recog_memoized (insn
) < 0)
11405 attr_type
= get_attr_type (insn
);
11406 if (!is_madd_op (attr_type
))
11409 prev
= aarch64_prev_real_insn (insn
);
11410 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
11411 Restore recog state to INSN to avoid state corruption. */
11412 extract_constrain_insn_cached (insn
);
11414 if (!prev
|| !contains_mem_rtx_p (PATTERN (prev
)))
11417 body
= single_set (prev
);
11419 /* If the previous insn is a memory op and there is no dependency between
11420 it and the DImode madd, emit a NOP between them. If body is NULL then we
11421 have a complex memory operation, probably a load/store pair.
11422 Be conservative for now and emit a NOP. */
11423 if (GET_MODE (recog_data
.operand
[0]) == DImode
11424 && (!body
|| !dep_between_memop_and_curr (body
)))
11432 /* Implement FINAL_PRESCAN_INSN. */
11435 aarch64_final_prescan_insn (rtx_insn
*insn
)
11437 if (aarch64_madd_needs_nop (insn
))
11438 fprintf (asm_out_file
, "\tnop // between mem op and mult-accumulate\n");
11442 /* Return the equivalent letter for size. */
11444 sizetochar (int size
)
11448 case 64: return 'd';
11449 case 32: return 's';
11450 case 16: return 'h';
11451 case 8 : return 'b';
11452 default: gcc_unreachable ();
11456 /* Return true iff x is a uniform vector of floating-point
11457 constants, and the constant can be represented in
11458 quarter-precision form. Note, as aarch64_float_const_representable
11459 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
11461 aarch64_vect_float_const_representable_p (rtx x
)
11464 return (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_FLOAT
11465 && const_vec_duplicate_p (x
, &elt
)
11466 && aarch64_float_const_representable_p (elt
));
11469 /* Return true for valid and false for invalid. */
11471 aarch64_simd_valid_immediate (rtx op
, machine_mode mode
, bool inverse
,
11472 struct simd_immediate_info
*info
,
11473 enum simd_immediate_check which
)
11475 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
11477 for (i = 0; i < idx; i += (STRIDE)) \
11482 immtype = (CLASS); \
11483 elsize = (ELSIZE); \
11484 eshift = (SHIFT); \
11489 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
11490 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
11491 unsigned char bytes
[16];
11492 int immtype
= -1, matches
;
11493 unsigned int invmask
= inverse
? 0xff : 0;
11496 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11498 if (! (aarch64_simd_imm_zero_p (op
, mode
)
11499 || aarch64_vect_float_const_representable_p (op
)))
11504 rtx elt
= CONST_VECTOR_ELT (op
, 0);
11505 scalar_float_mode elt_mode
11506 = as_a
<scalar_float_mode
> (GET_MODE (elt
));
11509 info
->element_width
= GET_MODE_BITSIZE (elt_mode
);
11517 /* Splat vector constant out into a byte vector. */
11518 for (i
= 0; i
< n_elts
; i
++)
11520 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
11521 it must be laid out in the vector register in reverse order. */
11522 rtx el
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? (n_elts
- 1 - i
) : i
);
11523 unsigned HOST_WIDE_INT elpart
;
11525 gcc_assert (CONST_INT_P (el
));
11526 elpart
= INTVAL (el
);
11528 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
11530 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11531 elpart
>>= BITS_PER_UNIT
;
11536 /* Sanity check. */
11537 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11541 if (which
& AARCH64_CHECK_ORR
)
11543 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11544 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 0, 0);
11546 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11547 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
11549 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11550 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
11552 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11553 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3], 24, 0);
11555 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0, 0, 0);
11557 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1], 8, 0);
11560 if (which
& AARCH64_CHECK_BIC
)
11562 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11563 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 0, 1);
11565 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11566 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
11568 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11569 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
11571 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11572 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3], 24, 1);
11574 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff, 0, 1);
11576 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1], 8, 1);
11579 /* Shifting ones / 8-bit / 64-bit variants only checked
11580 for 'ALL' (MOVI/MVNI). */
11581 if (which
== AARCH64_CHECK_MOV
)
11583 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11584 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
11586 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11587 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
11589 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11590 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
11592 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11593 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
11595 CHECK (1, 8, 16, bytes
[i
] == bytes
[0], 0, 0);
11597 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11598 && bytes
[i
] == bytes
[(i
+ 8) % idx
], 0, 0);
11608 info
->element_width
= elsize
;
11609 info
->mvn
= emvn
!= 0;
11610 info
->shift
= eshift
;
11612 unsigned HOST_WIDE_INT imm
= 0;
11614 if (immtype
>= 12 && immtype
<= 15)
11617 /* Un-invert bytes of recognized vector, if necessary. */
11619 for (i
= 0; i
< idx
; i
++)
11620 bytes
[i
] ^= invmask
;
11624 /* FIXME: Broken on 32-bit H_W_I hosts. */
11625 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
11627 for (i
= 0; i
< 8; i
++)
11628 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
11629 << (i
* BITS_PER_UNIT
);
11632 info
->value
= GEN_INT (imm
);
11636 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
11637 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
11639 /* Construct 'abcdefgh' because the assembler cannot handle
11640 generic constants. */
11643 imm
= (imm
>> info
->shift
) & 0xff;
11644 info
->value
= GEN_INT (imm
);
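/* Illustrative sketch (not part of GCC): the 64-bit immediate built just
   above collapses each byte of the vector to all-ones or all-zeros, which
   is exactly what the MOVI "abcdefgh" encoding can express.  Standalone C
   with hypothetical helper names.  */
#if 0
#include <stdint.h>
#include <stdio.h>

/* Build the 64-bit value whose byte i is 0xff iff bytes[i] is nonzero.  */
static uint64_t
expand_byte_pattern (const unsigned char bytes[8])
{
  uint64_t imm = 0;
  for (int i = 0; i < 8; i++)
    imm |= (uint64_t) (bytes[i] ? 0xff : 0) << (i * 8);
  return imm;
}

int
main (void)
{
  unsigned char bytes[8] = { 1, 0, 1, 0, 1, 0, 1, 0 };
  printf ("0x%016llx\n", (unsigned long long) expand_byte_pattern (bytes));
  /* Prints 0x00ff00ff00ff00ff.  */
  return 0;
}
#endif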
/* Check that immediate shift constants are within range.  */
bool
aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
}
/* Return true if X is a uniform vector where all elements
   are either the floating-point constant 0.0 or the
   integer constant 0.  */
bool
aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
{
  return x == CONST0_RTX (mode);
}
/* Return the bitmask CONST_INT to select the bits required by a zero extract
   operation of width WIDTH at bit position POS.  */

rtx
aarch64_mask_from_zextract_ops (rtx width, rtx pos)
{
  gcc_assert (CONST_INT_P (width));
  gcc_assert (CONST_INT_P (pos));

  unsigned HOST_WIDE_INT mask
    = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
  return GEN_INT (mask << UINTVAL (pos));
}
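/* Illustrative sketch (not part of GCC): the mask computed above, as plain
   C.  For a zero_extract of WIDTH bits at bit POS the mask is
   ((1 << WIDTH) - 1) << POS; e.g. WIDTH == 8, POS == 16 gives 0xff0000.  */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  unsigned width = 8, pos = 16;
  uint64_t mask = ((UINT64_C (1) << width) - 1) << pos;
  printf ("0x%llx\n", (unsigned long long) mask);   /* 0xff0000 */
  return 0;
}
#endif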
11688 aarch64_mov_operand_p (rtx x
, machine_mode mode
)
11690 if (GET_CODE (x
) == HIGH
11691 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
11694 if (CONST_INT_P (x
))
11697 if (GET_CODE (x
) == SYMBOL_REF
&& mode
== DImode
&& CONSTANT_ADDRESS_P (x
))
11700 return aarch64_classify_symbolic_expression (x
)
11701 == SYMBOL_TINY_ABSOLUTE
;
/* Return a const_int vector of VAL.  */
rtx
aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
{
  rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
  return gen_const_vec_duplicate (mode, c);
}
/* Check OP is a legal scalar immediate for the MOVI instruction.  */
bool
aarch64_simd_scalar_immediate_valid_for_move (rtx op, scalar_int_mode mode)
{
  machine_mode vmode;

  vmode = aarch64_preferred_simd_mode (mode);
  rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}
11724 /* Construct and return a PARALLEL RTX vector with elements numbering the
11725 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
11726 the vector - from the perspective of the architecture. This does not
11727 line up with GCC's perspective on lane numbers, so we end up with
11728 different masks depending on our target endian-ness. The diagram
11729 below may help. We must draw the distinction when building masks
11730 which select one half of the vector. An instruction selecting
11731 architectural low-lanes for a big-endian target, must be described using
11732 a mask selecting GCC high-lanes.
11734 Big-Endian Little-Endian
11736 GCC 0 1 2 3 3 2 1 0
11737 | x | x | x | x | | x | x | x | x |
11738 Architecture 3 2 1 0 3 2 1 0
11740 Low Mask: { 2, 3 } { 0, 1 }
11741 High Mask: { 0, 1 } { 2, 3 }
11743 MODE Is the mode of the vector and NUNITS is the number of units in it. */
11746 aarch64_simd_vect_par_cnst_half (machine_mode mode
, int nunits
, bool high
)
11748 rtvec v
= rtvec_alloc (nunits
/ 2);
11749 int high_base
= nunits
/ 2;
11755 if (BYTES_BIG_ENDIAN
)
11756 base
= high
? low_base
: high_base
;
11758 base
= high
? high_base
: low_base
;
11760 for (i
= 0; i
< nunits
/ 2; i
++)
11761 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
11763 t1
= gen_rtx_PARALLEL (mode
, v
);
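/* Illustrative sketch (not part of GCC): the half-vector mask selection
   above for a four-element vector, matching the diagram.  On little-endian
   the architectural low half is GCC lanes { 0, 1 }; on big-endian it is
   GCC lanes { 2, 3 }.  Standalone C, names local to the sketch.  */
#if 0
#include <stdio.h>

static void
print_half_mask (int nunits, int big_endian, int high)
{
  int low_base = 0, high_base = nunits / 2;
  int base = big_endian ? (high ? low_base : high_base)
			: (high ? high_base : low_base);

  for (int i = 0; i < nunits / 2; i++)
    printf ("%d ", base + i);
  printf ("\n");
}

int
main (void)
{
  print_half_mask (4, 0, 0);   /* LE low  -> 0 1 */
  print_half_mask (4, 0, 1);   /* LE high -> 2 3 */
  print_half_mask (4, 1, 0);   /* BE low  -> 2 3 */
  print_half_mask (4, 1, 1);   /* BE high -> 0 1 */
  return 0;
}
#endif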
11767 /* Check OP for validity as a PARALLEL RTX vector with elements
11768 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
11769 from the perspective of the architecture. See the diagram above
11770 aarch64_simd_vect_par_cnst_half for more details. */
11773 aarch64_simd_check_vect_par_cnst_half (rtx op
, machine_mode mode
,
11776 if (!VECTOR_MODE_P (mode
))
11779 rtx ideal
= aarch64_simd_vect_par_cnst_half (mode
, GET_MODE_NUNITS (mode
),
11781 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
11782 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
11785 if (count_op
!= count_ideal
)
11788 for (i
= 0; i
< count_ideal
; i
++)
11790 rtx elt_op
= XVECEXP (op
, 0, i
);
11791 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
11793 if (!CONST_INT_P (elt_op
)
11794 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
11800 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
11801 HIGH (exclusive). */
11803 aarch64_simd_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
11806 HOST_WIDE_INT lane
;
11807 gcc_assert (CONST_INT_P (operand
));
11808 lane
= INTVAL (operand
);
11810 if (lane
< low
|| lane
>= high
)
11813 error ("%Klane %wd out of range %wd - %wd", exp
, lane
, low
, high
- 1);
11815 error ("lane %wd out of range %wd - %wd", lane
, low
, high
- 1);
/* Perform endian correction on lane number N, which indexes a vector
   of mode MODE, and return the result as an SImode rtx.  */

rtx
aarch64_endian_lane_rtx (machine_mode mode, unsigned int n)
{
  return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
}
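/* Illustrative sketch (not part of GCC): the endian correction above,
   assuming ENDIAN_LANE_N (NUNITS, N) maps lane N to N on little-endian
   targets and to NUNITS - 1 - N on big-endian ones.  For a 4-lane vector
   on a big-endian target, lane 0 becomes 3 and lane 3 becomes 0.  */
#if 0
#include <stdio.h>

static unsigned
endian_lane (unsigned nunits, unsigned n, int big_endian)
{
  return big_endian ? nunits - 1 - n : n;
}

int
main (void)
{
  for (unsigned n = 0; n < 4; n++)
    printf ("lane %u -> LE %u, BE %u\n",
	    n, endian_lane (4, n, 0), endian_lane (4, n, 1));
  return 0;
}
#endif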
/* Return TRUE if OP is a valid vector addressing mode.  */
bool
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
			|| REG_P (XEXP (op, 0)));
}
11836 /* Emit a register copy from operand to operand, taking care not to
11837 early-clobber source registers in the process.
11839 COUNT is the number of components into which the copy needs to be
11842 aarch64_simd_emit_reg_reg_move (rtx
*operands
, machine_mode mode
,
11843 unsigned int count
)
11846 int rdest
= REGNO (operands
[0]);
11847 int rsrc
= REGNO (operands
[1]);
11849 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
11851 for (i
= 0; i
< count
; i
++)
11852 emit_move_insn (gen_rtx_REG (mode
, rdest
+ i
),
11853 gen_rtx_REG (mode
, rsrc
+ i
));
11855 for (i
= 0; i
< count
; i
++)
11856 emit_move_insn (gen_rtx_REG (mode
, rdest
+ count
- i
- 1),
11857 gen_rtx_REG (mode
, rsrc
+ count
- i
- 1));
/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
   one of VSTRUCT modes: OI, CI, or XI.  */
int
aarch64_simd_attr_length_rglist (machine_mode mode)
{
  return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
}
/* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
   alignment of a vector to 128 bits.  */
static HOST_WIDE_INT
aarch64_simd_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
  return MIN (align, 128);
}
11877 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
11879 aarch64_simd_vector_alignment_reachable (const_tree type
, bool is_packed
)
11884 /* We guarantee alignment for vectors up to 128-bits. */
11885 if (tree_int_cst_compare (TYPE_SIZE (type
),
11886 bitsize_int (BIGGEST_ALIGNMENT
)) > 0)
11889 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
11893 /* Return true if the vector misalignment factor is supported by the
11896 aarch64_builtin_support_vector_misalignment (machine_mode mode
,
11897 const_tree type
, int misalignment
,
11900 if (TARGET_SIMD
&& STRICT_ALIGNMENT
)
11902 /* Return if movmisalign pattern is not supported for this mode. */
11903 if (optab_handler (movmisalign_optab
, mode
) == CODE_FOR_nothing
)
11906 /* Misalignment factor is unknown at compile time. */
11907 if (misalignment
== -1)
11910 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
11914 /* If VALS is a vector constant that can be loaded into a register
11915 using DUP, generate instructions to do so and return an RTX to
11916 assign to the register. Otherwise return NULL_RTX. */
11918 aarch64_simd_dup_constant (rtx vals
)
11920 machine_mode mode
= GET_MODE (vals
);
11921 machine_mode inner_mode
= GET_MODE_INNER (mode
);
11924 if (!const_vec_duplicate_p (vals
, &x
))
11927 /* We can load this constant by using DUP and a constant in a
11928 single ARM register. This will be cheaper than a vector
11930 x
= copy_to_mode_reg (inner_mode
, x
);
11931 return gen_vec_duplicate (mode
, x
);
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
11940 aarch64_simd_make_constant (rtx vals
)
11942 machine_mode mode
= GET_MODE (vals
);
11944 rtx const_vec
= NULL_RTX
;
11945 int n_elts
= GET_MODE_NUNITS (mode
);
11949 if (GET_CODE (vals
) == CONST_VECTOR
)
11951 else if (GET_CODE (vals
) == PARALLEL
)
11953 /* A CONST_VECTOR must contain only CONST_INTs and
11954 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11955 Only store valid constants in a CONST_VECTOR. */
11956 for (i
= 0; i
< n_elts
; ++i
)
11958 rtx x
= XVECEXP (vals
, 0, i
);
11959 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
11962 if (n_const
== n_elts
)
11963 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
11966 gcc_unreachable ();
11968 if (const_vec
!= NULL_RTX
11969 && aarch64_simd_valid_immediate (const_vec
, mode
, false, NULL
))
11970 /* Load using MOVI/MVNI. */
11972 else if ((const_dup
= aarch64_simd_dup_constant (vals
)) != NULL_RTX
)
11973 /* Loaded using DUP. */
11975 else if (const_vec
!= NULL_RTX
)
11976 /* Load from constant pool. We can not take advantage of single-cycle
11977 LD1 because we need a PC-relative addressing mode. */
11980 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11981 We can not construct an initializer. */
11985 /* Expand a vector initialisation sequence, such that TARGET is
11986 initialised to contain VALS. */
11989 aarch64_expand_vector_init (rtx target
, rtx vals
)
11991 machine_mode mode
= GET_MODE (target
);
11992 scalar_mode inner_mode
= GET_MODE_INNER (mode
);
11993 /* The number of vector elements. */
11994 int n_elts
= GET_MODE_NUNITS (mode
);
11995 /* The number of vector elements which are not constant. */
11997 rtx any_const
= NULL_RTX
;
11998 /* The first element of vals. */
11999 rtx v0
= XVECEXP (vals
, 0, 0);
12000 bool all_same
= true;
12002 /* Count the number of variable elements to initialise. */
12003 for (int i
= 0; i
< n_elts
; ++i
)
12005 rtx x
= XVECEXP (vals
, 0, i
);
12006 if (!(CONST_INT_P (x
) || CONST_DOUBLE_P (x
)))
12011 all_same
&= rtx_equal_p (x
, v0
);
12014 /* No variable elements, hand off to aarch64_simd_make_constant which knows
12015 how best to handle this. */
12018 rtx constant
= aarch64_simd_make_constant (vals
);
12019 if (constant
!= NULL_RTX
)
12021 emit_move_insn (target
, constant
);
12026 /* Splat a single non-constant element if we can. */
12029 rtx x
= copy_to_mode_reg (inner_mode
, v0
);
12030 aarch64_emit_move (target
, gen_vec_duplicate (mode
, x
));
12034 enum insn_code icode
= optab_handler (vec_set_optab
, mode
);
12035 gcc_assert (icode
!= CODE_FOR_nothing
);
12037 /* If there are only variable elements, try to optimize
12038 the insertion using dup for the most common element
12039 followed by insertions. */
12041 /* The algorithm will fill matches[*][0] with the earliest matching element,
12042 and matches[X][1] with the count of duplicate elements (if X is the
12043 earliest element which has duplicates). */
12045 if (n_var
== n_elts
&& n_elts
<= 16)
12047 int matches
[16][2] = {0};
12048 for (int i
= 0; i
< n_elts
; i
++)
12050 for (int j
= 0; j
<= i
; j
++)
12052 if (rtx_equal_p (XVECEXP (vals
, 0, i
), XVECEXP (vals
, 0, j
)))
12060 int maxelement
= 0;
12062 for (int i
= 0; i
< n_elts
; i
++)
12063 if (matches
[i
][1] > maxv
)
12066 maxv
= matches
[i
][1];
12069 /* Create a duplicate of the most common element. */
12070 rtx x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, maxelement
));
12071 aarch64_emit_move (target
, gen_vec_duplicate (mode
, x
));
12073 /* Insert the rest. */
12074 for (int i
= 0; i
< n_elts
; i
++)
12076 rtx x
= XVECEXP (vals
, 0, i
);
12077 if (matches
[i
][0] == maxelement
)
12079 x
= copy_to_mode_reg (inner_mode
, x
);
12080 emit_insn (GEN_FCN (icode
) (target
, x
, GEN_INT (i
)));
12085 /* Initialise a vector which is part-variable. We want to first try
12086 to build those lanes which are constant in the most efficient way we
12088 if (n_var
!= n_elts
)
12090 rtx copy
= copy_rtx (vals
);
12092 /* Load constant part of vector. We really don't care what goes into the
12093 parts we will overwrite, but we're more likely to be able to load the
12094 constant efficiently if it has fewer, larger, repeating parts
12095 (see aarch64_simd_valid_immediate). */
12096 for (int i
= 0; i
< n_elts
; i
++)
12098 rtx x
= XVECEXP (vals
, 0, i
);
12099 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12101 rtx subst
= any_const
;
12102 for (int bit
= n_elts
/ 2; bit
> 0; bit
/= 2)
12104 /* Look in the copied vector, as more elements are const. */
12105 rtx test
= XVECEXP (copy
, 0, i
^ bit
);
12106 if (CONST_INT_P (test
) || CONST_DOUBLE_P (test
))
12112 XVECEXP (copy
, 0, i
) = subst
;
12114 aarch64_expand_vector_init (target
, copy
);
12117 /* Insert the variable lanes directly. */
12118 for (int i
= 0; i
< n_elts
; i
++)
12120 rtx x
= XVECEXP (vals
, 0, i
);
12121 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12123 x
= copy_to_mode_reg (inner_mode
, x
);
12124 emit_insn (GEN_FCN (icode
) (target
, x
, GEN_INT (i
)));
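/* Illustrative sketch (not part of GCC): the duplicate-counting scheme used
   above when every element is variable, shown over plain integers.
   matches[i][0] records the earliest element equal to element i, and
   matches[j][1], for that earliest j, counts how many elements match it;
   the element with the largest count is the one worth duplicating before
   inserting the rest.  Standalone C, names local to the sketch.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int vals[] = { 7, 3, 7, 7 };
  int n = 4, matches[16][2] = { { 0 } };

  for (int i = 0; i < n; i++)
    for (int j = 0; j <= i; j++)
      if (vals[i] == vals[j])
	{
	  matches[i][0] = j;      /* Earliest equal element.  */
	  matches[j][1]++;        /* Count credited to that element.  */
	  break;
	}

  int maxelement = 0, maxv = 0;
  for (int i = 0; i < n; i++)
    if (matches[i][1] > maxv)
      {
	maxelement = i;
	maxv = matches[i][1];
      }

  printf ("dup element %d (value %d), insert the rest\n",
	  maxelement, vals[maxelement]);
  return 0;
}
#endif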
12128 static unsigned HOST_WIDE_INT
12129 aarch64_shift_truncation_mask (machine_mode mode
)
12132 (!SHIFT_COUNT_TRUNCATED
12133 || aarch64_vector_mode_supported_p (mode
)
12134 || aarch64_vect_struct_mode_p (mode
)) ? 0 : (GET_MODE_BITSIZE (mode
) - 1);
12137 /* Select a format to encode pointers in exception handling data. */
12139 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED
, int global
)
12142 switch (aarch64_cmodel
)
12144 case AARCH64_CMODEL_TINY
:
12145 case AARCH64_CMODEL_TINY_PIC
:
12146 case AARCH64_CMODEL_SMALL
:
12147 case AARCH64_CMODEL_SMALL_PIC
:
12148 case AARCH64_CMODEL_SMALL_SPIC
:
12149 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
12151 type
= DW_EH_PE_sdata4
;
12154 /* No assumptions here. 8-byte relocs required. */
12155 type
= DW_EH_PE_sdata8
;
12158 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
12161 /* The last .arch and .tune assembly strings that we printed. */
12162 static std::string aarch64_last_printed_arch_string
;
12163 static std::string aarch64_last_printed_tune_string
;
12165 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
12166 by the function fndecl. */
12169 aarch64_declare_function_name (FILE *stream
, const char* name
,
12172 tree target_parts
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
12174 struct cl_target_option
*targ_options
;
12176 targ_options
= TREE_TARGET_OPTION (target_parts
);
12178 targ_options
= TREE_TARGET_OPTION (target_option_current_node
);
12179 gcc_assert (targ_options
);
12181 const struct processor
*this_arch
12182 = aarch64_get_arch (targ_options
->x_explicit_arch
);
12184 unsigned long isa_flags
= targ_options
->x_aarch64_isa_flags
;
12185 std::string extension
12186 = aarch64_get_extension_string_for_isa_flags (isa_flags
,
12188 /* Only update the assembler .arch string if it is distinct from the last
12189 such string we printed. */
12190 std::string to_print
= this_arch
->name
+ extension
;
12191 if (to_print
!= aarch64_last_printed_arch_string
)
12193 asm_fprintf (asm_out_file
, "\t.arch %s\n", to_print
.c_str ());
12194 aarch64_last_printed_arch_string
= to_print
;
12197 /* Print the cpu name we're tuning for in the comments, might be
12198 useful to readers of the generated asm. Do it only when it changes
12199 from function to function and verbose assembly is requested. */
12200 const struct processor
*this_tune
12201 = aarch64_get_tune_cpu (targ_options
->x_explicit_tune_core
);
12203 if (flag_debug_asm
&& aarch64_last_printed_tune_string
!= this_tune
->name
)
12205 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune %s\n",
12207 aarch64_last_printed_tune_string
= this_tune
->name
;
12210 /* Don't forget the type directive for ELF. */
12211 ASM_OUTPUT_TYPE_DIRECTIVE (stream
, name
, "function");
12212 ASM_OUTPUT_LABEL (stream
, name
);
12215 /* Implements TARGET_ASM_FILE_START. Output the assembly header. */
12218 aarch64_start_file (void)
12220 struct cl_target_option
*default_options
12221 = TREE_TARGET_OPTION (target_option_default_node
);
12223 const struct processor
*default_arch
12224 = aarch64_get_arch (default_options
->x_explicit_arch
);
12225 unsigned long default_isa_flags
= default_options
->x_aarch64_isa_flags
;
12226 std::string extension
12227 = aarch64_get_extension_string_for_isa_flags (default_isa_flags
,
12228 default_arch
->flags
);
12230 aarch64_last_printed_arch_string
= default_arch
->name
+ extension
;
12231 aarch64_last_printed_tune_string
= "";
12232 asm_fprintf (asm_out_file
, "\t.arch %s\n",
12233 aarch64_last_printed_arch_string
.c_str ());
12235 default_file_start ();
12238 /* Emit load exclusive. */
12241 aarch64_emit_load_exclusive (machine_mode mode
, rtx rval
,
12242 rtx mem
, rtx model_rtx
)
12244 rtx (*gen
) (rtx
, rtx
, rtx
);
12248 case E_QImode
: gen
= gen_aarch64_load_exclusiveqi
; break;
12249 case E_HImode
: gen
= gen_aarch64_load_exclusivehi
; break;
12250 case E_SImode
: gen
= gen_aarch64_load_exclusivesi
; break;
12251 case E_DImode
: gen
= gen_aarch64_load_exclusivedi
; break;
12253 gcc_unreachable ();
12256 emit_insn (gen (rval
, mem
, model_rtx
));
12259 /* Emit store exclusive. */
12262 aarch64_emit_store_exclusive (machine_mode mode
, rtx bval
,
12263 rtx rval
, rtx mem
, rtx model_rtx
)
12265 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
12269 case E_QImode
: gen
= gen_aarch64_store_exclusiveqi
; break;
12270 case E_HImode
: gen
= gen_aarch64_store_exclusivehi
; break;
12271 case E_SImode
: gen
= gen_aarch64_store_exclusivesi
; break;
12272 case E_DImode
: gen
= gen_aarch64_store_exclusivedi
; break;
12274 gcc_unreachable ();
12277 emit_insn (gen (bval
, rval
, mem
, model_rtx
));
12280 /* Mark the previous jump instruction as unlikely. */
12283 aarch64_emit_unlikely_jump (rtx insn
)
12285 rtx_insn
*jump
= emit_jump_insn (insn
);
12286 add_reg_br_prob_note (jump
, profile_probability::very_unlikely ());
12289 /* Expand a compare and swap pattern. */
12292 aarch64_expand_compare_and_swap (rtx operands
[])
12294 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
12295 machine_mode mode
, cmp_mode
;
12296 typedef rtx (*gen_cas_fn
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
12299 const gen_cas_fn split_cas
[] =
12301 gen_aarch64_compare_and_swapqi
,
12302 gen_aarch64_compare_and_swaphi
,
12303 gen_aarch64_compare_and_swapsi
,
12304 gen_aarch64_compare_and_swapdi
12306 const gen_cas_fn atomic_cas
[] =
12308 gen_aarch64_compare_and_swapqi_lse
,
12309 gen_aarch64_compare_and_swaphi_lse
,
12310 gen_aarch64_compare_and_swapsi_lse
,
12311 gen_aarch64_compare_and_swapdi_lse
12314 bval
= operands
[0];
12315 rval
= operands
[1];
12317 oldval
= operands
[3];
12318 newval
= operands
[4];
12319 is_weak
= operands
[5];
12320 mod_s
= operands
[6];
12321 mod_f
= operands
[7];
12322 mode
= GET_MODE (mem
);
12325 /* Normally the succ memory model must be stronger than fail, but in the
12326 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
12327 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
12329 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
12330 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
12331 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
12337 /* For short modes, we're going to perform the comparison in SImode,
12338 so do the zero-extension now. */
12340 rval
= gen_reg_rtx (SImode
);
12341 oldval
= convert_modes (SImode
, mode
, oldval
, true);
12342 /* Fall through. */
12346 /* Force the value into a register if needed. */
12347 if (!aarch64_plus_operand (oldval
, mode
))
12348 oldval
= force_reg (cmp_mode
, oldval
);
12352 gcc_unreachable ();
12357 case E_QImode
: idx
= 0; break;
12358 case E_HImode
: idx
= 1; break;
12359 case E_SImode
: idx
= 2; break;
12360 case E_DImode
: idx
= 3; break;
12362 gcc_unreachable ();
12365 gen
= atomic_cas
[idx
];
12367 gen
= split_cas
[idx
];
12369 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
12371 if (mode
== QImode
|| mode
== HImode
)
12372 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
12374 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
12375 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
12376 emit_insn (gen_rtx_SET (bval
, x
));
/* Test whether the target supports using an atomic load-operate instruction.
   CODE is the operation and AFTER is TRUE if the data in memory after the
   operation should be returned and FALSE if the data before the operation
   should be returned.  Returns FALSE if the operation isn't supported by the
   architecture.  */
12386 aarch64_atomic_ldop_supported_p (enum rtx_code code
)
12405 /* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
12406 sequence implementing an atomic operation. */
12409 aarch64_emit_post_barrier (enum memmodel model
)
12411 const enum memmodel base_model
= memmodel_base (model
);
12413 if (is_mm_sync (model
)
12414 && (base_model
== MEMMODEL_ACQUIRE
12415 || base_model
== MEMMODEL_ACQ_REL
12416 || base_model
== MEMMODEL_SEQ_CST
))
12418 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST
)));
12422 /* Emit an atomic compare-and-swap operation. RVAL is the destination register
12423 for the data in memory. EXPECTED is the value expected to be in memory.
12424 DESIRED is the value to store to memory. MEM is the memory location. MODEL
12425 is the memory ordering to use. */
12428 aarch64_gen_atomic_cas (rtx rval
, rtx mem
,
12429 rtx expected
, rtx desired
,
12432 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
12435 mode
= GET_MODE (mem
);
12439 case E_QImode
: gen
= gen_aarch64_atomic_casqi
; break;
12440 case E_HImode
: gen
= gen_aarch64_atomic_cashi
; break;
12441 case E_SImode
: gen
= gen_aarch64_atomic_cassi
; break;
12442 case E_DImode
: gen
= gen_aarch64_atomic_casdi
; break;
12444 gcc_unreachable ();
12447 /* Move the expected value into the CAS destination register. */
12448 emit_insn (gen_rtx_SET (rval
, expected
));
12450 /* Emit the CAS. */
12451 emit_insn (gen (rval
, mem
, desired
, model
));
12453 /* Compare the expected value with the value loaded by the CAS, to establish
12454 whether the swap was made. */
12455 aarch64_gen_compare_reg (EQ
, rval
, expected
);
12458 /* Split a compare and swap pattern. */
12461 aarch64_split_compare_and_swap (rtx operands
[])
12463 rtx rval
, mem
, oldval
, newval
, scratch
;
12466 rtx_code_label
*label1
, *label2
;
12468 enum memmodel model
;
12471 rval
= operands
[0];
12473 oldval
= operands
[2];
12474 newval
= operands
[3];
12475 is_weak
= (operands
[4] != const0_rtx
);
12476 model_rtx
= operands
[5];
12477 scratch
= operands
[7];
12478 mode
= GET_MODE (mem
);
12479 model
= memmodel_from_int (INTVAL (model_rtx
));
12481 /* When OLDVAL is zero and we want the strong version we can emit a tighter
12484 LD[A]XR rval, [mem]
12486 ST[L]XR scratch, newval, [mem]
12487 CBNZ scratch, .label1
12490 bool strong_zero_p
= !is_weak
&& oldval
== const0_rtx
;
12495 label1
= gen_label_rtx ();
12496 emit_label (label1
);
12498 label2
= gen_label_rtx ();
12500 /* The initial load can be relaxed for a __sync operation since a final
12501 barrier will be emitted to stop code hoisting. */
12502 if (is_mm_sync (model
))
12503 aarch64_emit_load_exclusive (mode
, rval
, mem
,
12504 GEN_INT (MEMMODEL_RELAXED
));
12506 aarch64_emit_load_exclusive (mode
, rval
, mem
, model_rtx
);
12510 x
= gen_rtx_NE (VOIDmode
, rval
, const0_rtx
);
12511 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
12512 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
12513 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
12517 cond
= aarch64_gen_compare_reg (NE
, rval
, oldval
);
12518 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
12519 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
12520 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
12521 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
12524 aarch64_emit_store_exclusive (mode
, scratch
, mem
, newval
, model_rtx
);
12528 x
= gen_rtx_NE (VOIDmode
, scratch
, const0_rtx
);
12529 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
12530 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
12531 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
12535 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
12536 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
12537 emit_insn (gen_rtx_SET (cond
, x
));
12540 emit_label (label2
);
12541 /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
12542 to set the condition flags. If this is not used it will be removed by
12546 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
12547 x
= gen_rtx_COMPARE (CCmode
, rval
, const0_rtx
);
12548 emit_insn (gen_rtx_SET (cond
, x
));
12550 /* Emit any final barrier needed for a __sync operation. */
12551 if (is_mm_sync (model
))
12552 aarch64_emit_post_barrier (model
);
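/* Illustrative sketch (not part of GCC): the source-level operation whose
   expansion is split above.  A strong compare-and-swap against zero is the
   case the tighter CBNZ loop in the comment targets.  Standalone C using
   GCC's __atomic builtins.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long v = 0;
  long expected = 0;

  /* Strong CAS, sequentially consistent on success and failure.  */
  int ok = __atomic_compare_exchange_n (&v, &expected, 42, /*weak=*/0,
					__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  printf ("swapped=%d v=%ld\n", ok, v);   /* swapped=1 v=42 */
  return 0;
}
#endif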
12555 /* Emit a BIC instruction. */
12558 aarch64_emit_bic (machine_mode mode
, rtx dst
, rtx s1
, rtx s2
, int shift
)
12560 rtx shift_rtx
= GEN_INT (shift
);
12561 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
12565 case E_SImode
: gen
= gen_and_one_cmpl_lshrsi3
; break;
12566 case E_DImode
: gen
= gen_and_one_cmpl_lshrdi3
; break;
12568 gcc_unreachable ();
12571 emit_insn (gen (dst
, s2
, shift_rtx
, s1
));
12574 /* Emit an atomic swap. */
12577 aarch64_emit_atomic_swap (machine_mode mode
, rtx dst
, rtx value
,
12578 rtx mem
, rtx model
)
12580 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
12584 case E_QImode
: gen
= gen_aarch64_atomic_swpqi
; break;
12585 case E_HImode
: gen
= gen_aarch64_atomic_swphi
; break;
12586 case E_SImode
: gen
= gen_aarch64_atomic_swpsi
; break;
12587 case E_DImode
: gen
= gen_aarch64_atomic_swpdi
; break;
12589 gcc_unreachable ();
12592 emit_insn (gen (dst
, mem
, value
, model
));
12595 /* Operations supported by aarch64_emit_atomic_load_op. */
12597 enum aarch64_atomic_load_op_code
12599 AARCH64_LDOP_PLUS
, /* A + B */
12600 AARCH64_LDOP_XOR
, /* A ^ B */
12601 AARCH64_LDOP_OR
, /* A | B */
12602 AARCH64_LDOP_BIC
/* A & ~B */
12605 /* Emit an atomic load-operate. */
12608 aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code
,
12609 machine_mode mode
, rtx dst
, rtx src
,
12610 rtx mem
, rtx model
)
12612 typedef rtx (*aarch64_atomic_load_op_fn
) (rtx
, rtx
, rtx
, rtx
);
12613 const aarch64_atomic_load_op_fn plus
[] =
12615 gen_aarch64_atomic_loadaddqi
,
12616 gen_aarch64_atomic_loadaddhi
,
12617 gen_aarch64_atomic_loadaddsi
,
12618 gen_aarch64_atomic_loadadddi
12620 const aarch64_atomic_load_op_fn eor
[] =
12622 gen_aarch64_atomic_loadeorqi
,
12623 gen_aarch64_atomic_loadeorhi
,
12624 gen_aarch64_atomic_loadeorsi
,
12625 gen_aarch64_atomic_loadeordi
12627 const aarch64_atomic_load_op_fn ior
[] =
12629 gen_aarch64_atomic_loadsetqi
,
12630 gen_aarch64_atomic_loadsethi
,
12631 gen_aarch64_atomic_loadsetsi
,
12632 gen_aarch64_atomic_loadsetdi
12634 const aarch64_atomic_load_op_fn bic
[] =
12636 gen_aarch64_atomic_loadclrqi
,
12637 gen_aarch64_atomic_loadclrhi
,
12638 gen_aarch64_atomic_loadclrsi
,
12639 gen_aarch64_atomic_loadclrdi
12641 aarch64_atomic_load_op_fn gen
;
12646 case E_QImode
: idx
= 0; break;
12647 case E_HImode
: idx
= 1; break;
12648 case E_SImode
: idx
= 2; break;
12649 case E_DImode
: idx
= 3; break;
12651 gcc_unreachable ();
12656 case AARCH64_LDOP_PLUS
: gen
= plus
[idx
]; break;
12657 case AARCH64_LDOP_XOR
: gen
= eor
[idx
]; break;
12658 case AARCH64_LDOP_OR
: gen
= ior
[idx
]; break;
12659 case AARCH64_LDOP_BIC
: gen
= bic
[idx
]; break;
12661 gcc_unreachable ();
12664 emit_insn (gen (dst
, mem
, src
, model
));
12667 /* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
12668 location to store the data read from memory. OUT_RESULT is the location to
12669 store the result of the operation. MEM is the memory location to read and
12670 modify. MODEL_RTX is the memory ordering to use. VALUE is the second
12671 operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
12675 aarch64_gen_atomic_ldop (enum rtx_code code
, rtx out_data
, rtx out_result
,
12676 rtx mem
, rtx value
, rtx model_rtx
)
12678 machine_mode mode
= GET_MODE (mem
);
12679 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
12680 const bool short_mode
= (mode
< SImode
);
12681 aarch64_atomic_load_op_code ldop_code
;
12686 out_data
= gen_lowpart (mode
, out_data
);
12689 out_result
= gen_lowpart (mode
, out_result
);
12691 /* Make sure the value is in a register, putting it into a destination
12692 register if it needs to be manipulated. */
12693 if (!register_operand (value
, mode
)
12694 || code
== AND
|| code
== MINUS
)
12696 src
= out_result
? out_result
: out_data
;
12697 emit_move_insn (src
, gen_lowpart (mode
, value
));
12701 gcc_assert (register_operand (src
, mode
));
12703 /* Preprocess the data for the operation as necessary. If the operation is
12704 a SET then emit a swap instruction and finish. */
12708 aarch64_emit_atomic_swap (mode
, out_data
, src
, mem
, model_rtx
);
12712 /* Negate the value and treat it as a PLUS. */
12716 /* Resize the value if necessary. */
12718 src
= gen_lowpart (wmode
, src
);
12720 neg_src
= gen_rtx_NEG (wmode
, src
);
12721 emit_insn (gen_rtx_SET (src
, neg_src
));
12724 src
= gen_lowpart (mode
, src
);
12726 /* Fall-through. */
12728 ldop_code
= AARCH64_LDOP_PLUS
;
12732 ldop_code
= AARCH64_LDOP_OR
;
12736 ldop_code
= AARCH64_LDOP_XOR
;
12743 /* Resize the value if necessary. */
12745 src
= gen_lowpart (wmode
, src
);
12747 not_src
= gen_rtx_NOT (wmode
, src
);
12748 emit_insn (gen_rtx_SET (src
, not_src
));
12751 src
= gen_lowpart (mode
, src
);
12753 ldop_code
= AARCH64_LDOP_BIC
;
12757 /* The operation can't be done with atomic instructions. */
12758 gcc_unreachable ();
12761 aarch64_emit_atomic_load_op (ldop_code
, mode
, out_data
, src
, mem
, model_rtx
);
12763 /* If necessary, calculate the data in memory after the update by redoing the
12764 operation from values in registers. */
12770 src
= gen_lowpart (wmode
, src
);
12771 out_data
= gen_lowpart (wmode
, out_data
);
12772 out_result
= gen_lowpart (wmode
, out_result
);
12781 x
= gen_rtx_PLUS (wmode
, out_data
, src
);
12784 x
= gen_rtx_IOR (wmode
, out_data
, src
);
12787 x
= gen_rtx_XOR (wmode
, out_data
, src
);
12790 aarch64_emit_bic (wmode
, out_result
, out_data
, src
, 0);
12793 gcc_unreachable ();
12796 emit_set_insn (out_result
, x
);
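/* Illustrative sketch (not part of GCC): why the AND case above can be
   lowered to an atomic bit-clear of the complemented operand, mirroring the
   NOT followed by AARCH64_LDOP_BIC in the code: a & b == a & ~(~b), so
   clearing the bits of ~b is the same as ANDing with b.  */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint32_t a = 0xf0f0f0f0u, b = 0x0ff00ff0u;
  uint32_t via_and = a & b;
  uint32_t via_bic = a & ~(~b);   /* BIC with the complemented operand.  */

  printf ("%08x %08x\n", via_and, via_bic);   /* Identical results.  */
  return 0;
}
#endif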
12801 /* Split an atomic operation. */
12804 aarch64_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
12805 rtx value
, rtx model_rtx
, rtx cond
)
12807 machine_mode mode
= GET_MODE (mem
);
12808 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
12809 const enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
12810 const bool is_sync
= is_mm_sync (model
);
12811 rtx_code_label
*label
;
12814 /* Split the atomic operation into a sequence. */
12815 label
= gen_label_rtx ();
12816 emit_label (label
);
12819 new_out
= gen_lowpart (wmode
, new_out
);
12821 old_out
= gen_lowpart (wmode
, old_out
);
12824 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
12826 /* The initial load can be relaxed for a __sync operation since a final
12827 barrier will be emitted to stop code hoisting. */
12829 aarch64_emit_load_exclusive (mode
, old_out
, mem
,
12830 GEN_INT (MEMMODEL_RELAXED
));
12832 aarch64_emit_load_exclusive (mode
, old_out
, mem
, model_rtx
);
12841 x
= gen_rtx_AND (wmode
, old_out
, value
);
12842 emit_insn (gen_rtx_SET (new_out
, x
));
12843 x
= gen_rtx_NOT (wmode
, new_out
);
12844 emit_insn (gen_rtx_SET (new_out
, x
));
12848 if (CONST_INT_P (value
))
12850 value
= GEN_INT (-INTVAL (value
));
12853 /* Fall through. */
12856 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
12857 emit_insn (gen_rtx_SET (new_out
, x
));
12861 aarch64_emit_store_exclusive (mode
, cond
, mem
,
12862 gen_lowpart (mode
, new_out
), model_rtx
);
12864 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
12865 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
12866 gen_rtx_LABEL_REF (Pmode
, label
), pc_rtx
);
12867 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
12869 /* Emit any final barrier needed for a __sync operation. */
12871 aarch64_emit_post_barrier (model
);
12875 aarch64_init_libfuncs (void)
12877 /* Half-precision float operations. The compiler handles all operations
12878 with NULL libfuncs by converting to SFmode. */
12881 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
, "__gnu_f2h_ieee");
12882 set_conv_libfunc (sext_optab
, SFmode
, HFmode
, "__gnu_h2f_ieee");
12885 set_optab_libfunc (add_optab
, HFmode
, NULL
);
12886 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
12887 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
12888 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
12889 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
12892 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
12893 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
12894 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
12895 set_optab_libfunc (le_optab
, HFmode
, NULL
);
12896 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
12897 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
12898 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
12901 /* Target hook for c_mode_for_suffix. */
12902 static machine_mode
12903 aarch64_c_mode_for_suffix (char suffix
)
/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
   by the formula:

   (-1)^s * (n/16) * 2^r

   where
   's' is the sign bit.
   'n' is an integer in the range 16 <= n <= 31.
   'r' is an integer in the range -3 <= r <= 4.  */
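/* Illustrative sketch (not part of GCC): a brute-force standalone check of
   the (-1)^s * (n/16) * 2^r form described above, with n in [16, 31] and
   r in [-3, 4].  1.0, 0.5 and 31.0 are representable; 0.0 and 0.1 are not.
   Helper names are local to the sketch.  */
#if 0
#include <math.h>
#include <stdio.h>

static int
quarter_precision_p (double x)
{
  double mag = fabs (x);
  for (int n = 16; n <= 31; n++)
    for (int r = -3; r <= 4; r++)
      if (mag == (n / 16.0) * ldexp (1.0, r))
	return 1;
  return 0;
}

int
main (void)
{
  const double tests[] = { 1.0, 0.5, 31.0, 0.0, 0.1 };
  for (int i = 0; i < 5; i++)
    printf ("%g -> %d\n", tests[i], quarter_precision_p (tests[i]));
  return 0;
}
#endif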
12923 /* Return true iff X can be represented by a quarter-precision
12924 floating point immediate operand X. Note, we cannot represent 0.0. */
12926 aarch64_float_const_representable_p (rtx x
)
12928 /* This represents our current view of how many bits
12929 make up the mantissa. */
12930 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12932 unsigned HOST_WIDE_INT mantissa
, mask
;
12933 REAL_VALUE_TYPE r
, m
;
12936 if (!CONST_DOUBLE_P (x
))
12939 /* We don't support HFmode constants yet. */
12940 if (GET_MODE (x
) == VOIDmode
|| GET_MODE (x
) == HFmode
)
12943 r
= *CONST_DOUBLE_REAL_VALUE (x
);
12945 /* We cannot represent infinities, NaNs or +/-zero. We won't
12946 know if we have +zero until we analyse the mantissa, but we
12947 can reject the other invalid values. */
12948 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
)
12949 || REAL_VALUE_MINUS_ZERO (r
))
12952 /* Extract exponent. */
12953 r
= real_value_abs (&r
);
12954 exponent
= REAL_EXP (&r
);
12956 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12957 highest (sign) bit, with a fixed binary point at bit point_pos.
12958 m1 holds the low part of the mantissa, m2 the high part.
12959 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
12960 bits for the mantissa, this can fail (low bits will be lost). */
12961 real_ldexp (&m
, &r
, point_pos
- exponent
);
12962 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12964 /* If the low part of the mantissa has bits set we cannot represent
12966 if (w
.ulow () != 0)
12968 /* We have rejected the lower HOST_WIDE_INT, so update our
12969 understanding of how many bits lie in the mantissa and
12970 look only at the high HOST_WIDE_INT. */
12971 mantissa
= w
.elt (1);
12972 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12974 /* We can only represent values with a mantissa of the form 1.xxxx. */
12975 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
12976 if ((mantissa
& mask
) != 0)
12979 /* Having filtered unrepresentable values, we may now remove all
12980 but the highest 5 bits. */
12981 mantissa
>>= point_pos
- 5;
12983 /* We cannot represent the value 0.0, so reject it. This is handled
12988 /* Then, as bit 4 is always set, we can mask it off, leaving
12989 the mantissa in the range [0, 15]. */
12990 mantissa
&= ~(1 << 4);
12991 gcc_assert (mantissa
<= 15);
12993 /* GCC internally does not use IEEE754-like encoding (where normalized
12994 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
12995 Our mantissa values are shifted 4 places to the left relative to
12996 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
12997 by 5 places to correct for GCC's representation. */
12998 exponent
= 5 - exponent
;
13000 return (exponent
>= 0 && exponent
<= 7);
13003 /* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
13004 immediate with a CONST_VECTOR of MODE and WIDTH. WHICH selects whether to
13005 output MOVI/MVNI, ORR or BIC immediate. */
13007 aarch64_output_simd_mov_immediate (rtx const_vector
,
13010 enum simd_immediate_check which
)
13013 static char templ
[40];
13014 const char *mnemonic
;
13015 const char *shift_op
;
13016 unsigned int lane_count
= 0;
13019 struct simd_immediate_info info
= { NULL_RTX
, 0, 0, false, false };
13021 /* This will return true to show const_vector is legal for use as either
13022 a AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
13023 It will also update INFO to show how the immediate should be generated.
13024 WHICH selects whether to check for MOVI/MVNI, ORR or BIC. */
13025 is_valid
= aarch64_simd_valid_immediate (const_vector
, mode
, false,
13027 gcc_assert (is_valid
);
13029 element_char
= sizetochar (info
.element_width
);
13030 lane_count
= width
/ info
.element_width
;
13032 mode
= GET_MODE_INNER (mode
);
13033 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
13035 gcc_assert (info
.shift
== 0 && ! info
.mvn
);
13036 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
13037 move immediate path. */
13038 if (aarch64_float_const_zero_rtx_p (info
.value
))
13039 info
.value
= GEN_INT (0);
13042 const unsigned int buf_size
= 20;
13043 char float_buf
[buf_size
] = {'\0'};
13044 real_to_decimal_for_mode (float_buf
,
13045 CONST_DOUBLE_REAL_VALUE (info
.value
),
13046 buf_size
, buf_size
, 1, mode
);
13048 if (lane_count
== 1)
13049 snprintf (templ
, sizeof (templ
), "fmov\t%%d0, %s", float_buf
);
13051 snprintf (templ
, sizeof (templ
), "fmov\t%%0.%d%c, %s",
13052 lane_count
, element_char
, float_buf
);
13057 gcc_assert (CONST_INT_P (info
.value
));
13059 if (which
== AARCH64_CHECK_MOV
)
13061 mnemonic
= info
.mvn
? "mvni" : "movi";
13062 shift_op
= info
.msl
? "msl" : "lsl";
13063 if (lane_count
== 1)
13064 snprintf (templ
, sizeof (templ
), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX
,
13065 mnemonic
, UINTVAL (info
.value
));
13066 else if (info
.shift
)
13067 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, "
13068 HOST_WIDE_INT_PRINT_HEX
", %s %d", mnemonic
, lane_count
,
13069 element_char
, UINTVAL (info
.value
), shift_op
, info
.shift
);
13071 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, "
13072 HOST_WIDE_INT_PRINT_HEX
, mnemonic
, lane_count
,
13073 element_char
, UINTVAL (info
.value
));
13077 /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR. */
13078 mnemonic
= info
.mvn
? "bic" : "orr";
13080 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, #"
13081 HOST_WIDE_INT_PRINT_DEC
", %s #%d", mnemonic
, lane_count
,
13082 element_char
, UINTVAL (info
.value
), "lsl", info
.shift
);
13084 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, #"
13085 HOST_WIDE_INT_PRINT_DEC
, mnemonic
, lane_count
,
13086 element_char
, UINTVAL (info
.value
));
13092 aarch64_output_scalar_simd_mov_immediate (rtx immediate
, scalar_int_mode mode
)
13095 /* If a floating point number was passed and we desire to use it in an
13096 integer mode do the conversion to integer. */
13097 if (CONST_DOUBLE_P (immediate
) && GET_MODE_CLASS (mode
) == MODE_INT
)
13099 unsigned HOST_WIDE_INT ival
;
13100 if (!aarch64_reinterpret_float_as_int (immediate
, &ival
))
13101 gcc_unreachable ();
13102 immediate
= gen_int_mode (ival
, mode
);
13105 machine_mode vmode
;
13106 /* use a 64 bit mode for everything except for DI/DF mode, where we use
13107 a 128 bit vector mode. */
13108 int width
= GET_MODE_BITSIZE (mode
) == 64 ? 128 : 64;
13110 vmode
= aarch64_simd_container_mode (mode
, width
);
13111 rtx v_op
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (immediate
));
13112 return aarch64_output_simd_mov_immediate (v_op
, vmode
, width
);
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = REG_NREGS (operands[1]);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
                               GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
         is in the right place already.  */
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
    }
}
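/* Added note: the three XORs in the reversed-parts case above implement the
   classic in-place swap (a ^= b; b ^= a; a ^= b) at vector width, exchanging
   the two source halves without a scratch register when the destination
   halves alias the sources in reverse order.  */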
/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  auto_vec_perm_indices perm;
  machine_mode vmode;
  bool one_vector_p;
  bool testing_p;
};

/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
        {
          /* Expand the argument to a V16QI mode by duplicating it.  */
          rtx pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
          emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
        }
      else
        emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
        {
          pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
          emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
        }
      else
        {
          pair = gen_reg_rtx (OImode);
          emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
          emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
        }
    }
}
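/* Added note: TBL reads one byte index per result byte; indices that fall
   outside the table (e.g. >= 16 for a single 128-bit table register) yield
   zero rather than wrapping, which is why callers mask or duplicate the
   operands before getting here.  */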
/* Expand a vec_perm with the operands given by TARGET, OP0, OP1 and SEL.
   NELT is the number of elements in the vector.  */

void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel,
                         unsigned int nelt)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx mask;

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = aarch64_simd_gen_const_vector_dup (vmode,
                                            one_vector_p ? nelt - 1 : 2 * nelt - 1);
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  /* For big-endian, we also need to reverse the index within the vector
     (but not which vector).  */
  if (BYTES_BIG_ENDIAN)
    {
      /* If one_vector_p, mask is a vector of (nelt - 1)'s already.  */
      if (!one_vector_p)
        mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
      sel = expand_simple_binop (vmode, XOR, sel, mask,
                                 NULL, 0, OPTAB_LIB_WIDEN);
    }
  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
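/* Added example: for a two-operand V16QI permute, each selector byte is
   masked with 2 * 16 - 1 = 31, so a selector value of 37 becomes 5 and picks
   byte 5 of the concatenated 32-byte table.  This supplies the modulo
   behaviour that vec_perm requires but TBL does not provide.  */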
/* Recognize patterns suitable for the TRN instructions.  */

static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->perm.length ();
  rtx out, in0, in1, x;
  machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
        return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
                                      odd ? UNSPEC_TRN2 : UNSPEC_TRN1));
  return true;
}
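/* Added example: for V4SI, TRN1 corresponds to the index vector {0, 4, 2, 6}
   and TRN2 to {1, 5, 3, 7}, i.e. the even (resp. odd) numbered lanes of the
   two inputs interleaved pairwise.  */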
13304 /* Recognize patterns suitable for the UZP instructions. */
13306 aarch64_evpc_uzp (struct expand_vec_perm_d
*d
)
13308 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
13309 rtx out
, in0
, in1
, x
;
13310 machine_mode vmode
= d
->vmode
;
13312 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
13315 /* Note that these are little-endian tests.
13316 We correct for big-endian later. */
13317 if (d
->perm
[0] == 0)
13319 else if (d
->perm
[0] == 1)
13323 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
13325 for (i
= 0; i
< nelt
; i
++)
13327 unsigned elt
= (i
* 2 + odd
) & mask
;
13328 if (d
->perm
[i
] != elt
)
13338 if (BYTES_BIG_ENDIAN
)
13340 x
= in0
, in0
= in1
, in1
= x
;
13345 emit_set_insn (out
, gen_rtx_UNSPEC (vmode
, gen_rtvec (2, in0
, in1
),
13346 odd
? UNSPEC_UZP2
: UNSPEC_UZP1
));
13350 /* Recognize patterns suitable for the ZIP instructions. */
13352 aarch64_evpc_zip (struct expand_vec_perm_d
*d
)
13354 unsigned int i
, high
, mask
, nelt
= d
->perm
.length ();
13355 rtx out
, in0
, in1
, x
;
13356 machine_mode vmode
= d
->vmode
;
13358 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
13361 /* Note that these are little-endian tests.
13362 We correct for big-endian later. */
13364 if (d
->perm
[0] == high
)
13367 else if (d
->perm
[0] == 0)
13371 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
13373 for (i
= 0; i
< nelt
/ 2; i
++)
13375 unsigned elt
= (i
+ high
) & mask
;
13376 if (d
->perm
[i
* 2] != elt
)
13378 elt
= (elt
+ nelt
) & mask
;
13379 if (d
->perm
[i
* 2 + 1] != elt
)
13389 if (BYTES_BIG_ENDIAN
)
13391 x
= in0
, in0
= in1
, in1
= x
;
13396 emit_set_insn (out
, gen_rtx_UNSPEC (vmode
, gen_rtvec (2, in0
, in1
),
13397 high
? UNSPEC_ZIP2
: UNSPEC_ZIP1
));
/* Recognize patterns for the EXT insn.  */

static bool
aarch64_evpc_ext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->perm.length ();
  rtx offset;

  unsigned int location = d->perm[0]; /* Always < nelt.  */

  /* Check if the extracted indices are increasing by one.  */
  for (i = 1; i < nelt; i++)
    {
      unsigned int required = location + i;
      if (d->one_vector_p)
        {
          /* We'll pass the same vector in twice, so allow indices to wrap.  */
          required &= (nelt - 1);
        }
      if (d->perm[i] != required)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  /* The case where (location == 0) is a no-op for both big- and little-endian,
     and is removed by the mid-end at optimization levels -O1 and higher.  */

  if (BYTES_BIG_ENDIAN && (location != 0))
    {
      /* After setup, we want the high elements of the first vector (stored
         at the LSB end of the register), and the low elements of the second
         vector (stored at the MSB end of the register).  So swap.  */
      std::swap (d->op0, d->op1);
      /* location != 0 (above), so safe to assume (nelt - location) < nelt.  */
      location = nelt - location;
    }

  offset = GEN_INT (location);
  emit_set_insn (d->target,
                 gen_rtx_UNSPEC (d->vmode,
                                 gen_rtvec (3, d->op0, d->op1, offset),
                                 UNSPEC_EXT));
  return true;
}
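/* Added example: for V8QI with d->perm = {3, 4, 5, 6, 7, 8, 9, 10}, location
   is 3 and the whole permutation is a single EXT, taking the top five bytes
   of op0 followed by the low three bytes of op1.  */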
/* Recognize patterns for the REV insns.  */

static bool
aarch64_evpc_rev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, size, unspec, nelt = d->perm.length ();

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
  if (size == 8)
    unspec = UNSPEC_REV64;
  else if (size == 4)
    unspec = UNSPEC_REV32;
  else if (size == 2)
    unspec = UNSPEC_REV16;
  else
    return false;

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
        /* This is guaranteed to be true as the value of diff
           is 7, 3, 1 and we should have enough elements in the
           queue to generate this.  Getting a vector mask with a
           value of diff other than these values implies that
           something is wrong by the time we get here.  */
        gcc_assert (i + j < nelt);
        if (d->perm[i + j] != i + diff - j)
          return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_set_insn (d->target, gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0),
                                            unspec));
  return true;
}
static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx out = d->target;
  rtx in0;
  machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->perm.length ();
  rtx lane;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt); /* The pattern corrects for big-endian.  */

  rtx parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, lane));
  rtx select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
  emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
  return true;
}
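/* Added example: d->perm = {2, 2, 2, 2} on V4SI becomes a single lane
   duplicate, roughly "dup v0.4s, v1.s[2]" (the machine pattern adjusts the
   lane number for big-endian).  */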
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->perm.length ();

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    {
      int nunits = GET_MODE_NUNITS (vmode);

      /* If big-endian and two vectors we end up with a weird mixed-endian
         mode on NEON.  Reverse the index within each word but not the word
         itself.  */
      rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
                                           : d->perm[i]);
    }
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  unsigned int nelt = d->perm.length ();
  if (d->perm[0] >= nelt)
    {
      gcc_assert (nelt == (nelt & -nelt));
      for (unsigned int i = 0; i < nelt; ++i)
        d->perm[i] ^= nelt; /* Keep the same index, but in the other vector.  */

      std::swap (d->op0, d->op1);
    }

  if (TARGET_SIMD && nelt > 1)
    {
      if (aarch64_evpc_rev (d))
        return true;
      else if (aarch64_evpc_ext (d))
        return true;
      else if (aarch64_evpc_dup (d))
        return true;
      else if (aarch64_evpc_zip (d))
        return true;
      else if (aarch64_evpc_uzp (d))
        return true;
      else if (aarch64_evpc_trn (d))
        return true;
      return aarch64_evpc_tbl (d);
    }

  return false;
}
13590 /* Expand a vec_perm_const pattern with the operands given by TARGET,
13591 OP0, OP1 and SEL. NELT is the number of elements in the vector. */
13594 aarch64_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
,
13597 struct expand_vec_perm_d d
;
13598 unsigned int i
, which
;
13604 d
.vmode
= GET_MODE (target
);
13605 gcc_assert (VECTOR_MODE_P (d
.vmode
));
13606 d
.testing_p
= false;
13608 d
.perm
.reserve (nelt
);
13609 for (i
= which
= 0; i
< nelt
; ++i
)
13611 rtx e
= XVECEXP (sel
, 0, i
);
13612 unsigned int ei
= INTVAL (e
) & (2 * nelt
- 1);
13613 which
|= (ei
< nelt
? 1 : 2);
13614 d
.perm
.quick_push (ei
);
13620 gcc_unreachable ();
13623 d
.one_vector_p
= false;
13624 if (!rtx_equal_p (op0
, op1
))
13627 /* The elements of PERM do not suggest that only the first operand
13628 is used, but both operands are identical. Allow easier matching
13629 of the permutation by folding the permutation into the single
13631 /* Fall Through. */
13633 for (i
= 0; i
< nelt
; ++i
)
13634 d
.perm
[i
] &= nelt
- 1;
13636 d
.one_vector_p
= true;
13641 d
.one_vector_p
= true;
13645 return aarch64_expand_vec_perm_const_1 (&d
);
13649 aarch64_vectorize_vec_perm_const_ok (machine_mode vmode
, vec_perm_indices sel
)
13651 struct expand_vec_perm_d d
;
13652 unsigned int i
, nelt
, which
;
13656 d
.testing_p
= true;
13657 d
.perm
.safe_splice (sel
);
13659 /* Calculate whether all elements are in one vector. */
13660 nelt
= sel
.length ();
13661 for (i
= which
= 0; i
< nelt
; ++i
)
13663 unsigned int e
= d
.perm
[i
];
13664 gcc_assert (e
< 2 * nelt
);
13665 which
|= (e
< nelt
? 1 : 2);
13668 /* If all elements are from the second vector, reindex as if from the
13671 for (i
= 0; i
< nelt
; ++i
)
13674 /* Check whether the mask can be applied to a single vector. */
13675 d
.one_vector_p
= (which
!= 3);
13677 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
13678 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
13679 if (!d
.one_vector_p
)
13680 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
13683 ret
= aarch64_expand_vec_perm_const_1 (&d
);
13689 /* Generate a byte permute mask for a register of mode MODE,
13690 which has NUNITS units. */
13693 aarch64_reverse_mask (machine_mode mode
, unsigned int nunits
)
13695 /* We have to reverse each vector because we dont have
13696 a permuted load that can reverse-load according to ABI rules. */
13698 rtvec v
= rtvec_alloc (16);
13700 unsigned int usize
= GET_MODE_UNIT_SIZE (mode
);
13702 gcc_assert (BYTES_BIG_ENDIAN
);
13703 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode
));
13705 for (i
= 0; i
< nunits
; i
++)
13706 for (j
= 0; j
< usize
; j
++)
13707 RTVEC_ELT (v
, i
* usize
+ j
) = GEN_INT ((i
+ 1) * usize
- 1 - j
);
13708 mask
= gen_rtx_CONST_VECTOR (V16QImode
, v
);
13709 return force_reg (V16QImode
, mask
);
/* Implement TARGET_MODES_TIEABLE_P.  In principle we should always return
   true.  However due to issues with register allocation it is preferable
   to avoid tieing integer scalar and FP scalar modes.  Executing integer
   operations in general registers is better than treating them as scalar
   vector operations.  This reduces latency and avoids redundant int<->FP
   moves.  So tie modes if they are either the same class, or vector modes
   with other vector modes, vector structs or any scalar mode.  */

static bool
aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (aarch64_vector_mode_p (mode1) && aarch64_vector_mode_p (mode2))
    return true;

  /* Also allow any scalar modes with vectors.  */
  if (aarch64_vector_mode_supported_p (mode1)
      || aarch64_vector_mode_supported_p (mode2))
    return true;

  return false;
}
/* Return a new RTX holding the result of moving POINTER forward by
   AMOUNT bytes.  */

static rtx
aarch64_move_pointer (rtx pointer, int amount)
{
  rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);

  return adjust_automodify_address (pointer, GET_MODE (pointer),
                                    next, amount);
}

/* Return a new RTX holding the result of moving POINTER forward by the
   size of the mode it points to.  */

static rtx
aarch64_progress_pointer (rtx pointer)
{
  HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));

  return aarch64_move_pointer (pointer, amount);
}

/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
   MODE bytes.  */

static void
aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
                                              machine_mode mode)
{
  rtx reg = gen_reg_rtx (mode);

  /* "Cast" the pointers to the correct mode.  */
  *src = adjust_address (*src, mode, 0);
  *dst = adjust_address (*dst, mode, 0);
  /* Emit the memcpy.  */
  emit_move_insn (reg, *src);
  emit_move_insn (*dst, reg);
  /* Move the pointers forward.  */
  *src = aarch64_progress_pointer (*src);
  *dst = aarch64_progress_pointer (*dst);
}
13783 /* Expand movmem, as if from a __builtin_memcpy. Return true if
13784 we succeed, otherwise return false. */
13787 aarch64_expand_movmem (rtx
*operands
)
13790 rtx dst
= operands
[0];
13791 rtx src
= operands
[1];
13793 bool speed_p
= !optimize_function_for_size_p (cfun
);
13795 /* When optimizing for size, give a better estimate of the length of a
13796 memcpy call, but use the default otherwise. */
13797 unsigned int max_instructions
= (speed_p
? 15 : AARCH64_CALL_RATIO
) / 2;
13799 /* We can't do anything smart if the amount to copy is not constant. */
13800 if (!CONST_INT_P (operands
[2]))
13803 n
= UINTVAL (operands
[2]);
13805 /* Try to keep the number of instructions low. For cases below 16 bytes we
13806 need to make at most two moves. For cases above 16 bytes it will be one
13807 move for each 16 byte chunk, then at most two additional moves. */
13808 if (((n
/ 16) + (n
% 16 ? 2 : 0)) > max_instructions
)
13811 base
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
13812 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
13814 base
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
13815 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
13817 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
13823 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, HImode
);
13828 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, QImode
);
13833 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
13834 4-byte chunk, partially overlapping with the previously copied chunk. */
13837 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
13843 src
= aarch64_move_pointer (src
, move
);
13844 dst
= aarch64_move_pointer (dst
, move
);
13845 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
13850 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
13851 them, then (if applicable) an 8-byte chunk. */
13856 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, TImode
);
13861 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, DImode
);
13866 /* Finish the final bytes of the copy. We can always do this in one
13867 instruction. We either copy the exact amount we need, or partially
13868 overlap with the previous chunk we copied and copy 8-bytes. */
13872 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, QImode
);
13874 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, HImode
);
13876 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
13881 src
= aarch64_move_pointer (src
, -1);
13882 dst
= aarch64_move_pointer (dst
, -1);
13883 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
13889 src
= aarch64_move_pointer (src
, move
);
13890 dst
= aarch64_move_pointer (dst
, move
);
13891 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, DImode
);
/* Split a DImode store of a CONST_INT SRC to MEM DST as two
   SImode stores.  Handle the case when the constant has identical
   bottom and top halves.  This is beneficial when the two stores can be
   merged into an STP and we avoid synthesising potentially expensive
   immediates twice.  Return true if such a split is possible.  */

bool
aarch64_split_dimode_const_store (rtx dst, rtx src)
{
  rtx lo = gen_lowpart (SImode, src);
  rtx hi = gen_highpart_mode (SImode, DImode, src);

  bool size_p = optimize_function_for_size_p (cfun);

  if (!rtx_equal_p (lo, hi))
    return false;

  unsigned int orig_cost
    = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
  unsigned int lo_cost
    = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);

  /* We want to transform:
     MOV	x1, 49370
     MOVK	x1, 0x140, lsl 16
     MOVK	x1, 0xc0da, lsl 32
     MOVK	x1, 0x140, lsl 48
     STR	x1, [x0]
   into:
     MOV	w1, 49370
     MOVK	w1, 0x140, lsl 16
     STP	w1, w1, [x0]
   So we want to perform this only when we save two instructions
   or more.  When optimizing for size, however, accept any code size
   savings we can.  */
  if (size_p && orig_cost <= lo_cost)
    return false;

  if (!size_p
      && (orig_cost <= lo_cost + 1))
    return false;

  rtx mem_lo = adjust_address (dst, SImode, 0);
  if (!aarch64_mem_pair_operand (mem_lo, SImode))
    return false;

  rtx tmp_reg = gen_reg_rtx (SImode);
  aarch64_expand_mov_immediate (tmp_reg, lo);
  rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
  /* Don't emit an explicit store pair as this may not be always profitable.
     Let the sched-fusion logic decide whether to merge them.  */
  emit_move_insn (mem_lo, tmp_reg);
  emit_move_insn (mem_hi, tmp_reg);

  return true;
}
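/* Added worked example: storing 0x0140c0da0140c0da (both SImode halves equal
   to 0x0140c0da, i.e. 49370 + (0x140 << 16)) needs four MOV/MOVK instructions
   as a DImode immediate but only two as an SImode one, so the split above
   wins and the two SImode stores can later fuse into an STP.  */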
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
aarch64_asan_shadow_offset (void)
{
  return (HOST_WIDE_INT_1 << 36);
}

static bool
aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
                                        unsigned int align,
                                        enum by_pieces_operation op,
                                        bool speed_p)
{
  /* STORE_BY_PIECES can be used when copying a constant string, but
     in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
     For now we always fail this and let the move_by_pieces code copy
     the string from read-only memory.  */
  if (op == STORE_BY_PIECES)
    return false;

  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
}
13980 aarch64_gen_ccmp_first (rtx_insn
**prep_seq
, rtx_insn
**gen_seq
,
13981 int code
, tree treeop0
, tree treeop1
)
13983 machine_mode op_mode
, cmp_mode
, cc_mode
= CCmode
;
13985 int unsignedp
= TYPE_UNSIGNED (TREE_TYPE (treeop0
));
13987 struct expand_operand ops
[4];
13990 expand_operands (treeop0
, treeop1
, NULL_RTX
, &op0
, &op1
, EXPAND_NORMAL
);
13992 op_mode
= GET_MODE (op0
);
13993 if (op_mode
== VOIDmode
)
13994 op_mode
= GET_MODE (op1
);
14002 icode
= CODE_FOR_cmpsi
;
14007 icode
= CODE_FOR_cmpdi
;
14012 cc_mode
= aarch64_select_cc_mode ((rtx_code
) code
, op0
, op1
);
14013 icode
= cc_mode
== CCFPEmode
? CODE_FOR_fcmpesf
: CODE_FOR_fcmpsf
;
14018 cc_mode
= aarch64_select_cc_mode ((rtx_code
) code
, op0
, op1
);
14019 icode
= cc_mode
== CCFPEmode
? CODE_FOR_fcmpedf
: CODE_FOR_fcmpdf
;
14027 op0
= prepare_operand (icode
, op0
, 0, op_mode
, cmp_mode
, unsignedp
);
14028 op1
= prepare_operand (icode
, op1
, 1, op_mode
, cmp_mode
, unsignedp
);
14034 *prep_seq
= get_insns ();
14037 create_fixed_operand (&ops
[0], op0
);
14038 create_fixed_operand (&ops
[1], op1
);
14041 if (!maybe_expand_insn (icode
, 2, ops
))
14046 *gen_seq
= get_insns ();
14049 return gen_rtx_fmt_ee ((rtx_code
) code
, cc_mode
,
14050 gen_rtx_REG (cc_mode
, CC_REGNUM
), const0_rtx
);
14054 aarch64_gen_ccmp_next (rtx_insn
**prep_seq
, rtx_insn
**gen_seq
, rtx prev
,
14055 int cmp_code
, tree treeop0
, tree treeop1
, int bit_code
)
14057 rtx op0
, op1
, target
;
14058 machine_mode op_mode
, cmp_mode
, cc_mode
= CCmode
;
14059 int unsignedp
= TYPE_UNSIGNED (TREE_TYPE (treeop0
));
14061 struct expand_operand ops
[6];
14064 push_to_sequence (*prep_seq
);
14065 expand_operands (treeop0
, treeop1
, NULL_RTX
, &op0
, &op1
, EXPAND_NORMAL
);
14067 op_mode
= GET_MODE (op0
);
14068 if (op_mode
== VOIDmode
)
14069 op_mode
= GET_MODE (op1
);
14077 icode
= CODE_FOR_ccmpsi
;
14082 icode
= CODE_FOR_ccmpdi
;
14087 cc_mode
= aarch64_select_cc_mode ((rtx_code
) cmp_code
, op0
, op1
);
14088 icode
= cc_mode
== CCFPEmode
? CODE_FOR_fccmpesf
: CODE_FOR_fccmpsf
;
14093 cc_mode
= aarch64_select_cc_mode ((rtx_code
) cmp_code
, op0
, op1
);
14094 icode
= cc_mode
== CCFPEmode
? CODE_FOR_fccmpedf
: CODE_FOR_fccmpdf
;
14102 op0
= prepare_operand (icode
, op0
, 2, op_mode
, cmp_mode
, unsignedp
);
14103 op1
= prepare_operand (icode
, op1
, 3, op_mode
, cmp_mode
, unsignedp
);
14109 *prep_seq
= get_insns ();
14112 target
= gen_rtx_REG (cc_mode
, CC_REGNUM
);
14113 aarch64_cond
= aarch64_get_condition_code_1 (cc_mode
, (rtx_code
) cmp_code
);
14115 if (bit_code
!= AND
)
14117 prev
= gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev
),
14118 GET_MODE (XEXP (prev
, 0))),
14119 VOIDmode
, XEXP (prev
, 0), const0_rtx
);
14120 aarch64_cond
= AARCH64_INVERSE_CONDITION_CODE (aarch64_cond
);
14123 create_fixed_operand (&ops
[0], XEXP (prev
, 0));
14124 create_fixed_operand (&ops
[1], target
);
14125 create_fixed_operand (&ops
[2], op0
);
14126 create_fixed_operand (&ops
[3], op1
);
14127 create_fixed_operand (&ops
[4], prev
);
14128 create_fixed_operand (&ops
[5], GEN_INT (aarch64_cond
));
14130 push_to_sequence (*gen_seq
);
14131 if (!maybe_expand_insn (icode
, 6, ops
))
14137 *gen_seq
= get_insns ();
14140 return gen_rtx_fmt_ee ((rtx_code
) cmp_code
, VOIDmode
, target
, const0_rtx
);
14143 #undef TARGET_GEN_CCMP_FIRST
14144 #define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
14146 #undef TARGET_GEN_CCMP_NEXT
14147 #define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
14149 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
14150 instruction fusion of some sort. */
14153 aarch64_macro_fusion_p (void)
14155 return aarch64_tune_params
.fusible_ops
!= AARCH64_FUSE_NOTHING
;
14159 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
14160 should be kept together during scheduling. */
14163 aarch_macro_fusion_pair_p (rtx_insn
*prev
, rtx_insn
*curr
)
14166 rtx prev_set
= single_set (prev
);
14167 rtx curr_set
= single_set (curr
);
14168 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
14169 bool simple_sets_p
= prev_set
&& curr_set
&& !any_condjump_p (curr
);
14171 if (!aarch64_macro_fusion_p ())
14174 if (simple_sets_p
&& aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK
))
14176 /* We are trying to match:
14177 prev (mov) == (set (reg r0) (const_int imm16))
14178 curr (movk) == (set (zero_extract (reg r0)
14181 (const_int imm16_1)) */
14183 set_dest
= SET_DEST (curr_set
);
14185 if (GET_CODE (set_dest
) == ZERO_EXTRACT
14186 && CONST_INT_P (SET_SRC (curr_set
))
14187 && CONST_INT_P (SET_SRC (prev_set
))
14188 && CONST_INT_P (XEXP (set_dest
, 2))
14189 && INTVAL (XEXP (set_dest
, 2)) == 16
14190 && REG_P (XEXP (set_dest
, 0))
14191 && REG_P (SET_DEST (prev_set
))
14192 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
14198 if (simple_sets_p
&& aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD
))
14201 /* We're trying to match:
14202 prev (adrp) == (set (reg r1)
14203 (high (symbol_ref ("SYM"))))
14204 curr (add) == (set (reg r0)
14206 (symbol_ref ("SYM"))))
14207 Note that r0 need not necessarily be the same as r1, especially
14208 during pre-regalloc scheduling. */
14210 if (satisfies_constraint_Ush (SET_SRC (prev_set
))
14211 && REG_P (SET_DEST (prev_set
)) && REG_P (SET_DEST (curr_set
)))
14213 if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
14214 && REG_P (XEXP (SET_SRC (curr_set
), 0))
14215 && REGNO (XEXP (SET_SRC (curr_set
), 0))
14216 == REGNO (SET_DEST (prev_set
))
14217 && rtx_equal_p (XEXP (SET_SRC (prev_set
), 0),
14218 XEXP (SET_SRC (curr_set
), 1)))
14223 if (simple_sets_p
&& aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK
))
14226 /* We're trying to match:
14227 prev (movk) == (set (zero_extract (reg r0)
14230 (const_int imm16_1))
14231 curr (movk) == (set (zero_extract (reg r0)
14234 (const_int imm16_2)) */
14236 if (GET_CODE (SET_DEST (prev_set
)) == ZERO_EXTRACT
14237 && GET_CODE (SET_DEST (curr_set
)) == ZERO_EXTRACT
14238 && REG_P (XEXP (SET_DEST (prev_set
), 0))
14239 && REG_P (XEXP (SET_DEST (curr_set
), 0))
14240 && REGNO (XEXP (SET_DEST (prev_set
), 0))
14241 == REGNO (XEXP (SET_DEST (curr_set
), 0))
14242 && CONST_INT_P (XEXP (SET_DEST (prev_set
), 2))
14243 && CONST_INT_P (XEXP (SET_DEST (curr_set
), 2))
14244 && INTVAL (XEXP (SET_DEST (prev_set
), 2)) == 32
14245 && INTVAL (XEXP (SET_DEST (curr_set
), 2)) == 48
14246 && CONST_INT_P (SET_SRC (prev_set
))
14247 && CONST_INT_P (SET_SRC (curr_set
)))
14251 if (simple_sets_p
&& aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR
))
14253 /* We're trying to match:
14254 prev (adrp) == (set (reg r0)
14255 (high (symbol_ref ("SYM"))))
14256 curr (ldr) == (set (reg r1)
14257 (mem (lo_sum (reg r0)
14258 (symbol_ref ("SYM")))))
14260 curr (ldr) == (set (reg r1)
14263 (symbol_ref ("SYM")))))) */
14264 if (satisfies_constraint_Ush (SET_SRC (prev_set
))
14265 && REG_P (SET_DEST (prev_set
)) && REG_P (SET_DEST (curr_set
)))
14267 rtx curr_src
= SET_SRC (curr_set
);
14269 if (GET_CODE (curr_src
) == ZERO_EXTEND
)
14270 curr_src
= XEXP (curr_src
, 0);
14272 if (MEM_P (curr_src
) && GET_CODE (XEXP (curr_src
, 0)) == LO_SUM
14273 && REG_P (XEXP (XEXP (curr_src
, 0), 0))
14274 && REGNO (XEXP (XEXP (curr_src
, 0), 0))
14275 == REGNO (SET_DEST (prev_set
))
14276 && rtx_equal_p (XEXP (XEXP (curr_src
, 0), 1),
14277 XEXP (SET_SRC (prev_set
), 0)))
14282 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC
)
14283 && aarch_crypto_can_dual_issue (prev
, curr
))
14286 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH
)
14287 && any_condjump_p (curr
))
14289 enum attr_type prev_type
= get_attr_type (prev
);
14291 unsigned int condreg1
, condreg2
;
14293 aarch64_fixed_condition_code_regs (&condreg1
, &condreg2
);
14294 cc_reg_1
= gen_rtx_REG (CCmode
, condreg1
);
14296 if (reg_referenced_p (cc_reg_1
, PATTERN (curr
))
14298 && modified_in_p (cc_reg_1
, prev
))
14300 /* FIXME: this misses some which is considered simple arthematic
14301 instructions for ThunderX. Simple shifts are missed here. */
14302 if (prev_type
== TYPE_ALUS_SREG
14303 || prev_type
== TYPE_ALUS_IMM
14304 || prev_type
== TYPE_LOGICS_REG
14305 || prev_type
== TYPE_LOGICS_IMM
)
14312 && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH
)
14313 && any_condjump_p (curr
))
14315 /* We're trying to match:
14316 prev (alu_insn) == (set (r0) plus ((r0) (r1/imm)))
14317 curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
14319 (label_ref ("SYM"))
14321 if (SET_DEST (curr_set
) == (pc_rtx
)
14322 && GET_CODE (SET_SRC (curr_set
)) == IF_THEN_ELSE
14323 && REG_P (XEXP (XEXP (SET_SRC (curr_set
), 0), 0))
14324 && REG_P (SET_DEST (prev_set
))
14325 && REGNO (SET_DEST (prev_set
))
14326 == REGNO (XEXP (XEXP (SET_SRC (curr_set
), 0), 0)))
14328 /* Fuse ALU operations followed by conditional branch instruction. */
14329 switch (get_attr_type (prev
))
14332 case TYPE_ALU_SREG
:
14335 case TYPE_ADCS_REG
:
14336 case TYPE_ADCS_IMM
:
14337 case TYPE_LOGIC_REG
:
14338 case TYPE_LOGIC_IMM
:
14342 case TYPE_SHIFT_REG
:
14343 case TYPE_SHIFT_IMM
:
14358 /* Return true iff the instruction fusion described by OP is enabled. */
14361 aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op
)
14363 return (aarch64_tune_params
.fusible_ops
& op
) != 0;
14366 /* If MEM is in the form of [base+offset], extract the two parts
14367 of address and set to BASE and OFFSET, otherwise return false
14368 after clearing BASE and OFFSET. */
14371 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
14375 gcc_assert (MEM_P (mem
));
14377 addr
= XEXP (mem
, 0);
14382 *offset
= const0_rtx
;
14386 if (GET_CODE (addr
) == PLUS
14387 && REG_P (XEXP (addr
, 0)) && CONST_INT_P (XEXP (addr
, 1)))
14389 *base
= XEXP (addr
, 0);
14390 *offset
= XEXP (addr
, 1);
14395 *offset
= NULL_RTX
;
14400 /* Types for scheduling fusion. */
14401 enum sched_fusion_type
14403 SCHED_FUSION_NONE
= 0,
14404 SCHED_FUSION_LD_SIGN_EXTEND
,
14405 SCHED_FUSION_LD_ZERO_EXTEND
,
14411 /* If INSN is a load or store of address in the form of [base+offset],
14412 extract the two parts and set to BASE and OFFSET. Return scheduling
14413 fusion type this INSN is. */
14415 static enum sched_fusion_type
14416 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
)
14419 enum sched_fusion_type fusion
= SCHED_FUSION_LD
;
14421 gcc_assert (INSN_P (insn
));
14422 x
= PATTERN (insn
);
14423 if (GET_CODE (x
) != SET
)
14424 return SCHED_FUSION_NONE
;
14427 dest
= SET_DEST (x
);
14429 machine_mode dest_mode
= GET_MODE (dest
);
14431 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode
))
14432 return SCHED_FUSION_NONE
;
14434 if (GET_CODE (src
) == SIGN_EXTEND
)
14436 fusion
= SCHED_FUSION_LD_SIGN_EXTEND
;
14437 src
= XEXP (src
, 0);
14438 if (GET_CODE (src
) != MEM
|| GET_MODE (src
) != SImode
)
14439 return SCHED_FUSION_NONE
;
14441 else if (GET_CODE (src
) == ZERO_EXTEND
)
14443 fusion
= SCHED_FUSION_LD_ZERO_EXTEND
;
14444 src
= XEXP (src
, 0);
14445 if (GET_CODE (src
) != MEM
|| GET_MODE (src
) != SImode
)
14446 return SCHED_FUSION_NONE
;
14449 if (GET_CODE (src
) == MEM
&& REG_P (dest
))
14450 extract_base_offset_in_addr (src
, base
, offset
);
14451 else if (GET_CODE (dest
) == MEM
&& (REG_P (src
) || src
== const0_rtx
))
14453 fusion
= SCHED_FUSION_ST
;
14454 extract_base_offset_in_addr (dest
, base
, offset
);
14457 return SCHED_FUSION_NONE
;
14459 if (*base
== NULL_RTX
|| *offset
== NULL_RTX
)
14460 fusion
= SCHED_FUSION_NONE
;
14465 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
14467 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
14468 and PRI are only calculated for these instructions. For other instruction,
14469 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
14470 type instruction fusion can be added by returning different priorities.
14472 It's important that irrelevant instructions get the largest FUSION_PRI. */
14475 aarch64_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
14476 int *fusion_pri
, int *pri
)
14480 enum sched_fusion_type fusion
;
14482 gcc_assert (INSN_P (insn
));
14485 fusion
= fusion_load_store (insn
, &base
, &offset
);
14486 if (fusion
== SCHED_FUSION_NONE
)
14493 /* Set FUSION_PRI according to fusion type and base register. */
14494 *fusion_pri
= tmp
- fusion
* FIRST_PSEUDO_REGISTER
- REGNO (base
);
14496 /* Calculate PRI. */
14499 /* INSN with smaller offset goes first. */
14500 off_val
= (int)(INTVAL (offset
));
14502 tmp
-= (off_val
& 0xfffff);
14504 tmp
+= ((- off_val
) & 0xfffff);
14510 /* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
14511 Adjust priority of sha1h instructions so they are scheduled before
14512 other SHA1 instructions. */
14515 aarch64_sched_adjust_priority (rtx_insn
*insn
, int priority
)
14517 rtx x
= PATTERN (insn
);
14519 if (GET_CODE (x
) == SET
)
14523 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SHA1H
)
14524 return priority
+ 10;
14530 /* Given OPERANDS of consecutive load/store, check if we can merge
14531 them into ldp/stp. LOAD is true if they are load instructions.
14532 MODE is the mode of memory operands. */
14535 aarch64_operands_ok_for_ldpstp (rtx
*operands
, bool load
,
14538 HOST_WIDE_INT offval_1
, offval_2
, msize
;
14539 enum reg_class rclass_1
, rclass_2
;
14540 rtx mem_1
, mem_2
, reg_1
, reg_2
, base_1
, base_2
, offset_1
, offset_2
;
14544 mem_1
= operands
[1];
14545 mem_2
= operands
[3];
14546 reg_1
= operands
[0];
14547 reg_2
= operands
[2];
14548 gcc_assert (REG_P (reg_1
) && REG_P (reg_2
));
14549 if (REGNO (reg_1
) == REGNO (reg_2
))
14554 mem_1
= operands
[0];
14555 mem_2
= operands
[2];
14556 reg_1
= operands
[1];
14557 reg_2
= operands
[3];
14560 /* The mems cannot be volatile. */
14561 if (MEM_VOLATILE_P (mem_1
) || MEM_VOLATILE_P (mem_2
))
14564 /* If we have SImode and slow unaligned ldp,
14565 check the alignment to be at least 8 byte. */
14567 && (aarch64_tune_params
.extra_tuning_flags
14568 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
)
14570 && MEM_ALIGN (mem_1
) < 8 * BITS_PER_UNIT
)
14573 /* Check if the addresses are in the form of [base+offset]. */
14574 extract_base_offset_in_addr (mem_1
, &base_1
, &offset_1
);
14575 if (base_1
== NULL_RTX
|| offset_1
== NULL_RTX
)
14577 extract_base_offset_in_addr (mem_2
, &base_2
, &offset_2
);
14578 if (base_2
== NULL_RTX
|| offset_2
== NULL_RTX
)
14581 /* Check if the bases are same. */
14582 if (!rtx_equal_p (base_1
, base_2
))
14585 offval_1
= INTVAL (offset_1
);
14586 offval_2
= INTVAL (offset_2
);
14587 msize
= GET_MODE_SIZE (mode
);
14588 /* Check if the offsets are consecutive. */
14589 if (offval_1
!= (offval_2
+ msize
) && offval_2
!= (offval_1
+ msize
))
14592 /* Check if the addresses are clobbered by load. */
14595 if (reg_mentioned_p (reg_1
, mem_1
))
14598 /* In increasing order, the last load can clobber the address. */
14599 if (offval_1
> offval_2
&& reg_mentioned_p (reg_2
, mem_2
))
14603 if (REG_P (reg_1
) && FP_REGNUM_P (REGNO (reg_1
)))
14604 rclass_1
= FP_REGS
;
14606 rclass_1
= GENERAL_REGS
;
14608 if (REG_P (reg_2
) && FP_REGNUM_P (REGNO (reg_2
)))
14609 rclass_2
= FP_REGS
;
14611 rclass_2
= GENERAL_REGS
;
14613 /* Check if the registers are of same class. */
14614 if (rclass_1
!= rclass_2
)
14620 /* Given OPERANDS of consecutive load/store, check if we can merge
14621 them into ldp/stp by adjusting the offset. LOAD is true if they
14622 are load instructions. MODE is the mode of memory operands.
14624 Given below consecutive stores:
14626 str w1, [xb, 0x100]
14627 str w1, [xb, 0x104]
14628 str w1, [xb, 0x108]
14629 str w1, [xb, 0x10c]
14631 Though the offsets are out of the range supported by stp, we can
14632 still pair them after adjusting the offset, like:
14634 add scratch, xb, 0x100
14635 stp w1, w1, [scratch]
14636 stp w1, w1, [scratch, 0x8]
14638 The peephole patterns detecting this opportunity should guarantee
14639 the scratch register is avaliable. */
14642 aarch64_operands_adjust_ok_for_ldpstp (rtx
*operands
, bool load
,
14645 enum reg_class rclass_1
, rclass_2
, rclass_3
, rclass_4
;
14646 HOST_WIDE_INT offval_1
, offval_2
, offval_3
, offval_4
, msize
;
14647 rtx mem_1
, mem_2
, mem_3
, mem_4
, reg_1
, reg_2
, reg_3
, reg_4
;
14648 rtx base_1
, base_2
, base_3
, base_4
, offset_1
, offset_2
, offset_3
, offset_4
;
14652 reg_1
= operands
[0];
14653 mem_1
= operands
[1];
14654 reg_2
= operands
[2];
14655 mem_2
= operands
[3];
14656 reg_3
= operands
[4];
14657 mem_3
= operands
[5];
14658 reg_4
= operands
[6];
14659 mem_4
= operands
[7];
14660 gcc_assert (REG_P (reg_1
) && REG_P (reg_2
)
14661 && REG_P (reg_3
) && REG_P (reg_4
));
14662 if (REGNO (reg_1
) == REGNO (reg_2
) || REGNO (reg_3
) == REGNO (reg_4
))
14667 mem_1
= operands
[0];
14668 reg_1
= operands
[1];
14669 mem_2
= operands
[2];
14670 reg_2
= operands
[3];
14671 mem_3
= operands
[4];
14672 reg_3
= operands
[5];
14673 mem_4
= operands
[6];
14674 reg_4
= operands
[7];
14676 /* Skip if memory operand is by itslef valid for ldp/stp. */
14677 if (!MEM_P (mem_1
) || aarch64_mem_pair_operand (mem_1
, mode
))
14680 /* The mems cannot be volatile. */
14681 if (MEM_VOLATILE_P (mem_1
) || MEM_VOLATILE_P (mem_2
)
14682 || MEM_VOLATILE_P (mem_3
) ||MEM_VOLATILE_P (mem_4
))
14685 /* Check if the addresses are in the form of [base+offset]. */
14686 extract_base_offset_in_addr (mem_1
, &base_1
, &offset_1
);
14687 if (base_1
== NULL_RTX
|| offset_1
== NULL_RTX
)
14689 extract_base_offset_in_addr (mem_2
, &base_2
, &offset_2
);
14690 if (base_2
== NULL_RTX
|| offset_2
== NULL_RTX
)
14692 extract_base_offset_in_addr (mem_3
, &base_3
, &offset_3
);
14693 if (base_3
== NULL_RTX
|| offset_3
== NULL_RTX
)
14695 extract_base_offset_in_addr (mem_4
, &base_4
, &offset_4
);
14696 if (base_4
== NULL_RTX
|| offset_4
== NULL_RTX
)
14699 /* Check if the bases are same. */
14700 if (!rtx_equal_p (base_1
, base_2
)
14701 || !rtx_equal_p (base_2
, base_3
)
14702 || !rtx_equal_p (base_3
, base_4
))
14705 offval_1
= INTVAL (offset_1
);
14706 offval_2
= INTVAL (offset_2
);
14707 offval_3
= INTVAL (offset_3
);
14708 offval_4
= INTVAL (offset_4
);
14709 msize
= GET_MODE_SIZE (mode
);
14710 /* Check if the offsets are consecutive. */
14711 if ((offval_1
!= (offval_2
+ msize
)
14712 || offval_1
!= (offval_3
+ msize
* 2)
14713 || offval_1
!= (offval_4
+ msize
* 3))
14714 && (offval_4
!= (offval_3
+ msize
)
14715 || offval_4
!= (offval_2
+ msize
* 2)
14716 || offval_4
!= (offval_1
+ msize
* 3)))
14719 /* Check if the addresses are clobbered by load. */
14722 if (reg_mentioned_p (reg_1
, mem_1
)
14723 || reg_mentioned_p (reg_2
, mem_2
)
14724 || reg_mentioned_p (reg_3
, mem_3
))
14727 /* In increasing order, the last load can clobber the address. */
14728 if (offval_1
> offval_2
&& reg_mentioned_p (reg_4
, mem_4
))
14732 /* If we have SImode and slow unaligned ldp,
14733 check the alignment to be at least 8 byte. */
14735 && (aarch64_tune_params
.extra_tuning_flags
14736 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
)
14738 && MEM_ALIGN (mem_1
) < 8 * BITS_PER_UNIT
)
14741 if (REG_P (reg_1
) && FP_REGNUM_P (REGNO (reg_1
)))
14742 rclass_1
= FP_REGS
;
14744 rclass_1
= GENERAL_REGS
;
14746 if (REG_P (reg_2
) && FP_REGNUM_P (REGNO (reg_2
)))
14747 rclass_2
= FP_REGS
;
14749 rclass_2
= GENERAL_REGS
;
14751 if (REG_P (reg_3
) && FP_REGNUM_P (REGNO (reg_3
)))
14752 rclass_3
= FP_REGS
;
14754 rclass_3
= GENERAL_REGS
;
14756 if (REG_P (reg_4
) && FP_REGNUM_P (REGNO (reg_4
)))
14757 rclass_4
= FP_REGS
;
14759 rclass_4
= GENERAL_REGS
;
14761 /* Check if the registers are of same class. */
14762 if (rclass_1
!= rclass_2
|| rclass_2
!= rclass_3
|| rclass_3
!= rclass_4
)
14768 /* Given OPERANDS of consecutive load/store, this function pairs them
14769 into ldp/stp after adjusting the offset. It depends on the fact
14770 that addresses of load/store instructions are in increasing order.
14771 MODE is the mode of memory operands. CODE is the rtl operator
14772 which should be applied to all memory operands, it's SIGN_EXTEND,
14773 ZERO_EXTEND or UNKNOWN. */
14776 aarch64_gen_adjusted_ldpstp (rtx
*operands
, bool load
,
14777 scalar_mode mode
, RTX_CODE code
)
14779 rtx base
, offset
, t1
, t2
;
14780 rtx mem_1
, mem_2
, mem_3
, mem_4
;
14781 HOST_WIDE_INT off_val
, abs_off
, adj_off
, new_off
, stp_off_limit
, msize
;
14785 mem_1
= operands
[1];
14786 mem_2
= operands
[3];
14787 mem_3
= operands
[5];
14788 mem_4
= operands
[7];
14792 mem_1
= operands
[0];
14793 mem_2
= operands
[2];
14794 mem_3
= operands
[4];
14795 mem_4
= operands
[6];
14796 gcc_assert (code
== UNKNOWN
);
14799 extract_base_offset_in_addr (mem_1
, &base
, &offset
);
14800 gcc_assert (base
!= NULL_RTX
&& offset
!= NULL_RTX
);
14802 /* Adjust offset thus it can fit in ldp/stp instruction. */
14803 msize
= GET_MODE_SIZE (mode
);
14804 stp_off_limit
= msize
* 0x40;
14805 off_val
= INTVAL (offset
);
14806 abs_off
= (off_val
< 0) ? -off_val
: off_val
;
14807 new_off
= abs_off
% stp_off_limit
;
14808 adj_off
= abs_off
- new_off
;
14810 /* Further adjust to make sure all offsets are OK. */
14811 if ((new_off
+ msize
* 2) >= stp_off_limit
)
14813 adj_off
+= stp_off_limit
;
14814 new_off
-= stp_off_limit
;
14817 /* Make sure the adjustment can be done with ADD/SUB instructions. */
14818 if (adj_off
>= 0x1000)
14823 adj_off
= -adj_off
;
14824 new_off
= -new_off
;
14827 /* Create new memory references. */
14828 mem_1
= change_address (mem_1
, VOIDmode
,
14829 plus_constant (DImode
, operands
[8], new_off
));
14831 /* Check if the adjusted address is OK for ldp/stp. */
14832 if (!aarch64_mem_pair_operand (mem_1
, mode
))
14835 msize
= GET_MODE_SIZE (mode
);
14836 mem_2
= change_address (mem_2
, VOIDmode
,
14837 plus_constant (DImode
,
14840 mem_3
= change_address (mem_3
, VOIDmode
,
14841 plus_constant (DImode
,
14843 new_off
+ msize
* 2));
14844 mem_4
= change_address (mem_4
, VOIDmode
,
14845 plus_constant (DImode
,
14847 new_off
+ msize
* 3));
14849 if (code
== ZERO_EXTEND
)
14851 mem_1
= gen_rtx_ZERO_EXTEND (DImode
, mem_1
);
14852 mem_2
= gen_rtx_ZERO_EXTEND (DImode
, mem_2
);
14853 mem_3
= gen_rtx_ZERO_EXTEND (DImode
, mem_3
);
14854 mem_4
= gen_rtx_ZERO_EXTEND (DImode
, mem_4
);
14856 else if (code
== SIGN_EXTEND
)
14858 mem_1
= gen_rtx_SIGN_EXTEND (DImode
, mem_1
);
14859 mem_2
= gen_rtx_SIGN_EXTEND (DImode
, mem_2
);
14860 mem_3
= gen_rtx_SIGN_EXTEND (DImode
, mem_3
);
14861 mem_4
= gen_rtx_SIGN_EXTEND (DImode
, mem_4
);
14866 operands
[1] = mem_1
;
14867 operands
[3] = mem_2
;
14868 operands
[5] = mem_3
;
14869 operands
[7] = mem_4
;
14873 operands
[0] = mem_1
;
14874 operands
[2] = mem_2
;
14875 operands
[4] = mem_3
;
14876 operands
[6] = mem_4
;
14879 /* Emit adjusting instruction. */
14880 emit_insn (gen_rtx_SET (operands
[8], plus_constant (DImode
, base
, adj_off
)));
14881 /* Emit ldp/stp instructions. */
14882 t1
= gen_rtx_SET (operands
[0], operands
[1]);
14883 t2
= gen_rtx_SET (operands
[2], operands
[3]);
14884 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, t1
, t2
)));
14885 t1
= gen_rtx_SET (operands
[4], operands
[5]);
14886 t2
= gen_rtx_SET (operands
[6], operands
[7]);
14887 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, t1
, t2
)));
/* Return 1 if pseudo register should be created and used to hold
   GOT address for PIC code.  */

static bool
aarch64_use_pseudo_pic_reg (void)
{
  return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
}

/* Implement TARGET_UNSPEC_MAY_TRAP_P.  */

static int
aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
{
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTSMALLPIC:
    case UNSPEC_GOTSMALLPIC28K:
    case UNSPEC_GOTTINYPIC:
      return 0;
    default:
      break;
    }

  return default_unspec_may_trap_p (x, flags);
}
/* If X is a positive CONST_DOUBLE with a value that is a power of 2
   return the log2 of that value.  Otherwise return -1.  */

int
aarch64_fpconst_pow_of_2 (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, DFmode))
    return -1;

  return exact_log2 (real_to_integer (r));
}

/* If X is a vector of equal CONST_DOUBLE values and that value is
   Y, return the aarch64_fpconst_pow_of_2 of Y.  Otherwise return -1.  */

int
aarch64_vec_fpconst_pow_of_2 (rtx x)
{
  if (GET_CODE (x) != CONST_VECTOR)
    return -1;

  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
    return -1;

  int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
  if (firstval <= 0)
    return -1;

  for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++)
    if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
      return -1;

  return firstval;
}
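/* Added example: aarch64_fpconst_pow_of_2 returns 4 for the constant 16.0 and
   -1 for negative values, NaNs, infinities and non-integral values; the
   vector variant requires every element to agree, e.g. {8.0, 8.0, 8.0, 8.0}
   yields 3.  */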
/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
   to float.

   __fp16 always promotes through this hook.
   _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
   through the generic excess precision logic rather than here.  */

static tree
aarch64_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t)
      && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
    return float_type_node;

  return NULL_TREE;
}

/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
                           optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);

    default:
      return true;
    }
}

/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
   if MODE is HFmode, and punt to the generic implementation otherwise.  */

static bool
aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
{
  return (mode == HFmode
          ? true
          : default_libgcc_floating_mode_supported_p (mode));
}

/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
   if MODE is HFmode, and punt to the generic implementation otherwise.  */

static bool
aarch64_scalar_mode_supported_p (scalar_mode mode)
{
  return (mode == HFmode
          ? true
          : default_scalar_mode_supported_p (mode));
}
/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

    0: evaluate all operations and constants, whose semantic type has at
       most the range and precision of type float, to the range and
       precision of float; evaluate all other operations and constants to
       the range and precision of the semantic type;

    N, where _FloatN is a supported interchange floating type
       evaluate all operations and constants, whose semantic type has at
       most the range and precision of _FloatN type, to the range and
       precision of the _FloatN type; evaluate all other operations and
       constants to the range and precision of the semantic type;

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
aarch64_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_FAST:
    case EXCESS_PRECISION_TYPE_STANDARD:
      /* We can calculate either in 16-bit range and precision or
         32-bit range and precision.  Make that decision based on whether
         we have native support for the ARMv8.2-A 16-bit floating-point
         instructions or not.  */
      return (TARGET_FP_F16INST
              ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
              : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
    case EXCESS_PRECISION_TYPE_IMPLICIT:
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
    default:
      gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
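/* Added note: concretely, when the ARMv8.2-A FP16 instructions are available
   (TARGET_FP_F16INST), "a + b" on two _Float16 values is evaluated directly
   in half precision; without them, both operands are widened, the sum is
   computed in float, and the result is narrowed on assignment.  */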
15060 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
15061 scheduled for speculative execution. Reject the long-running division
15062 and square-root instructions. */
15065 aarch64_sched_can_speculate_insn (rtx_insn
*insn
)
15067 switch (get_attr_type (insn
))
15075 case TYPE_NEON_FP_SQRT_S
:
15076 case TYPE_NEON_FP_SQRT_D
:
15077 case TYPE_NEON_FP_SQRT_S_Q
:
15078 case TYPE_NEON_FP_SQRT_D_Q
:
15079 case TYPE_NEON_FP_DIV_S
:
15080 case TYPE_NEON_FP_DIV_D
:
15081 case TYPE_NEON_FP_DIV_S_Q
:
15082 case TYPE_NEON_FP_DIV_D_Q
:
15089 /* Target-specific selftests. */
15093 namespace selftest
{
15095 /* Selftest for the RTL loader.
15096 Verify that the RTL loader copes with a dump from
15097 print_rtx_function. This is essentially just a test that class
15098 function_reader can handle a real dump, but it also verifies
15099 that lookup_reg_by_dump_name correctly handles hard regs.
15100 The presence of hard reg names in the dump means that the test is
15101 target-specific, hence it is in this file. */
15104 aarch64_test_loading_full_dump ()
15106 rtl_dump_test
t (SELFTEST_LOCATION
, locate_file ("aarch64/times-two.rtl"));
15108 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun
->decl
)));
15110 rtx_insn
*insn_1
= get_insn_by_uid (1);
15111 ASSERT_EQ (NOTE
, GET_CODE (insn_1
));
15113 rtx_insn
*insn_15
= get_insn_by_uid (15);
15114 ASSERT_EQ (INSN
, GET_CODE (insn_15
));
15115 ASSERT_EQ (USE
, GET_CODE (PATTERN (insn_15
)));
15117 /* Verify crtl->return_rtx. */
15118 ASSERT_EQ (REG
, GET_CODE (crtl
->return_rtx
));
15119 ASSERT_EQ (0, REGNO (crtl
->return_rtx
));
15120 ASSERT_EQ (SImode
, GET_MODE (crtl
->return_rtx
));
15123 /* Run all target-specific selftests. */
15126 aarch64_run_selftests (void)
15128 aarch64_test_loading_full_dump ();
15131 } // namespace selftest
15133 #endif /* #if CHECKING_P */
15135 #undef TARGET_ADDRESS_COST
15136 #define TARGET_ADDRESS_COST aarch64_address_cost
15138 /* This hook will determines whether unnamed bitfields affect the alignment
15139 of the containing structure. The hook returns true if the structure
15140 should inherit the alignment requirements of an unnamed bitfield's
15142 #undef TARGET_ALIGN_ANON_BITFIELD
15143 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
15145 #undef TARGET_ASM_ALIGNED_DI_OP
15146 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
15148 #undef TARGET_ASM_ALIGNED_HI_OP
15149 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
15151 #undef TARGET_ASM_ALIGNED_SI_OP
15152 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
15154 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
15155 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
15156 hook_bool_const_tree_hwi_hwi_const_tree_true
15158 #undef TARGET_ASM_FILE_START
15159 #define TARGET_ASM_FILE_START aarch64_start_file
15161 #undef TARGET_ASM_OUTPUT_MI_THUNK
15162 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
15164 #undef TARGET_ASM_SELECT_RTX_SECTION
15165 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
15167 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
15168 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
15170 #undef TARGET_BUILD_BUILTIN_VA_LIST
15171 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
15173 #undef TARGET_CALLEE_COPIES
15174 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
15176 #undef TARGET_CAN_ELIMINATE
15177 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
15179 #undef TARGET_CAN_INLINE_P
15180 #define TARGET_CAN_INLINE_P aarch64_can_inline_p
15182 #undef TARGET_CANNOT_FORCE_CONST_MEM
15183 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
15185 #undef TARGET_CASE_VALUES_THRESHOLD
15186 #define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
15188 #undef TARGET_CONDITIONAL_REGISTER_USAGE
15189 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
15191 /* Only the least significant bit is used for initialization guard
15193 #undef TARGET_CXX_GUARD_MASK_BIT
15194 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
15196 #undef TARGET_C_MODE_FOR_SUFFIX
15197 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
15199 #ifdef TARGET_BIG_ENDIAN_DEFAULT
15200 #undef TARGET_DEFAULT_TARGET_FLAGS
15201 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
15204 #undef TARGET_CLASS_MAX_NREGS
15205 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
15207 #undef TARGET_BUILTIN_DECL
15208 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
15210 #undef TARGET_BUILTIN_RECIPROCAL
15211 #define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
15213 #undef TARGET_C_EXCESS_PRECISION
15214 #define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
15216 #undef TARGET_EXPAND_BUILTIN
15217 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
15219 #undef TARGET_EXPAND_BUILTIN_VA_START
15220 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
15222 #undef TARGET_FOLD_BUILTIN
15223 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
15225 #undef TARGET_FUNCTION_ARG
15226 #define TARGET_FUNCTION_ARG aarch64_function_arg
15228 #undef TARGET_FUNCTION_ARG_ADVANCE
15229 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
15231 #undef TARGET_FUNCTION_ARG_BOUNDARY
15232 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
15234 #undef TARGET_FUNCTION_ARG_PADDING
15235 #define TARGET_FUNCTION_ARG_PADDING aarch64_function_arg_padding
15237 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
15238 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
15240 #undef TARGET_FUNCTION_VALUE
15241 #define TARGET_FUNCTION_VALUE aarch64_function_value
15243 #undef TARGET_FUNCTION_VALUE_REGNO_P
15244 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
15246 #undef TARGET_GIMPLE_FOLD_BUILTIN
15247 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
15249 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
15250 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
15252 #undef TARGET_INIT_BUILTINS
15253 #define TARGET_INIT_BUILTINS aarch64_init_builtins
15255 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
15256 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
15257 aarch64_ira_change_pseudo_allocno_class
15259 #undef TARGET_LEGITIMATE_ADDRESS_P
15260 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
15262 #undef TARGET_LEGITIMATE_CONSTANT_P
15263 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
15265 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
15266 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
15267 aarch64_legitimize_address_displacement
15269 #undef TARGET_LIBGCC_CMP_RETURN_MODE
15270 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
15272 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
15273 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
15274 aarch64_libgcc_floating_mode_supported_p
15276 #undef TARGET_MANGLE_TYPE
15277 #define TARGET_MANGLE_TYPE aarch64_mangle_type
15279 #undef TARGET_MEMORY_MOVE_COST
15280 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
15282 #undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
15283 #define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
15285 #undef TARGET_MUST_PASS_IN_STACK
15286 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
15288 /* This target hook should return true if accesses to volatile bitfields
15289 should use the narrowest mode possible. It should return false if these
15290 accesses should use the bitfield container type. */
15291 #undef TARGET_NARROW_VOLATILE_BITFIELD
15292 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE aarch64_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE aarch64_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT aarch64_option_print

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE aarch64_promoted_type

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  aarch64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
  aarch64_get_separate_components

#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
  aarch64_components_for_bb

#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
  aarch64_disqualify_components

#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  aarch64_emit_prologue_components

#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  aarch64_emit_epilogue_components

#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
  aarch64_set_handled_components

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  aarch64_builtin_support_vector_misalignment

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
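
/* Illustrative arithmetic for the range above (a sketch, not a normative
   restatement of the ISA): the unscaled signed 9-bit immediate form
   (LDUR/STUR) covers offsets -256..255, which motivates the minimum of
   -256, while the unsigned scaled 12-bit immediate form for byte accesses
   (LDRB/STRB) covers offsets 0..4095, which motivates the maximum of 4095.
   Wider accesses would scale the upper bound (e.g. up to 4095 * 8 for
   8-byte loads), but since the access size at an anchor is unknown here,
   the conservative byte limit is kept.  */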

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  aarch64_use_by_pieces_infrastructure_p

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND aarch64_print_operand

#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p

#undef TARGET_OMIT_STRUCT_RETURN_REG
#define TARGET_OMIT_STRUCT_RETURN_REG true

/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4
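
/* Illustrative note on the value above (a sketch of the arithmetic, not
   new behaviour): the hook expects a power of two marking a bit that is
   never set in an ordinary code address.  With bits 0 and 1 reserved by
   the architecture, the lowest usable bit is bit 2, i.e. 1 << 2 == 4.  */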

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS aarch64_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK aarch64_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P aarch64_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  aarch64_hard_regno_call_part_clobbered

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment

#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
#endif /* #if CHECKING_P */

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"