1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2017 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #define INCLUDE_STRING
24 #include "coretypes.h"
35 #include "stringpool.h"
40 #include "diagnostic.h"
41 #include "insn-attr.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
52 #include "langhooks.h"
57 #include "gimple-iterator.h"
58 #include "tree-vectorizer.h"
59 #include "aarch64-cost-tables.h"
63 #include "tm-constrs.h"
64 #include "sched-int.h"
65 #include "target-globals.h"
66 #include "common/common-target.h"
68 #include "selftest-rtl.h"
70 /* This file should be included last. */
71 #include "target-def.h"
73 /* The size of a pointer, in bytes (POINTER_SIZE is in bits). */
74 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
76 /* Classifies an address.
79 A simple base register plus immediate offset.
82 A base register indexed by immediate offset with writeback.
85 A base register indexed by (optionally scaled) register.
88 A base register indexed by (optionally scaled) zero-extended register.
91 A base register indexed by (optionally scaled) sign-extended register.
94 A LO_SUM rtx with a base register and "LO12" symbol relocation.
97 A constant symbolic address, in pc-relative literal pool. */
99 enum aarch64_address_type
{
109 struct aarch64_address_info
{
110 enum aarch64_address_type type
;
114 enum aarch64_symbol_type symbol_type
;
117 struct simd_immediate_info
126 /* The current code model. */
127 enum aarch64_code_model aarch64_cmodel
;
130 #undef TARGET_HAVE_TLS
131 #define TARGET_HAVE_TLS 1
134 static bool aarch64_composite_type_p (const_tree
, machine_mode
);
135 static bool aarch64_vfp_is_call_or_return_candidate (machine_mode
,
137 machine_mode
*, int *,
139 static void aarch64_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
140 static void aarch64_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
141 static void aarch64_override_options_after_change (void);
142 static bool aarch64_vector_mode_supported_p (machine_mode
);
143 static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode
,
144 const unsigned char *sel
);
145 static int aarch64_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
146 static bool aarch64_builtin_support_vector_misalignment (machine_mode mode
,
151 /* Major revision number of the ARM Architecture implemented by the target. */
152 unsigned aarch64_architecture_version
;
154 /* The processor for which instructions should be scheduled. */
155 enum aarch64_processor aarch64_tune
= cortexa53
;
157 /* Mask to specify which instruction scheduling options should be used. */
158 unsigned long aarch64_tune_flags
= 0;
160 /* Global flag for PC relative loads. */
161 bool aarch64_pcrelative_literal_loads
;
163 /* Support for command line parsing of boolean flags in the tuning
165 struct aarch64_flag_desc
/* Expand one entry of aarch64-fusion-pairs.def into a { name, flag }
   initializer for the aarch64_fusible_pairs table below.  */
171 #define AARCH64_FUSION_PAIR(name, internal_name) \
172 { name, AARCH64_FUSE_##internal_name },
173 static const struct aarch64_flag_desc aarch64_fusible_pairs
[] =
175 { "none", AARCH64_FUSE_NOTHING
},
176 #include "aarch64-fusion-pairs.def"
177 { "all", AARCH64_FUSE_ALL
},
178 { NULL
, AARCH64_FUSE_NOTHING
}
/* Expand one entry of aarch64-tuning-flags.def into a { name, flag }
   initializer for the aarch64_tuning_flags table below.  */
181 #define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
182 { name, AARCH64_EXTRA_TUNE_##internal_name },
183 static const struct aarch64_flag_desc aarch64_tuning_flags
[] =
185 { "none", AARCH64_EXTRA_TUNE_NONE
},
186 #include "aarch64-tuning-flags.def"
187 { "all", AARCH64_EXTRA_TUNE_ALL
},
188 { NULL
, AARCH64_EXTRA_TUNE_NONE
}
191 /* Tuning parameters. */
193 static const struct cpu_addrcost_table generic_addrcost_table
=
203 0, /* register_offset */
204 0, /* register_sextend */
205 0, /* register_zextend */
209 static const struct cpu_addrcost_table cortexa57_addrcost_table
=
219 0, /* register_offset */
220 0, /* register_sextend */
221 0, /* register_zextend */
225 static const struct cpu_addrcost_table exynosm1_addrcost_table
=
235 1, /* register_offset */
236 1, /* register_sextend */
237 2, /* register_zextend */
241 static const struct cpu_addrcost_table xgene1_addrcost_table
=
251 0, /* register_offset */
252 1, /* register_sextend */
253 1, /* register_zextend */
257 static const struct cpu_addrcost_table qdf24xx_addrcost_table
=
267 0, /* register_offset */
268 0, /* register_sextend */
269 0, /* register_zextend */
273 static const struct cpu_addrcost_table thunderx2t99_addrcost_table
=
283 2, /* register_offset */
284 3, /* register_sextend */
285 3, /* register_zextend */
289 static const struct cpu_regmove_cost generic_regmove_cost
=
292 /* Avoid the use of slow int<->fp moves for spilling by setting
293 their cost higher than memmov_cost. */
299 static const struct cpu_regmove_cost cortexa57_regmove_cost
=
302 /* Avoid the use of slow int<->fp moves for spilling by setting
303 their cost higher than memmov_cost. */
309 static const struct cpu_regmove_cost cortexa53_regmove_cost
=
312 /* Avoid the use of slow int<->fp moves for spilling by setting
313 their cost higher than memmov_cost. */
319 static const struct cpu_regmove_cost exynosm1_regmove_cost
=
322 /* Avoid the use of slow int<->fp moves for spilling by setting
323 their cost higher than memmov_cost (actual, 4 and 9). */
329 static const struct cpu_regmove_cost thunderx_regmove_cost
=
337 static const struct cpu_regmove_cost xgene1_regmove_cost
=
340 /* Avoid the use of slow int<->fp moves for spilling by setting
341 their cost higher than memmov_cost. */
347 static const struct cpu_regmove_cost qdf24xx_regmove_cost
=
350 /* Avoid the use of int<->fp moves for spilling. */
356 static const struct cpu_regmove_cost thunderx2t99_regmove_cost
=
359 /* Avoid the use of int<->fp moves for spilling. */
365 /* Generic costs for vector insn classes. */
366 static const struct cpu_vector_cost generic_vector_cost
=
368 1, /* scalar_int_stmt_cost */
369 1, /* scalar_fp_stmt_cost */
370 1, /* scalar_load_cost */
371 1, /* scalar_store_cost */
372 1, /* vec_int_stmt_cost */
373 1, /* vec_fp_stmt_cost */
374 2, /* vec_permute_cost */
375 1, /* vec_to_scalar_cost */
376 1, /* scalar_to_vec_cost */
377 1, /* vec_align_load_cost */
378 1, /* vec_unalign_load_cost */
379 1, /* vec_unalign_store_cost */
380 1, /* vec_store_cost */
381 3, /* cond_taken_branch_cost */
382 1 /* cond_not_taken_branch_cost */
385 /* ThunderX costs for vector insn classes. */
386 static const struct cpu_vector_cost thunderx_vector_cost
=
388 1, /* scalar_int_stmt_cost */
389 1, /* scalar_fp_stmt_cost */
390 3, /* scalar_load_cost */
391 1, /* scalar_store_cost */
392 4, /* vec_int_stmt_cost */
393 4, /* vec_fp_stmt_cost */
394 4, /* vec_permute_cost */
395 2, /* vec_to_scalar_cost */
396 2, /* scalar_to_vec_cost */
397 3, /* vec_align_load_cost */
398 10, /* vec_unalign_load_cost */
399 10, /* vec_unalign_store_cost */
400 1, /* vec_store_cost */
401 3, /* cond_taken_branch_cost */
402 3 /* cond_not_taken_branch_cost */
405 /* Generic costs for vector insn classes. */
406 static const struct cpu_vector_cost cortexa57_vector_cost
=
408 1, /* scalar_int_stmt_cost */
409 1, /* scalar_fp_stmt_cost */
410 4, /* scalar_load_cost */
411 1, /* scalar_store_cost */
412 2, /* vec_int_stmt_cost */
413 2, /* vec_fp_stmt_cost */
414 3, /* vec_permute_cost */
415 8, /* vec_to_scalar_cost */
416 8, /* scalar_to_vec_cost */
417 4, /* vec_align_load_cost */
418 4, /* vec_unalign_load_cost */
419 1, /* vec_unalign_store_cost */
420 1, /* vec_store_cost */
421 1, /* cond_taken_branch_cost */
422 1 /* cond_not_taken_branch_cost */
425 static const struct cpu_vector_cost exynosm1_vector_cost
=
427 1, /* scalar_int_stmt_cost */
428 1, /* scalar_fp_stmt_cost */
429 5, /* scalar_load_cost */
430 1, /* scalar_store_cost */
431 3, /* vec_int_stmt_cost */
432 3, /* vec_fp_stmt_cost */
433 3, /* vec_permute_cost */
434 3, /* vec_to_scalar_cost */
435 3, /* scalar_to_vec_cost */
436 5, /* vec_align_load_cost */
437 5, /* vec_unalign_load_cost */
438 1, /* vec_unalign_store_cost */
439 1, /* vec_store_cost */
440 1, /* cond_taken_branch_cost */
441 1 /* cond_not_taken_branch_cost */
444 /* Generic costs for vector insn classes. */
445 static const struct cpu_vector_cost xgene1_vector_cost
=
447 1, /* scalar_int_stmt_cost */
448 1, /* scalar_fp_stmt_cost */
449 5, /* scalar_load_cost */
450 1, /* scalar_store_cost */
451 2, /* vec_int_stmt_cost */
452 2, /* vec_fp_stmt_cost */
453 2, /* vec_permute_cost */
454 4, /* vec_to_scalar_cost */
455 4, /* scalar_to_vec_cost */
456 10, /* vec_align_load_cost */
457 10, /* vec_unalign_load_cost */
458 2, /* vec_unalign_store_cost */
459 2, /* vec_store_cost */
460 2, /* cond_taken_branch_cost */
461 1 /* cond_not_taken_branch_cost */
464 /* Costs for vector insn classes for Vulcan. */
465 static const struct cpu_vector_cost thunderx2t99_vector_cost
=
467 1, /* scalar_int_stmt_cost */
468 6, /* scalar_fp_stmt_cost */
469 4, /* scalar_load_cost */
470 1, /* scalar_store_cost */
471 5, /* vec_int_stmt_cost */
472 6, /* vec_fp_stmt_cost */
473 3, /* vec_permute_cost */
474 6, /* vec_to_scalar_cost */
475 5, /* scalar_to_vec_cost */
476 8, /* vec_align_load_cost */
477 8, /* vec_unalign_load_cost */
478 4, /* vec_unalign_store_cost */
479 4, /* vec_store_cost */
480 2, /* cond_taken_branch_cost */
481 1 /* cond_not_taken_branch_cost */
484 /* Generic costs for branch instructions. */
485 static const struct cpu_branch_cost generic_branch_cost
=
487 1, /* Predictable. */
488 3 /* Unpredictable. */
491 /* Branch costs for Cortex-A57. */
492 static const struct cpu_branch_cost cortexa57_branch_cost
=
494 1, /* Predictable. */
495 3 /* Unpredictable. */
498 /* Branch costs for Vulcan. */
499 static const struct cpu_branch_cost thunderx2t99_branch_cost
=
501 1, /* Predictable. */
502 3 /* Unpredictable. */
505 /* Generic approximation modes. */
506 static const cpu_approx_modes generic_approx_modes
=
508 AARCH64_APPROX_NONE
, /* division */
509 AARCH64_APPROX_NONE
, /* sqrt */
510 AARCH64_APPROX_NONE
/* recip_sqrt */
513 /* Approximation modes for Exynos M1. */
514 static const cpu_approx_modes exynosm1_approx_modes
=
516 AARCH64_APPROX_NONE
, /* division */
517 AARCH64_APPROX_ALL
, /* sqrt */
518 AARCH64_APPROX_ALL
/* recip_sqrt */
521 /* Approximation modes for X-Gene 1. */
522 static const cpu_approx_modes xgene1_approx_modes
=
524 AARCH64_APPROX_NONE
, /* division */
525 AARCH64_APPROX_NONE
, /* sqrt */
526 AARCH64_APPROX_ALL
/* recip_sqrt */
529 static const struct tune_params generic_tunings
=
531 &cortexa57_extra_costs
,
532 &generic_addrcost_table
,
533 &generic_regmove_cost
,
534 &generic_vector_cost
,
535 &generic_branch_cost
,
536 &generic_approx_modes
,
539 (AARCH64_FUSE_AES_AESMC
), /* fusible_ops */
540 8, /* function_align. */
543 2, /* int_reassoc_width. */
544 4, /* fp_reassoc_width. */
545 1, /* vec_reassoc_width. */
546 2, /* min_div_recip_mul_sf. */
547 2, /* min_div_recip_mul_df. */
548 0, /* max_case_values. */
549 0, /* cache_line_size. */
550 tune_params::AUTOPREFETCHER_OFF
, /* autoprefetcher_model. */
551 (AARCH64_EXTRA_TUNE_NONE
) /* tune_flags. */
554 static const struct tune_params cortexa35_tunings
=
556 &cortexa53_extra_costs
,
557 &generic_addrcost_table
,
558 &cortexa53_regmove_cost
,
559 &generic_vector_cost
,
560 &cortexa57_branch_cost
,
561 &generic_approx_modes
,
564 (AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
565 | AARCH64_FUSE_MOVK_MOVK
| AARCH64_FUSE_ADRP_LDR
), /* fusible_ops */
566 16, /* function_align. */
569 2, /* int_reassoc_width. */
570 4, /* fp_reassoc_width. */
571 1, /* vec_reassoc_width. */
572 2, /* min_div_recip_mul_sf. */
573 2, /* min_div_recip_mul_df. */
574 0, /* max_case_values. */
575 0, /* cache_line_size. */
576 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
577 (AARCH64_EXTRA_TUNE_NONE
) /* tune_flags. */
580 static const struct tune_params cortexa53_tunings
=
582 &cortexa53_extra_costs
,
583 &generic_addrcost_table
,
584 &cortexa53_regmove_cost
,
585 &generic_vector_cost
,
586 &cortexa57_branch_cost
,
587 &generic_approx_modes
,
590 (AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
591 | AARCH64_FUSE_MOVK_MOVK
| AARCH64_FUSE_ADRP_LDR
), /* fusible_ops */
592 16, /* function_align. */
595 2, /* int_reassoc_width. */
596 4, /* fp_reassoc_width. */
597 1, /* vec_reassoc_width. */
598 2, /* min_div_recip_mul_sf. */
599 2, /* min_div_recip_mul_df. */
600 0, /* max_case_values. */
601 0, /* cache_line_size. */
602 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
603 (AARCH64_EXTRA_TUNE_NONE
) /* tune_flags. */
606 static const struct tune_params cortexa57_tunings
=
608 &cortexa57_extra_costs
,
609 &cortexa57_addrcost_table
,
610 &cortexa57_regmove_cost
,
611 &cortexa57_vector_cost
,
612 &cortexa57_branch_cost
,
613 &generic_approx_modes
,
616 (AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
617 | AARCH64_FUSE_MOVK_MOVK
), /* fusible_ops */
618 16, /* function_align. */
621 2, /* int_reassoc_width. */
622 4, /* fp_reassoc_width. */
623 1, /* vec_reassoc_width. */
624 2, /* min_div_recip_mul_sf. */
625 2, /* min_div_recip_mul_df. */
626 0, /* max_case_values. */
627 0, /* cache_line_size. */
628 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
629 (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS
) /* tune_flags. */
632 static const struct tune_params cortexa72_tunings
=
634 &cortexa57_extra_costs
,
635 &cortexa57_addrcost_table
,
636 &cortexa57_regmove_cost
,
637 &cortexa57_vector_cost
,
638 &cortexa57_branch_cost
,
639 &generic_approx_modes
,
642 (AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
643 | AARCH64_FUSE_MOVK_MOVK
), /* fusible_ops */
644 16, /* function_align. */
647 2, /* int_reassoc_width. */
648 4, /* fp_reassoc_width. */
649 1, /* vec_reassoc_width. */
650 2, /* min_div_recip_mul_sf. */
651 2, /* min_div_recip_mul_df. */
652 0, /* max_case_values. */
653 0, /* cache_line_size. */
654 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
655 (AARCH64_EXTRA_TUNE_NONE
) /* tune_flags. */
658 static const struct tune_params cortexa73_tunings
=
660 &cortexa57_extra_costs
,
661 &cortexa57_addrcost_table
,
662 &cortexa57_regmove_cost
,
663 &cortexa57_vector_cost
,
664 &cortexa57_branch_cost
,
665 &generic_approx_modes
,
666 4, /* memmov_cost. */
668 (AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
669 | AARCH64_FUSE_MOVK_MOVK
| AARCH64_FUSE_ADRP_LDR
), /* fusible_ops */
670 16, /* function_align. */
673 2, /* int_reassoc_width. */
674 4, /* fp_reassoc_width. */
675 1, /* vec_reassoc_width. */
676 2, /* min_div_recip_mul_sf. */
677 2, /* min_div_recip_mul_df. */
678 0, /* max_case_values. */
679 0, /* cache_line_size. */
680 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
681 (AARCH64_EXTRA_TUNE_NONE
) /* tune_flags. */
684 static const struct tune_params exynosm1_tunings
=
686 &exynosm1_extra_costs
,
687 &exynosm1_addrcost_table
,
688 &exynosm1_regmove_cost
,
689 &exynosm1_vector_cost
,
690 &generic_branch_cost
,
691 &exynosm1_approx_modes
,
694 (AARCH64_FUSE_AES_AESMC
), /* fusible_ops */
695 4, /* function_align. */
698 2, /* int_reassoc_width. */
699 4, /* fp_reassoc_width. */
700 1, /* vec_reassoc_width. */
701 2, /* min_div_recip_mul_sf. */
702 2, /* min_div_recip_mul_df. */
703 48, /* max_case_values. */
704 64, /* cache_line_size. */
705 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
706 (AARCH64_EXTRA_TUNE_NONE
) /* tune_flags. */
709 static const struct tune_params thunderx_tunings
=
711 &thunderx_extra_costs
,
712 &generic_addrcost_table
,
713 &thunderx_regmove_cost
,
714 &thunderx_vector_cost
,
715 &generic_branch_cost
,
716 &generic_approx_modes
,
719 AARCH64_FUSE_CMP_BRANCH
, /* fusible_ops */
720 8, /* function_align. */
723 2, /* int_reassoc_width. */
724 4, /* fp_reassoc_width. */
725 1, /* vec_reassoc_width. */
726 2, /* min_div_recip_mul_sf. */
727 2, /* min_div_recip_mul_df. */
728 0, /* max_case_values. */
729 0, /* cache_line_size. */
730 tune_params::AUTOPREFETCHER_OFF
, /* autoprefetcher_model. */
731 (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
) /* tune_flags. */
734 static const struct tune_params xgene1_tunings
=
737 &xgene1_addrcost_table
,
738 &xgene1_regmove_cost
,
740 &generic_branch_cost
,
741 &xgene1_approx_modes
,
744 AARCH64_FUSE_NOTHING
, /* fusible_ops */
745 16, /* function_align. */
747 16, /* loop_align. */
748 2, /* int_reassoc_width. */
749 4, /* fp_reassoc_width. */
750 1, /* vec_reassoc_width. */
751 2, /* min_div_recip_mul_sf. */
752 2, /* min_div_recip_mul_df. */
753 0, /* max_case_values. */
754 0, /* cache_line_size. */
755 tune_params::AUTOPREFETCHER_OFF
, /* autoprefetcher_model. */
756 (AARCH64_EXTRA_TUNE_NONE
) /* tune_flags. */
759 static const struct tune_params qdf24xx_tunings
=
761 &qdf24xx_extra_costs
,
762 &qdf24xx_addrcost_table
,
763 &qdf24xx_regmove_cost
,
764 &generic_vector_cost
,
765 &generic_branch_cost
,
766 &generic_approx_modes
,
769 (AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
770 | AARCH64_FUSE_MOVK_MOVK
), /* fusible_ops */
771 16, /* function_align. */
773 16, /* loop_align. */
774 2, /* int_reassoc_width. */
775 4, /* fp_reassoc_width. */
776 1, /* vec_reassoc_width. */
777 2, /* min_div_recip_mul_sf. */
778 2, /* min_div_recip_mul_df. */
779 0, /* max_case_values. */
780 64, /* cache_line_size. */
781 tune_params::AUTOPREFETCHER_STRONG
, /* autoprefetcher_model. */
782 (AARCH64_EXTRA_TUNE_NONE
) /* tune_flags. */
785 static const struct tune_params thunderx2t99_tunings
=
787 &thunderx2t99_extra_costs
,
788 &thunderx2t99_addrcost_table
,
789 &thunderx2t99_regmove_cost
,
790 &thunderx2t99_vector_cost
,
791 &thunderx2t99_branch_cost
,
792 &generic_approx_modes
,
793 4, /* memmov_cost. */
795 (AARCH64_FUSE_CMP_BRANCH
| AARCH64_FUSE_AES_AESMC
), /* fusible_ops */
796 16, /* function_align. */
798 16, /* loop_align. */
799 3, /* int_reassoc_width. */
800 2, /* fp_reassoc_width. */
801 2, /* vec_reassoc_width. */
802 2, /* min_div_recip_mul_sf. */
803 2, /* min_div_recip_mul_df. */
804 0, /* max_case_values. */
805 64, /* cache_line_size. */
806 tune_params::AUTOPREFETCHER_OFF
, /* autoprefetcher_model. */
807 (AARCH64_EXTRA_TUNE_NONE
) /* tune_flags. */
810 /* Support for fine-grained override of the tuning structures. */
811 struct aarch64_tuning_override_function
814 void (*parse_override
)(const char*, struct tune_params
*);
817 static void aarch64_parse_fuse_string (const char*, struct tune_params
*);
818 static void aarch64_parse_tune_string (const char*, struct tune_params
*);
820 static const struct aarch64_tuning_override_function
821 aarch64_tuning_override_functions
[] =
823 { "fuse", aarch64_parse_fuse_string
},
824 { "tune", aarch64_parse_tune_string
},
828 /* A processor implementing AArch64. */
831 const char *const name
;
832 enum aarch64_processor ident
;
833 enum aarch64_processor sched_core
;
834 enum aarch64_arch arch
;
835 unsigned architecture_version
;
836 const unsigned long flags
;
837 const struct tune_params
*const tune
;
840 /* Architectures implementing AArch64. */
841 static const struct processor all_architectures
[] =
843 #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
844 {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
845 #include "aarch64-arches.def"
846 {NULL
, aarch64_none
, aarch64_none
, aarch64_no_arch
, 0, 0, NULL
}
849 /* Processor cores implementing AArch64. */
850 static const struct processor all_cores
[] =
852 #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
853 {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
854 all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
855 FLAGS, &COSTS##_tunings},
856 #include "aarch64-cores.def"
857 {"generic", generic
, cortexa53
, AARCH64_ARCH_8A
, 8,
858 AARCH64_FL_FOR_ARCH8
, &generic_tunings
},
859 {NULL
, aarch64_none
, aarch64_none
, aarch64_no_arch
, 0, 0, NULL
}
863 /* Target specification. These are populated by the -march, -mtune, -mcpu
864 handling code or by target attributes. */
865 static const struct processor
*selected_arch
;
866 static const struct processor
*selected_cpu
;
867 static const struct processor
*selected_tune
;
869 /* The current tuning set. */
870 struct tune_params aarch64_tune_params
= generic_tunings
;
/* The flags of the CPU chosen by -mcpu/-march handling, or 0 if
   selected_cpu has not been populated yet.  */
872 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
874 /* An ISA extension in the co-processor and main instruction set space. */
875 struct aarch64_option_extension
877 const char *const name
;
878 const unsigned long flags_on
;
879 const unsigned long flags_off
;
882 typedef enum aarch64_cond_code
884 AARCH64_EQ
= 0, AARCH64_NE
, AARCH64_CS
, AARCH64_CC
, AARCH64_MI
, AARCH64_PL
,
885 AARCH64_VS
, AARCH64_VC
, AARCH64_HI
, AARCH64_LS
, AARCH64_GE
, AARCH64_LT
,
886 AARCH64_GT
, AARCH64_LE
, AARCH64_AL
, AARCH64_NV
/* Invert condition code X.  aarch64_cond_code lists each condition next to
   its inverse (EQ=0/NE=1, CS/CC, MI/PL, ...), so toggling the low bit of
   the enum value yields the inverse condition.  */
890 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
892 /* The condition codes of the processor, and the inverse function. */
893 static const char * const aarch64_condition_codes
[] =
895 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
896 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
899 /* Generate code to enable conditional branches in functions over 1 MiB. */
901 aarch64_gen_far_branch (rtx
* operands
, int pos_label
, const char * dest
,
902 const char * branch_format
)
904 rtx_code_label
* tmp_label
= gen_label_rtx ();
907 ASM_GENERATE_INTERNAL_LABEL (label_buf
, dest
,
908 CODE_LABEL_NUMBER (tmp_label
));
909 const char *label_ptr
= targetm
.strip_name_encoding (label_buf
);
910 rtx dest_label
= operands
[pos_label
];
911 operands
[pos_label
] = tmp_label
;
913 snprintf (buffer
, sizeof (buffer
), "%s%s", branch_format
, label_ptr
);
914 output_asm_insn (buffer
, operands
);
916 snprintf (buffer
, sizeof (buffer
), "b\t%%l%d\n%s:", pos_label
, label_ptr
);
917 operands
[pos_label
] = dest_label
;
918 output_asm_insn (buffer
, operands
);
923 aarch64_err_no_fpadvsimd (machine_mode mode
, const char *msg
)
925 const char *mc
= FLOAT_MODE_P (mode
) ? "floating-point" : "vector";
926 if (TARGET_GENERAL_REGS_ONLY
)
927 error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc
, msg
);
929 error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc
, msg
);
932 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
933 The register allocator chooses ALL_REGS if FP_REGS and GENERAL_REGS have
934 the same cost even if ALL_REGS has a much larger cost. ALL_REGS is also
935 used if the cost of both FP_REGS and GENERAL_REGS is lower than the memory
936 cost (in this case the best class is the lowest cost one). Using ALL_REGS
937 irrespective of its cost results in bad allocations with many redundant
938 int<->FP moves which are expensive on various cores.
939 To avoid this we don't allow ALL_REGS as the allocno class, but force a
940 decision between FP_REGS and GENERAL_REGS. We use the allocno class if it
941 isn't ALL_REGS. Similarly, use the best class if it isn't ALL_REGS.
942 Otherwise set the allocno class depending on the mode.
943 The result of this is that it is no longer inefficient to have a higher
944 memory move cost than the register move cost.
948 aarch64_ira_change_pseudo_allocno_class (int regno
, reg_class_t allocno_class
,
949 reg_class_t best_class
)
951 enum machine_mode mode
;
953 if (allocno_class
!= ALL_REGS
)
954 return allocno_class
;
956 if (best_class
!= ALL_REGS
)
959 mode
= PSEUDO_REGNO_MODE (regno
);
960 return FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
) ? FP_REGS
: GENERAL_REGS
;
964 aarch64_min_divisions_for_recip_mul (enum machine_mode mode
)
966 if (GET_MODE_UNIT_SIZE (mode
) == 4)
967 return aarch64_tune_params
.min_div_recip_mul_sf
;
968 return aarch64_tune_params
.min_div_recip_mul_df
;
972 aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED
,
973 enum machine_mode mode
)
975 if (VECTOR_MODE_P (mode
))
976 return aarch64_tune_params
.vec_reassoc_width
;
977 if (INTEGRAL_MODE_P (mode
))
978 return aarch64_tune_params
.int_reassoc_width
;
979 if (FLOAT_MODE_P (mode
))
980 return aarch64_tune_params
.fp_reassoc_width
;
984 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
986 aarch64_dbx_register_number (unsigned regno
)
988 if (GP_REGNUM_P (regno
))
989 return AARCH64_DWARF_R0
+ regno
- R0_REGNUM
;
990 else if (regno
== SP_REGNUM
)
991 return AARCH64_DWARF_SP
;
992 else if (FP_REGNUM_P (regno
))
993 return AARCH64_DWARF_V0
+ regno
- V0_REGNUM
;
995 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
996 equivalent DWARF register. */
997 return DWARF_FRAME_REGISTERS
;
1000 /* Return TRUE if MODE is any of the large INT modes. */
1002 aarch64_vect_struct_mode_p (machine_mode mode
)
1004 return mode
== OImode
|| mode
== CImode
|| mode
== XImode
;
1007 /* Return TRUE if MODE is any of the vector modes. */
1009 aarch64_vector_mode_p (machine_mode mode
)
1011 return aarch64_vector_mode_supported_p (mode
)
1012 || aarch64_vect_struct_mode_p (mode
);
1015 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
1017 aarch64_array_mode_supported_p (machine_mode mode
,
1018 unsigned HOST_WIDE_INT nelems
)
1021 && (AARCH64_VALID_SIMD_QREG_MODE (mode
)
1022 || AARCH64_VALID_SIMD_DREG_MODE (mode
))
1023 && (nelems
>= 2 && nelems
<= 4))
1029 /* Implement HARD_REGNO_NREGS. */
1032 aarch64_hard_regno_nregs (unsigned regno
, machine_mode mode
)
1034 switch (aarch64_regno_regclass (regno
))
1038 return (GET_MODE_SIZE (mode
) + UNITS_PER_VREG
- 1) / UNITS_PER_VREG
;
1040 return (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1045 /* Implement HARD_REGNO_MODE_OK. */
1048 aarch64_hard_regno_mode_ok (unsigned regno
, machine_mode mode
)
1050 if (GET_MODE_CLASS (mode
) == MODE_CC
)
1051 return regno
== CC_REGNUM
;
1053 if (regno
== SP_REGNUM
)
1054 /* The purpose of comparing with ptr_mode is to support the
1055 global register variable associated with the stack pointer
1056 register via the syntax of asm ("wsp") in ILP32. */
1057 return mode
== Pmode
|| mode
== ptr_mode
;
1059 if (regno
== FRAME_POINTER_REGNUM
|| regno
== ARG_POINTER_REGNUM
)
1060 return mode
== Pmode
;
1062 if (GP_REGNUM_P (regno
) && ! aarch64_vect_struct_mode_p (mode
))
1065 if (FP_REGNUM_P (regno
))
1067 if (aarch64_vect_struct_mode_p (mode
))
1069 (regno
+ aarch64_hard_regno_nregs (regno
, mode
) - 1) <= V31_REGNUM
;
1077 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
1079 aarch64_hard_regno_caller_save_mode (unsigned regno
, unsigned nregs
,
1082 /* Handle modes that fit within single registers. */
1083 if (nregs
== 1 && GET_MODE_SIZE (mode
) <= 16)
1085 if (GET_MODE_SIZE (mode
) >= 4)
1090 /* Fall back to generic for multi-reg and very large modes. */
1092 return choose_hard_reg_mode (regno
, nregs
, false);
1095 /* Return true if calls to DECL should be treated as
1096 long-calls (ie called via a register). */
1098 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED
)
1103 /* Return true if calls to symbol-ref SYM should be treated as
1104 long-calls (ie called via a register). */
1106 aarch64_is_long_call_p (rtx sym
)
1108 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym
));
1111 /* Return true if calls to symbol-ref SYM should not go through
1115 aarch64_is_noplt_call_p (rtx sym
)
1117 const_tree decl
= SYMBOL_REF_DECL (sym
);
1122 || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl
)))
1123 && !targetm
.binds_local_p (decl
))
1129 /* Return true if the offsets to a zero/sign-extract operation
1130 represent an expression that matches an extend operation. The
1131 operands represent the parameters from
1133 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
1135 aarch64_is_extend_from_extract (machine_mode mode
, rtx mult_imm
,
1138 HOST_WIDE_INT mult_val
, extract_val
;
1140 if (! CONST_INT_P (mult_imm
) || ! CONST_INT_P (extract_imm
))
1143 mult_val
= INTVAL (mult_imm
);
1144 extract_val
= INTVAL (extract_imm
);
1147 && extract_val
< GET_MODE_BITSIZE (mode
)
1148 && exact_log2 (extract_val
& ~7) > 0
1149 && (extract_val
& 7) <= 4
1150 && mult_val
== (1 << (extract_val
& 7)))
1156 /* Emit an insn that's a simple single-set. Both the operands must be
1157 known to be valid. */
1158 inline static rtx_insn
*
1159 emit_set_insn (rtx x
, rtx y
)
1161 return emit_insn (gen_rtx_SET (x
, y
));
1164 /* X and Y are two things to compare using CODE. Emit the compare insn and
1165 return the rtx for register 0 in the proper mode. */
1167 aarch64_gen_compare_reg (RTX_CODE code
, rtx x
, rtx y
)
1169 machine_mode mode
= SELECT_CC_MODE (code
, x
, y
);
1170 rtx cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
1172 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
1176 /* Build the SYMBOL_REF for __tls_get_addr. */
1178 static GTY(()) rtx tls_get_addr_libfunc
;
1181 aarch64_tls_get_addr (void)
1183 if (!tls_get_addr_libfunc
)
1184 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
1185 return tls_get_addr_libfunc
;
1188 /* Return the TLS model to use for ADDR. */
1190 static enum tls_model
1191 tls_symbolic_operand_type (rtx addr
)
1193 enum tls_model tls_kind
= TLS_MODEL_NONE
;
1196 if (GET_CODE (addr
) == CONST
)
1198 split_const (addr
, &sym
, &addend
);
1199 if (GET_CODE (sym
) == SYMBOL_REF
)
1200 tls_kind
= SYMBOL_REF_TLS_MODEL (sym
);
1202 else if (GET_CODE (addr
) == SYMBOL_REF
)
1203 tls_kind
= SYMBOL_REF_TLS_MODEL (addr
);
1208 /* We'll allow lo_sum's in addresses in our legitimate addresses
1209 so that combine would take care of combining addresses where
1210 necessary, but for generation purposes, we'll generate the address
1213 tmp = hi (symbol_ref); adrp x1, foo
1214 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
1218 adrp x1, :got:foo adrp tmp, :tlsgd:foo
1219 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
1223 Load TLS symbol, depending on TLS mechanism and TLS access model.
1225 Global Dynamic - Traditional TLS:
1226 adrp tmp, :tlsgd:imm
1227 add dest, tmp, #:tlsgd_lo12:imm
1230 Global Dynamic - TLS Descriptors:
1231 adrp dest, :tlsdesc:imm
1232 ldr tmp, [dest, #:tlsdesc_lo12:imm]
1233 add dest, dest, #:tlsdesc_lo12:imm
1240 adrp tmp, :gottprel:imm
1241 ldr dest, [tmp, #:gottprel_lo12:imm]
1246 add t0, tp, #:tprel_hi12:imm, lsl #12
1247 add t0, t0, #:tprel_lo12_nc:imm
1251 aarch64_load_symref_appropriately (rtx dest
, rtx imm
,
1252 enum aarch64_symbol_type type
)
1256 case SYMBOL_SMALL_ABSOLUTE
:
1258 /* In ILP32, the mode of dest can be either SImode or DImode. */
1260 machine_mode mode
= GET_MODE (dest
);
1262 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
1264 if (can_create_pseudo_p ())
1265 tmp_reg
= gen_reg_rtx (mode
);
1267 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
1268 emit_insn (gen_add_losym (dest
, tmp_reg
, imm
));
1272 case SYMBOL_TINY_ABSOLUTE
:
1273 emit_insn (gen_rtx_SET (dest
, imm
));
1276 case SYMBOL_SMALL_GOT_28K
:
1278 machine_mode mode
= GET_MODE (dest
);
1279 rtx gp_rtx
= pic_offset_table_rtx
;
1283 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
1284 here before rtl expand. Tree IVOPT will generate rtl pattern to
1285 decide rtx costs, in which case pic_offset_table_rtx is not
1286 initialized. For that case no need to generate the first adrp
1287 instruction as the final cost for global variable access is
1291 /* -fpic for -mcmodel=small allow 32K GOT table size (but we are
1292 using the page base as GOT base, the first page may be wasted,
1293 in the worst scenario, there is only 28K space for GOT).
1295 The generate instruction sequence for accessing global variable
1298 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
1300 Only one instruction needed. But we must initialize
1301 pic_offset_table_rtx properly. We generate initialize insn for
1302 every global access, and allow CSE to remove all redundant.
1304 The final instruction sequences will look like the following
1305 for multiply global variables access.
1307 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
1309 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
1310 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
1311 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
1314 rtx s
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
1315 crtl
->uses_pic_offset_table
= 1;
1316 emit_move_insn (gp_rtx
, gen_rtx_HIGH (Pmode
, s
));
1318 if (mode
!= GET_MODE (gp_rtx
))
1319 gp_rtx
= gen_lowpart (mode
, gp_rtx
);
1323 if (mode
== ptr_mode
)
1326 insn
= gen_ldr_got_small_28k_di (dest
, gp_rtx
, imm
);
1328 insn
= gen_ldr_got_small_28k_si (dest
, gp_rtx
, imm
);
1330 mem
= XVECEXP (SET_SRC (insn
), 0, 0);
1334 gcc_assert (mode
== Pmode
);
1336 insn
= gen_ldr_got_small_28k_sidi (dest
, gp_rtx
, imm
);
1337 mem
= XVECEXP (XEXP (SET_SRC (insn
), 0), 0, 0);
1340 /* The operand is expected to be MEM. Whenever the related insn
1341 pattern changed, above code which calculate mem should be
1343 gcc_assert (GET_CODE (mem
) == MEM
);
1344 MEM_READONLY_P (mem
) = 1;
1345 MEM_NOTRAP_P (mem
) = 1;
1350 case SYMBOL_SMALL_GOT_4G
:
1352 /* In ILP32, the mode of dest can be either SImode or DImode,
1353 while the got entry is always of SImode size. The mode of
1354 dest depends on how dest is used: if dest is assigned to a
1355 pointer (e.g. in the memory), it has SImode; it may have
1356 DImode if dest is dereferenced to access the memeory.
1357 This is why we have to handle three different ldr_got_small
1358 patterns here (two patterns for ILP32). */
1363 machine_mode mode
= GET_MODE (dest
);
1365 if (can_create_pseudo_p ())
1366 tmp_reg
= gen_reg_rtx (mode
);
1368 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
1369 if (mode
== ptr_mode
)
1372 insn
= gen_ldr_got_small_di (dest
, tmp_reg
, imm
);
1374 insn
= gen_ldr_got_small_si (dest
, tmp_reg
, imm
);
1376 mem
= XVECEXP (SET_SRC (insn
), 0, 0);
1380 gcc_assert (mode
== Pmode
);
1382 insn
= gen_ldr_got_small_sidi (dest
, tmp_reg
, imm
);
1383 mem
= XVECEXP (XEXP (SET_SRC (insn
), 0), 0, 0);
1386 gcc_assert (GET_CODE (mem
) == MEM
);
1387 MEM_READONLY_P (mem
) = 1;
1388 MEM_NOTRAP_P (mem
) = 1;
1393 case SYMBOL_SMALL_TLSGD
:
1396 machine_mode mode
= GET_MODE (dest
);
1397 rtx result
= gen_rtx_REG (mode
, R0_REGNUM
);
1401 aarch64_emit_call_insn (gen_tlsgd_small_si (result
, imm
));
1403 aarch64_emit_call_insn (gen_tlsgd_small_di (result
, imm
));
1404 insns
= get_insns ();
1407 RTL_CONST_CALL_P (insns
) = 1;
1408 emit_libcall_block (insns
, dest
, result
, imm
);
1412 case SYMBOL_SMALL_TLSDESC
:
1414 machine_mode mode
= GET_MODE (dest
);
1415 rtx x0
= gen_rtx_REG (mode
, R0_REGNUM
);
1418 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
1420 /* In ILP32, the got entry is always of SImode size. Unlike
1421 small GOT, the dest is fixed at reg 0. */
1423 emit_insn (gen_tlsdesc_small_si (imm
));
1425 emit_insn (gen_tlsdesc_small_di (imm
));
1426 tp
= aarch64_load_tp (NULL
);
1429 tp
= gen_lowpart (mode
, tp
);
1431 emit_insn (gen_rtx_SET (dest
, gen_rtx_PLUS (mode
, tp
, x0
)));
1432 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
1436 case SYMBOL_SMALL_TLSIE
:
1438 /* In ILP32, the mode of dest can be either SImode or DImode,
1439 while the got entry is always of SImode size. The mode of
1440 dest depends on how dest is used: if dest is assigned to a
1441 pointer (e.g. in the memory), it has SImode; it may have
1442 DImode if dest is dereferenced to access the memeory.
1443 This is why we have to handle three different tlsie_small
1444 patterns here (two patterns for ILP32). */
1445 machine_mode mode
= GET_MODE (dest
);
1446 rtx tmp_reg
= gen_reg_rtx (mode
);
1447 rtx tp
= aarch64_load_tp (NULL
);
1449 if (mode
== ptr_mode
)
1452 emit_insn (gen_tlsie_small_di (tmp_reg
, imm
));
1455 emit_insn (gen_tlsie_small_si (tmp_reg
, imm
));
1456 tp
= gen_lowpart (mode
, tp
);
1461 gcc_assert (mode
== Pmode
);
1462 emit_insn (gen_tlsie_small_sidi (tmp_reg
, imm
));
1465 emit_insn (gen_rtx_SET (dest
, gen_rtx_PLUS (mode
, tp
, tmp_reg
)));
1466 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
1470 case SYMBOL_TLSLE12
:
1471 case SYMBOL_TLSLE24
:
1472 case SYMBOL_TLSLE32
:
1473 case SYMBOL_TLSLE48
:
1475 machine_mode mode
= GET_MODE (dest
);
1476 rtx tp
= aarch64_load_tp (NULL
);
1479 tp
= gen_lowpart (mode
, tp
);
1483 case SYMBOL_TLSLE12
:
1484 emit_insn ((mode
== DImode
? gen_tlsle12_di
: gen_tlsle12_si
)
1487 case SYMBOL_TLSLE24
:
1488 emit_insn ((mode
== DImode
? gen_tlsle24_di
: gen_tlsle24_si
)
1491 case SYMBOL_TLSLE32
:
1492 emit_insn ((mode
== DImode
? gen_tlsle32_di
: gen_tlsle32_si
)
1494 emit_insn ((mode
== DImode
? gen_adddi3
: gen_addsi3
)
1497 case SYMBOL_TLSLE48
:
1498 emit_insn ((mode
== DImode
? gen_tlsle48_di
: gen_tlsle48_si
)
1500 emit_insn ((mode
== DImode
? gen_adddi3
: gen_addsi3
)
1507 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
1511 case SYMBOL_TINY_GOT
:
1512 emit_insn (gen_ldr_got_tiny (dest
, imm
));
1515 case SYMBOL_TINY_TLSIE
:
1517 machine_mode mode
= GET_MODE (dest
);
1518 rtx tp
= aarch64_load_tp (NULL
);
1520 if (mode
== ptr_mode
)
1523 emit_insn (gen_tlsie_tiny_di (dest
, imm
, tp
));
1526 tp
= gen_lowpart (mode
, tp
);
1527 emit_insn (gen_tlsie_tiny_si (dest
, imm
, tp
));
1532 gcc_assert (mode
== Pmode
);
1533 emit_insn (gen_tlsie_tiny_sidi (dest
, imm
, tp
));
1536 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
1545 /* Emit a move from SRC to DEST. Assume that the move expanders can
1546 handle all moves if !can_create_pseudo_p (). The distinction is
1547 important because, unlike emit_move_insn, the move expanders know
1548 how to force Pmode objects into the constant pool even when the
1549 constant pool address is not itself legitimate. */
1551 aarch64_emit_move (rtx dest
, rtx src
)
1553 return (can_create_pseudo_p ()
1554 ? emit_move_insn (dest
, src
)
1555 : emit_move_insn_1 (dest
, src
));
1558 /* Split a 128-bit move operation into two 64-bit move operations,
1559 taking care to handle partial overlap of register to register
1560 copies. Special cases are needed when moving between GP regs and
1561 FP regs. SRC can be a register, constant or memory; DST a register
1562 or memory. If either operand is memory it must not have any side
1565 aarch64_split_128bit_move (rtx dst
, rtx src
)
1570 machine_mode mode
= GET_MODE (dst
);
1572 gcc_assert (mode
== TImode
|| mode
== TFmode
);
1573 gcc_assert (!(side_effects_p (src
) || side_effects_p (dst
)));
1574 gcc_assert (mode
== GET_MODE (src
) || GET_MODE (src
) == VOIDmode
);
1576 if (REG_P (dst
) && REG_P (src
))
1578 int src_regno
= REGNO (src
);
1579 int dst_regno
= REGNO (dst
);
1581 /* Handle FP <-> GP regs. */
1582 if (FP_REGNUM_P (dst_regno
) && GP_REGNUM_P (src_regno
))
1584 src_lo
= gen_lowpart (word_mode
, src
);
1585 src_hi
= gen_highpart (word_mode
, src
);
1589 emit_insn (gen_aarch64_movtilow_di (dst
, src_lo
));
1590 emit_insn (gen_aarch64_movtihigh_di (dst
, src_hi
));
1594 emit_insn (gen_aarch64_movtflow_di (dst
, src_lo
));
1595 emit_insn (gen_aarch64_movtfhigh_di (dst
, src_hi
));
1599 else if (GP_REGNUM_P (dst_regno
) && FP_REGNUM_P (src_regno
))
1601 dst_lo
= gen_lowpart (word_mode
, dst
);
1602 dst_hi
= gen_highpart (word_mode
, dst
);
1606 emit_insn (gen_aarch64_movdi_tilow (dst_lo
, src
));
1607 emit_insn (gen_aarch64_movdi_tihigh (dst_hi
, src
));
1611 emit_insn (gen_aarch64_movdi_tflow (dst_lo
, src
));
1612 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi
, src
));
1618 dst_lo
= gen_lowpart (word_mode
, dst
);
1619 dst_hi
= gen_highpart (word_mode
, dst
);
1620 src_lo
= gen_lowpart (word_mode
, src
);
1621 src_hi
= gen_highpart_mode (word_mode
, mode
, src
);
1623 /* At most one pairing may overlap. */
1624 if (reg_overlap_mentioned_p (dst_lo
, src_hi
))
1626 aarch64_emit_move (dst_hi
, src_hi
);
1627 aarch64_emit_move (dst_lo
, src_lo
);
1631 aarch64_emit_move (dst_lo
, src_lo
);
1632 aarch64_emit_move (dst_hi
, src_hi
);
1637 aarch64_split_128bit_move_p (rtx dst
, rtx src
)
1639 return (! REG_P (src
)
1640 || ! (FP_REGNUM_P (REGNO (dst
)) && FP_REGNUM_P (REGNO (src
))));
1643 /* Split a complex SIMD combine. */
1646 aarch64_split_simd_combine (rtx dst
, rtx src1
, rtx src2
)
1648 machine_mode src_mode
= GET_MODE (src1
);
1649 machine_mode dst_mode
= GET_MODE (dst
);
1651 gcc_assert (VECTOR_MODE_P (dst_mode
));
1653 if (REG_P (dst
) && REG_P (src1
) && REG_P (src2
))
1655 rtx (*gen
) (rtx
, rtx
, rtx
);
1660 gen
= gen_aarch64_simd_combinev8qi
;
1663 gen
= gen_aarch64_simd_combinev4hi
;
1666 gen
= gen_aarch64_simd_combinev2si
;
1669 gen
= gen_aarch64_simd_combinev4hf
;
1672 gen
= gen_aarch64_simd_combinev2sf
;
1675 gen
= gen_aarch64_simd_combinedi
;
1678 gen
= gen_aarch64_simd_combinedf
;
1684 emit_insn (gen (dst
, src1
, src2
));
1689 /* Split a complex SIMD move. */
1692 aarch64_split_simd_move (rtx dst
, rtx src
)
1694 machine_mode src_mode
= GET_MODE (src
);
1695 machine_mode dst_mode
= GET_MODE (dst
);
1697 gcc_assert (VECTOR_MODE_P (dst_mode
));
1699 if (REG_P (dst
) && REG_P (src
))
1701 rtx (*gen
) (rtx
, rtx
);
1703 gcc_assert (VECTOR_MODE_P (src_mode
));
1708 gen
= gen_aarch64_split_simd_movv16qi
;
1711 gen
= gen_aarch64_split_simd_movv8hi
;
1714 gen
= gen_aarch64_split_simd_movv4si
;
1717 gen
= gen_aarch64_split_simd_movv2di
;
1720 gen
= gen_aarch64_split_simd_movv8hf
;
1723 gen
= gen_aarch64_split_simd_movv4sf
;
1726 gen
= gen_aarch64_split_simd_movv2df
;
1732 emit_insn (gen (dst
, src
));
1738 aarch64_zero_extend_const_eq (machine_mode xmode
, rtx x
,
1739 machine_mode ymode
, rtx y
)
1741 rtx r
= simplify_const_unary_operation (ZERO_EXTEND
, xmode
, y
, ymode
);
1742 gcc_assert (r
!= NULL
);
1743 return rtx_equal_p (x
, r
);
1748 aarch64_force_temporary (machine_mode mode
, rtx x
, rtx value
)
1750 if (can_create_pseudo_p ())
1751 return force_reg (mode
, value
);
1754 x
= aarch64_emit_move (x
, value
);
1761 aarch64_add_offset (machine_mode mode
, rtx temp
, rtx reg
, HOST_WIDE_INT offset
)
1763 if (!aarch64_plus_immediate (GEN_INT (offset
), mode
))
1766 /* Load the full offset into a register. This
1767 might be improvable in the future. */
1768 high
= GEN_INT (offset
);
1770 high
= aarch64_force_temporary (mode
, temp
, high
);
1771 reg
= aarch64_force_temporary (mode
, temp
,
1772 gen_rtx_PLUS (mode
, high
, reg
));
1774 return plus_constant (mode
, reg
, offset
);
1778 aarch64_internal_mov_immediate (rtx dest
, rtx imm
, bool generate
,
1782 unsigned HOST_WIDE_INT val
, val2
, mask
;
1783 int one_match
, zero_match
;
1788 if (aarch64_move_imm (val
, mode
))
1791 emit_insn (gen_rtx_SET (dest
, imm
));
1795 if ((val
>> 32) == 0 || mode
== SImode
)
1799 emit_insn (gen_rtx_SET (dest
, GEN_INT (val
& 0xffff)));
1801 emit_insn (gen_insv_immsi (dest
, GEN_INT (16),
1802 GEN_INT ((val
>> 16) & 0xffff)));
1804 emit_insn (gen_insv_immdi (dest
, GEN_INT (16),
1805 GEN_INT ((val
>> 16) & 0xffff)));
1810 /* Remaining cases are all for DImode. */
1813 zero_match
= ((val
& mask
) == 0) + ((val
& (mask
<< 16)) == 0) +
1814 ((val
& (mask
<< 32)) == 0) + ((val
& (mask
<< 48)) == 0);
1815 one_match
= ((~val
& mask
) == 0) + ((~val
& (mask
<< 16)) == 0) +
1816 ((~val
& (mask
<< 32)) == 0) + ((~val
& (mask
<< 48)) == 0);
1818 if (zero_match
!= 2 && one_match
!= 2)
1820 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
1821 For a 64-bit bitmask try whether changing 16 bits to all ones or
1822 zeroes creates a valid bitmask. To check any repeated bitmask,
1823 try using 16 bits from the other 32-bit half of val. */
1825 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1828 if (val2
!= val
&& aarch64_bitmask_imm (val2
, mode
))
1831 if (val2
!= val
&& aarch64_bitmask_imm (val2
, mode
))
1833 val2
= val2
& ~mask
;
1834 val2
= val2
| (((val2
>> 32) | (val2
<< 32)) & mask
);
1835 if (val2
!= val
&& aarch64_bitmask_imm (val2
, mode
))
1842 emit_insn (gen_rtx_SET (dest
, GEN_INT (val2
)));
1843 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1844 GEN_INT ((val
>> i
) & 0xffff)));
1850 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
1851 are emitted by the initial mov. If one_match > zero_match, skip set bits,
1852 otherwise skip zero bits. */
1856 val2
= one_match
> zero_match
? ~val
: val
;
1857 i
= (val2
& mask
) != 0 ? 0 : (val2
& (mask
<< 16)) != 0 ? 16 : 32;
1860 emit_insn (gen_rtx_SET (dest
, GEN_INT (one_match
> zero_match
1861 ? (val
| ~(mask
<< i
))
1862 : (val
& (mask
<< i
)))));
1863 for (i
+= 16; i
< 64; i
+= 16)
1865 if ((val2
& (mask
<< i
)) == 0)
1868 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1869 GEN_INT ((val
>> i
) & 0xffff)));
1878 aarch64_expand_mov_immediate (rtx dest
, rtx imm
)
1880 machine_mode mode
= GET_MODE (dest
);
1882 gcc_assert (mode
== SImode
|| mode
== DImode
);
1884 /* Check on what type of symbol it is. */
1885 if (GET_CODE (imm
) == SYMBOL_REF
1886 || GET_CODE (imm
) == LABEL_REF
1887 || GET_CODE (imm
) == CONST
)
1889 rtx mem
, base
, offset
;
1890 enum aarch64_symbol_type sty
;
1892 /* If we have (const (plus symbol offset)), separate out the offset
1893 before we start classifying the symbol. */
1894 split_const (imm
, &base
, &offset
);
1896 sty
= aarch64_classify_symbol (base
, offset
);
1899 case SYMBOL_FORCE_TO_MEM
:
1900 if (offset
!= const0_rtx
1901 && targetm
.cannot_force_const_mem (mode
, imm
))
1903 gcc_assert (can_create_pseudo_p ());
1904 base
= aarch64_force_temporary (mode
, dest
, base
);
1905 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1906 aarch64_emit_move (dest
, base
);
1910 mem
= force_const_mem (ptr_mode
, imm
);
1913 /* If we aren't generating PC relative literals, then
1914 we need to expand the literal pool access carefully.
1915 This is something that needs to be done in a number
1916 of places, so could well live as a separate function. */
1917 if (!aarch64_pcrelative_literal_loads
)
1919 gcc_assert (can_create_pseudo_p ());
1920 base
= gen_reg_rtx (ptr_mode
);
1921 aarch64_expand_mov_immediate (base
, XEXP (mem
, 0));
1922 mem
= gen_rtx_MEM (ptr_mode
, base
);
1925 if (mode
!= ptr_mode
)
1926 mem
= gen_rtx_ZERO_EXTEND (mode
, mem
);
1928 emit_insn (gen_rtx_SET (dest
, mem
));
1932 case SYMBOL_SMALL_TLSGD
:
1933 case SYMBOL_SMALL_TLSDESC
:
1934 case SYMBOL_SMALL_TLSIE
:
1935 case SYMBOL_SMALL_GOT_28K
:
1936 case SYMBOL_SMALL_GOT_4G
:
1937 case SYMBOL_TINY_GOT
:
1938 case SYMBOL_TINY_TLSIE
:
1939 if (offset
!= const0_rtx
)
1941 gcc_assert(can_create_pseudo_p ());
1942 base
= aarch64_force_temporary (mode
, dest
, base
);
1943 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1944 aarch64_emit_move (dest
, base
);
1949 case SYMBOL_SMALL_ABSOLUTE
:
1950 case SYMBOL_TINY_ABSOLUTE
:
1951 case SYMBOL_TLSLE12
:
1952 case SYMBOL_TLSLE24
:
1953 case SYMBOL_TLSLE32
:
1954 case SYMBOL_TLSLE48
:
1955 aarch64_load_symref_appropriately (dest
, imm
, sty
);
1963 if (!CONST_INT_P (imm
))
1965 if (GET_CODE (imm
) == HIGH
)
1966 emit_insn (gen_rtx_SET (dest
, imm
));
1969 rtx mem
= force_const_mem (mode
, imm
);
1971 emit_insn (gen_rtx_SET (dest
, mem
));
1977 aarch64_internal_mov_immediate (dest
, imm
, true, GET_MODE (dest
));
1980 /* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a
1981 temporary value if necessary. FRAME_RELATED_P should be true if
1982 the RTX_FRAME_RELATED flag should be set and CFA adjustments added
1983 to the generated instructions. If SCRATCHREG is known to hold
1984 abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
1987 Since this function may be used to adjust the stack pointer, we must
1988 ensure that it cannot cause transient stack deallocation (for example
1989 by first incrementing SP and then decrementing when adjusting by a
1990 large immediate). */
1993 aarch64_add_constant_internal (machine_mode mode
, int regnum
, int scratchreg
,
1994 HOST_WIDE_INT delta
, bool frame_related_p
,
1997 HOST_WIDE_INT mdelta
= abs_hwi (delta
);
1998 rtx this_rtx
= gen_rtx_REG (mode
, regnum
);
2004 /* Single instruction adjustment. */
2005 if (aarch64_uimm12_shift (mdelta
))
2007 insn
= emit_insn (gen_add2_insn (this_rtx
, GEN_INT (delta
)));
2008 RTX_FRAME_RELATED_P (insn
) = frame_related_p
;
2012 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits.
2013 Only do this if mdelta is not a 16-bit move as adjusting using a move
2015 if (mdelta
< 0x1000000 && !aarch64_move_imm (mdelta
, mode
))
2017 HOST_WIDE_INT low_off
= mdelta
& 0xfff;
2019 low_off
= delta
< 0 ? -low_off
: low_off
;
2020 insn
= emit_insn (gen_add2_insn (this_rtx
, GEN_INT (low_off
)));
2021 RTX_FRAME_RELATED_P (insn
) = frame_related_p
;
2022 insn
= emit_insn (gen_add2_insn (this_rtx
, GEN_INT (delta
- low_off
)));
2023 RTX_FRAME_RELATED_P (insn
) = frame_related_p
;
2027 /* Emit a move immediate if required and an addition/subtraction. */
2028 rtx scratch_rtx
= gen_rtx_REG (mode
, scratchreg
);
2030 aarch64_internal_mov_immediate (scratch_rtx
, GEN_INT (mdelta
), true, mode
);
2031 insn
= emit_insn (delta
< 0 ? gen_sub2_insn (this_rtx
, scratch_rtx
)
2032 : gen_add2_insn (this_rtx
, scratch_rtx
));
2033 if (frame_related_p
)
2035 RTX_FRAME_RELATED_P (insn
) = frame_related_p
;
2036 rtx adj
= plus_constant (mode
, this_rtx
, delta
);
2037 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, gen_rtx_SET (this_rtx
, adj
));
2042 aarch64_add_constant (machine_mode mode
, int regnum
, int scratchreg
,
2043 HOST_WIDE_INT delta
)
2045 aarch64_add_constant_internal (mode
, regnum
, scratchreg
, delta
, false, true);
2049 aarch64_add_sp (int scratchreg
, HOST_WIDE_INT delta
, bool emit_move_imm
)
2051 aarch64_add_constant_internal (Pmode
, SP_REGNUM
, scratchreg
, delta
,
2052 true, emit_move_imm
);
2056 aarch64_sub_sp (int scratchreg
, HOST_WIDE_INT delta
, bool frame_related_p
)
2058 aarch64_add_constant_internal (Pmode
, SP_REGNUM
, scratchreg
, -delta
,
2059 frame_related_p
, true);
2063 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED
,
2064 tree exp ATTRIBUTE_UNUSED
)
2066 /* Currently, always true. */
2070 /* Implement TARGET_PASS_BY_REFERENCE. */
2073 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED
,
2076 bool named ATTRIBUTE_UNUSED
)
2079 machine_mode dummymode
;
2082 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
2083 size
= (mode
== BLKmode
&& type
)
2084 ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2086 /* Aggregates are passed by reference based on their size. */
2087 if (type
&& AGGREGATE_TYPE_P (type
))
2089 size
= int_size_in_bytes (type
);
2092 /* Variable sized arguments are always returned by reference. */
2096 /* Can this be a candidate to be passed in fp/simd register(s)? */
2097 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
2102 /* Arguments which are variable sized or larger than 2 registers are
2103 passed by reference unless they are a homogenous floating point
2105 return size
> 2 * UNITS_PER_WORD
;
2108 /* Return TRUE if VALTYPE is padded to its least significant bits. */
2110 aarch64_return_in_msb (const_tree valtype
)
2112 machine_mode dummy_mode
;
2115 /* Never happens in little-endian mode. */
2116 if (!BYTES_BIG_ENDIAN
)
2119 /* Only composite types smaller than or equal to 16 bytes can
2120 be potentially returned in registers. */
2121 if (!aarch64_composite_type_p (valtype
, TYPE_MODE (valtype
))
2122 || int_size_in_bytes (valtype
) <= 0
2123 || int_size_in_bytes (valtype
) > 16)
2126 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
2127 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
2128 is always passed/returned in the least significant bits of fp/simd
2130 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype
), valtype
,
2131 &dummy_mode
, &dummy_int
, NULL
))
2137 /* Implement TARGET_FUNCTION_VALUE.
2138 Define how to find the value returned by a function. */
2141 aarch64_function_value (const_tree type
, const_tree func
,
2142 bool outgoing ATTRIBUTE_UNUSED
)
2147 machine_mode ag_mode
;
2149 mode
= TYPE_MODE (type
);
2150 if (INTEGRAL_TYPE_P (type
))
2151 mode
= promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
2153 if (aarch64_return_in_msb (type
))
2155 HOST_WIDE_INT size
= int_size_in_bytes (type
);
2157 if (size
% UNITS_PER_WORD
!= 0)
2159 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
2160 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
2164 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
2165 &ag_mode
, &count
, NULL
))
2167 if (!aarch64_composite_type_p (type
, mode
))
2169 gcc_assert (count
== 1 && mode
== ag_mode
);
2170 return gen_rtx_REG (mode
, V0_REGNUM
);
2177 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
2178 for (i
= 0; i
< count
; i
++)
2180 rtx tmp
= gen_rtx_REG (ag_mode
, V0_REGNUM
+ i
);
2181 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
2182 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
2183 XVECEXP (par
, 0, i
) = tmp
;
2189 return gen_rtx_REG (mode
, R0_REGNUM
);
2192 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
2193 Return true if REGNO is the number of a hard register in which the values
2194 of called function may come back. */
2197 aarch64_function_value_regno_p (const unsigned int regno
)
2199 /* Maximum of 16 bytes can be returned in the general registers. Examples
2200 of 16-byte return values are: 128-bit integers and 16-byte small
2201 structures (excluding homogeneous floating-point aggregates). */
2202 if (regno
== R0_REGNUM
|| regno
== R1_REGNUM
)
2205 /* Up to four fp/simd registers can return a function value, e.g. a
2206 homogeneous floating-point aggregate having four members. */
2207 if (regno
>= V0_REGNUM
&& regno
< V0_REGNUM
+ HA_MAX_NUM_FLDS
)
2208 return TARGET_FLOAT
;
2213 /* Implement TARGET_RETURN_IN_MEMORY.
2215 If the type T of the result of a function is such that
2217 would require that arg be passed as a value in a register (or set of
2218 registers) according to the parameter passing rules, then the result
2219 is returned in the same registers as would be used for such an
2223 aarch64_return_in_memory (const_tree type
, const_tree fndecl ATTRIBUTE_UNUSED
)
2226 machine_mode ag_mode
;
2229 if (!AGGREGATE_TYPE_P (type
)
2230 && TREE_CODE (type
) != COMPLEX_TYPE
2231 && TREE_CODE (type
) != VECTOR_TYPE
)
2232 /* Simple scalar types always returned in registers. */
2235 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type
),
2242 /* Types larger than 2 registers returned in memory. */
2243 size
= int_size_in_bytes (type
);
2244 return (size
< 0 || size
> 2 * UNITS_PER_WORD
);
2248 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v
, machine_mode mode
,
2249 const_tree type
, int *nregs
)
2251 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
2252 return aarch64_vfp_is_call_or_return_candidate (mode
,
2254 &pcum
->aapcs_vfp_rmode
,
/* Pair of alignments computed for a function argument: the one the ABI
   actually uses, and the one older GCCs used (kept so -Wpsabi can warn
   about the GCC 7.1 parameter-passing change).  */
struct aarch64_fn_arg_alignment
{
  /* Alignment for FIELD_DECLs in function arguments.  */
  unsigned int alignment;
  /* Alignment for decls other than FIELD_DECLs in function arguments.  */
  unsigned int warn_alignment;
};
2267 /* Given MODE and TYPE of a function argument, return a pair of alignments in
2268 bits. The idea is to suppress any stronger alignment requested by
2269 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
2270 This is a helper function for local use only. */
2272 static struct aarch64_fn_arg_alignment
2273 aarch64_function_arg_alignment (machine_mode mode
, const_tree type
)
2275 struct aarch64_fn_arg_alignment aa
;
2277 aa
.warn_alignment
= 0;
2281 aa
.alignment
= GET_MODE_ALIGNMENT (mode
);
2285 if (integer_zerop (TYPE_SIZE (type
)))
2288 gcc_assert (TYPE_MODE (type
) == mode
);
2290 if (!AGGREGATE_TYPE_P (type
))
2292 aa
.alignment
= TYPE_ALIGN (TYPE_MAIN_VARIANT (type
));
2296 if (TREE_CODE (type
) == ARRAY_TYPE
)
2298 aa
.alignment
= TYPE_ALIGN (TREE_TYPE (type
));
2302 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
2304 if (TREE_CODE (field
) == FIELD_DECL
)
2305 aa
.alignment
= std::max (aa
.alignment
, DECL_ALIGN (field
));
2307 aa
.warn_alignment
= std::max (aa
.warn_alignment
, DECL_ALIGN (field
));
2313 /* Layout a function argument according to the AAPCS64 rules. The rule
2314 numbers refer to the rule numbers in the AAPCS64. */
2317 aarch64_layout_arg (cumulative_args_t pcum_v
, machine_mode mode
,
2319 bool named ATTRIBUTE_UNUSED
)
2321 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
2322 int ncrn
, nvrn
, nregs
;
2323 bool allocate_ncrn
, allocate_nvrn
;
2326 /* We need to do this once per argument. */
2327 if (pcum
->aapcs_arg_processed
)
2330 pcum
->aapcs_arg_processed
= true;
2332 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
2334 = ROUND_UP (type
? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
),
2337 allocate_ncrn
= (type
) ? !(FLOAT_TYPE_P (type
)) : !FLOAT_MODE_P (mode
);
2338 allocate_nvrn
= aarch64_vfp_is_call_candidate (pcum_v
,
2343 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
2344 The following code thus handles passing by SIMD/FP registers first. */
2346 nvrn
= pcum
->aapcs_nvrn
;
2348 /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
2349 and homogenous short-vector aggregates (HVA). */
2353 aarch64_err_no_fpadvsimd (mode
, "argument");
2355 if (nvrn
+ nregs
<= NUM_FP_ARG_REGS
)
2357 pcum
->aapcs_nextnvrn
= nvrn
+ nregs
;
2358 if (!aarch64_composite_type_p (type
, mode
))
2360 gcc_assert (nregs
== 1);
2361 pcum
->aapcs_reg
= gen_rtx_REG (mode
, V0_REGNUM
+ nvrn
);
2367 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
2368 for (i
= 0; i
< nregs
; i
++)
2370 rtx tmp
= gen_rtx_REG (pcum
->aapcs_vfp_rmode
,
2371 V0_REGNUM
+ nvrn
+ i
);
2372 tmp
= gen_rtx_EXPR_LIST
2374 GEN_INT (i
* GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
)));
2375 XVECEXP (par
, 0, i
) = tmp
;
2377 pcum
->aapcs_reg
= par
;
2383 /* C.3 NSRN is set to 8. */
2384 pcum
->aapcs_nextnvrn
= NUM_FP_ARG_REGS
;
2389 ncrn
= pcum
->aapcs_ncrn
;
2390 nregs
= size
/ UNITS_PER_WORD
;
2392 /* C6 - C9. though the sign and zero extension semantics are
2393 handled elsewhere. This is the case where the argument fits
2394 entirely general registers. */
2395 if (allocate_ncrn
&& (ncrn
+ nregs
<= NUM_ARG_REGS
))
2398 gcc_assert (nregs
== 0 || nregs
== 1 || nregs
== 2);
2400 /* C.8 if the argument has an alignment of 16 then the NGRN is
2401 rounded up to the next even number. */
2402 if (nregs
== 2 && ncrn
% 2)
2404 struct aarch64_fn_arg_alignment aa
2405 = aarch64_function_arg_alignment (mode
, type
);
2407 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
2408 comparisons are there because for > 16 * BITS_PER_UNIT
2409 alignment nregs should be > 2 and therefore it should be
2410 passed by reference rather than value. */
2411 if (aa
.warn_alignment
== 16 * BITS_PER_UNIT
2412 && aa
.alignment
< aa
.warn_alignment
2414 && currently_expanding_gimple_stmt
)
2415 inform (input_location
,
2416 "parameter passing for argument of type %qT "
2417 "changed in GCC 7.1", type
);
2418 else if (aa
.alignment
== 16 * BITS_PER_UNIT
)
2421 gcc_assert (ncrn
+ nregs
<= NUM_ARG_REGS
);
2425 /* NREGS can be 0 when e.g. an empty structure is to be passed.
2426 A reg is still generated for it, but the caller should be smart
2427 enough not to use it. */
2428 if (nregs
== 0 || nregs
== 1 || GET_MODE_CLASS (mode
) == MODE_INT
)
2429 pcum
->aapcs_reg
= gen_rtx_REG (mode
, R0_REGNUM
+ ncrn
);
2435 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
2436 for (i
= 0; i
< nregs
; i
++)
2438 rtx tmp
= gen_rtx_REG (word_mode
, R0_REGNUM
+ ncrn
+ i
);
2439 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
2440 GEN_INT (i
* UNITS_PER_WORD
));
2441 XVECEXP (par
, 0, i
) = tmp
;
2443 pcum
->aapcs_reg
= par
;
2446 pcum
->aapcs_nextncrn
= ncrn
+ nregs
;
2451 pcum
->aapcs_nextncrn
= NUM_ARG_REGS
;
2453 /* The argument is passed on stack; record the needed number of words for
2454 this argument and align the total size if necessary. */
2456 pcum
->aapcs_stack_words
= size
/ UNITS_PER_WORD
;
2457 struct aarch64_fn_arg_alignment aa
2458 = aarch64_function_arg_alignment (mode
, type
);
2460 if (aa
.alignment
== 16 * BITS_PER_UNIT
)
2461 pcum
->aapcs_stack_size
= ROUND_UP (pcum
->aapcs_stack_size
,
2462 16 / UNITS_PER_WORD
);
2466 /* Implement TARGET_FUNCTION_ARG. */
2469 aarch64_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
2470 const_tree type
, bool named
)
2472 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
2473 gcc_assert (pcum
->pcs_variant
== ARM_PCS_AAPCS64
);
2475 if (mode
== VOIDmode
)
2478 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
2479 return pcum
->aapcs_reg
;
2483 aarch64_init_cumulative_args (CUMULATIVE_ARGS
*pcum
,
2484 const_tree fntype ATTRIBUTE_UNUSED
,
2485 rtx libname ATTRIBUTE_UNUSED
,
2486 const_tree fndecl ATTRIBUTE_UNUSED
,
2487 unsigned n_named ATTRIBUTE_UNUSED
)
2489 pcum
->aapcs_ncrn
= 0;
2490 pcum
->aapcs_nvrn
= 0;
2491 pcum
->aapcs_nextncrn
= 0;
2492 pcum
->aapcs_nextnvrn
= 0;
2493 pcum
->pcs_variant
= ARM_PCS_AAPCS64
;
2494 pcum
->aapcs_reg
= NULL_RTX
;
2495 pcum
->aapcs_arg_processed
= false;
2496 pcum
->aapcs_stack_words
= 0;
2497 pcum
->aapcs_stack_size
= 0;
2500 && fndecl
&& TREE_PUBLIC (fndecl
)
2501 && fntype
&& fntype
!= error_mark_node
)
2503 const_tree type
= TREE_TYPE (fntype
);
2504 machine_mode mode ATTRIBUTE_UNUSED
; /* To pass pointer as argument. */
2505 int nregs ATTRIBUTE_UNUSED
; /* Likewise. */
2506 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type
), type
,
2507 &mode
, &nregs
, NULL
))
2508 aarch64_err_no_fpadvsimd (TYPE_MODE (type
), "return type");
2514 aarch64_function_arg_advance (cumulative_args_t pcum_v
,
2519 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
2520 if (pcum
->pcs_variant
== ARM_PCS_AAPCS64
)
2522 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
2523 gcc_assert ((pcum
->aapcs_reg
!= NULL_RTX
)
2524 != (pcum
->aapcs_stack_words
!= 0));
2525 pcum
->aapcs_arg_processed
= false;
2526 pcum
->aapcs_ncrn
= pcum
->aapcs_nextncrn
;
2527 pcum
->aapcs_nvrn
= pcum
->aapcs_nextnvrn
;
2528 pcum
->aapcs_stack_size
+= pcum
->aapcs_stack_words
;
2529 pcum
->aapcs_stack_words
= 0;
2530 pcum
->aapcs_reg
= NULL_RTX
;
2535 aarch64_function_arg_regno_p (unsigned regno
)
2537 return ((GP_REGNUM_P (regno
) && regno
< R0_REGNUM
+ NUM_ARG_REGS
)
2538 || (FP_REGNUM_P (regno
) && regno
< V0_REGNUM
+ NUM_FP_ARG_REGS
));
2541 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2542 PARM_BOUNDARY bits of alignment, but will be given anything up
2543 to STACK_BOUNDARY bits if the type requires it. This makes sure
2544 that both before and after the layout of each argument, the Next
2545 Stacked Argument Address (NSAA) will have a minimum alignment of
2549 aarch64_function_arg_boundary (machine_mode mode
, const_tree type
)
2551 struct aarch64_fn_arg_alignment aa
2552 = aarch64_function_arg_alignment (mode
, type
);
2553 aa
.alignment
= MIN (MAX (aa
.alignment
, PARM_BOUNDARY
), STACK_BOUNDARY
);
2555 = MIN (MAX (aa
.warn_alignment
, PARM_BOUNDARY
), STACK_BOUNDARY
);
2557 if (warn_psabi
&& aa
.warn_alignment
> aa
.alignment
)
2558 inform (input_location
, "parameter passing for argument of type %qT "
2559 "changed in GCC 7.1", type
);
2561 return aa
.alignment
;
2564 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
2566 Return true if an argument passed on the stack should be padded upwards,
2567 i.e. if the least-significant byte of the stack slot has useful data.
2569 Small aggregate types are placed in the lowest memory address.
2571 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2574 aarch64_pad_arg_upward (machine_mode mode
, const_tree type
)
2576 /* On little-endian targets, the least significant byte of every stack
2577 argument is passed at the lowest byte address of the stack slot. */
2578 if (!BYTES_BIG_ENDIAN
)
2581 /* Otherwise, integral, floating-point and pointer types are padded downward:
2582 the least significant byte of a stack argument is passed at the highest
2583 byte address of the stack slot. */
2585 ? (INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
)
2586 || POINTER_TYPE_P (type
))
2587 : (SCALAR_INT_MODE_P (mode
) || SCALAR_FLOAT_MODE_P (mode
)))
2590 /* Everything else padded upward, i.e. data in first byte of stack slot. */
2594 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2596 It specifies padding for the last (may also be the only)
2597 element of a block move between registers and memory. If
2598 assuming the block is in the memory, padding upward means that
2599 the last element is padded after its highest significant byte,
2600 while in downward padding, the last element is padded at the
2601 its least significant byte side.
2603 Small aggregates and small complex types are always padded
2606 We don't need to worry about homogeneous floating-point or
2607 short-vector aggregates; their move is not affected by the
2608 padding direction determined here. Regardless of endianness,
2609 each element of such an aggregate is put in the least
2610 significant bits of a fp/simd register.
2612 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2613 register has useful data, and return the opposite if the most
2614 significant byte does. */
2617 aarch64_pad_reg_upward (machine_mode mode
, const_tree type
,
2618 bool first ATTRIBUTE_UNUSED
)
2621 /* Small composite types are always padded upward. */
2622 if (BYTES_BIG_ENDIAN
&& aarch64_composite_type_p (type
, mode
))
2624 HOST_WIDE_INT size
= (type
? int_size_in_bytes (type
)
2625 : GET_MODE_SIZE (mode
));
2626 if (size
< 2 * UNITS_PER_WORD
)
2630 /* Otherwise, use the default padding. */
2631 return !BYTES_BIG_ENDIAN
;
2635 aarch64_libgcc_cmp_return_mode (void)
2640 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
2642 /* We use the 12-bit shifted immediate arithmetic instructions so values
2643 must be multiple of (1 << 12), i.e. 4096. */
2644 #define ARITH_FACTOR 4096
2646 #if (PROBE_INTERVAL % ARITH_FACTOR) != 0
2647 #error Cannot use simple address calculation for stack probing
2650 /* The pair of scratch registers used for stack probing. */
2651 #define PROBE_STACK_FIRST_REG 9
2652 #define PROBE_STACK_SECOND_REG 10
2654 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
2655 inclusive. These are offsets from the current stack pointer. */
2658 aarch64_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
2660 rtx reg1
= gen_rtx_REG (Pmode
, PROBE_STACK_FIRST_REG
);
2662 /* See the same assertion on PROBE_INTERVAL above. */
2663 gcc_assert ((first
% ARITH_FACTOR
) == 0);
2665 /* See if we have a constant small number of probes to generate. If so,
2666 that's the easy case. */
2667 if (size
<= PROBE_INTERVAL
)
2669 const HOST_WIDE_INT base
= ROUND_UP (size
, ARITH_FACTOR
);
2671 emit_set_insn (reg1
,
2672 plus_constant (Pmode
,
2673 stack_pointer_rtx
, -(first
+ base
)));
2674 emit_stack_probe (plus_constant (Pmode
, reg1
, base
- size
));
2677 /* The run-time loop is made up of 8 insns in the generic case while the
2678 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
2679 else if (size
<= 4 * PROBE_INTERVAL
)
2681 HOST_WIDE_INT i
, rem
;
2683 emit_set_insn (reg1
,
2684 plus_constant (Pmode
,
2686 -(first
+ PROBE_INTERVAL
)));
2687 emit_stack_probe (reg1
);
2689 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
2690 it exceeds SIZE. If only two probes are needed, this will not
2691 generate any code. Then probe at FIRST + SIZE. */
2692 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
2694 emit_set_insn (reg1
,
2695 plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
2696 emit_stack_probe (reg1
);
2699 rem
= size
- (i
- PROBE_INTERVAL
);
2702 const HOST_WIDE_INT base
= ROUND_UP (rem
, ARITH_FACTOR
);
2704 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -base
));
2705 emit_stack_probe (plus_constant (Pmode
, reg1
, base
- rem
));
2708 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
2711 /* Otherwise, do the same as above, but in a loop. Note that we must be
2712 extra careful with variables wrapping around because we might be at
2713 the very top (or the very bottom) of the address space and we have
2714 to be able to handle this case properly; in particular, we use an
2715 equality test for the loop condition. */
2718 rtx reg2
= gen_rtx_REG (Pmode
, PROBE_STACK_SECOND_REG
);
2720 /* Step 1: round SIZE to the previous multiple of the interval. */
2722 HOST_WIDE_INT rounded_size
= size
& -PROBE_INTERVAL
;
2725 /* Step 2: compute initial and final value of the loop counter. */
2727 /* TEST_ADDR = SP + FIRST. */
2728 emit_set_insn (reg1
,
2729 plus_constant (Pmode
, stack_pointer_rtx
, -first
));
2731 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
2732 emit_set_insn (reg2
,
2733 plus_constant (Pmode
, stack_pointer_rtx
,
2734 -(first
+ rounded_size
)));
2741 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
2744 while (TEST_ADDR != LAST_ADDR)
2746 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
2747 until it is equal to ROUNDED_SIZE. */
2749 emit_insn (gen_probe_stack_range (reg1
, reg1
, reg2
));
2752 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
2753 that SIZE is equal to ROUNDED_SIZE. */
2755 if (size
!= rounded_size
)
2757 HOST_WIDE_INT rem
= size
- rounded_size
;
2761 const HOST_WIDE_INT base
= ROUND_UP (rem
, ARITH_FACTOR
);
2763 emit_set_insn (reg2
, plus_constant (Pmode
, reg2
, -base
));
2764 emit_stack_probe (plus_constant (Pmode
, reg2
, base
- rem
));
2767 emit_stack_probe (plus_constant (Pmode
, reg2
, -rem
));
2771 /* Make sure nothing is scheduled before we are done. */
2772 emit_insn (gen_blockage ());
2775 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
2776 absolute addresses. */
2779 aarch64_output_probe_stack_range (rtx reg1
, rtx reg2
)
2781 static int labelno
= 0;
2785 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
2788 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
2790 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
2792 xops
[1] = GEN_INT (PROBE_INTERVAL
);
2793 output_asm_insn ("sub\t%0, %0, %1", xops
);
2795 /* Probe at TEST_ADDR. */
2796 output_asm_insn ("str\txzr, [%0]", xops
);
2798 /* Test if TEST_ADDR == LAST_ADDR. */
2800 output_asm_insn ("cmp\t%0, %1", xops
);
2803 fputs ("\tb.ne\t", asm_out_file
);
2804 assemble_name_raw (asm_out_file
, loop_lab
);
2805 fputc ('\n', asm_out_file
);
2811 aarch64_frame_pointer_required (void)
2813 /* In aarch64_override_options_after_change
2814 flag_omit_leaf_frame_pointer turns off the frame pointer by
2815 default. Turn it back on now if we've not got a leaf
2817 if (flag_omit_leaf_frame_pointer
2818 && (!crtl
->is_leaf
|| df_regs_ever_live_p (LR_REGNUM
)))
2821 /* Force a frame pointer for EH returns so the return address is at FP+8. */
2822 if (crtl
->calls_eh_return
)
2828 /* Mark the registers that need to be saved by the callee and calculate
2829 the size of the callee-saved registers area and frame record (both FP
2830 and LR may be omitted). */
2832 aarch64_layout_frame (void)
2834 HOST_WIDE_INT offset
= 0;
2835 int regno
, last_fp_reg
= INVALID_REGNUM
;
2837 if (reload_completed
&& cfun
->machine
->frame
.laid_out
)
2840 #define SLOT_NOT_REQUIRED (-2)
2841 #define SLOT_REQUIRED (-1)
2843 cfun
->machine
->frame
.wb_candidate1
= INVALID_REGNUM
;
2844 cfun
->machine
->frame
.wb_candidate2
= INVALID_REGNUM
;
2846 /* First mark all the registers that really need to be saved... */
2847 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
2848 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_NOT_REQUIRED
;
2850 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
2851 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_NOT_REQUIRED
;
2853 /* ... that includes the eh data registers (if needed)... */
2854 if (crtl
->calls_eh_return
)
2855 for (regno
= 0; EH_RETURN_DATA_REGNO (regno
) != INVALID_REGNUM
; regno
++)
2856 cfun
->machine
->frame
.reg_offset
[EH_RETURN_DATA_REGNO (regno
)]
2859 /* ... and any callee saved register that dataflow says is live. */
2860 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
2861 if (df_regs_ever_live_p (regno
)
2862 && (regno
== R30_REGNUM
2863 || !call_used_regs
[regno
]))
2864 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_REQUIRED
;
2866 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
2867 if (df_regs_ever_live_p (regno
)
2868 && !call_used_regs
[regno
])
2870 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_REQUIRED
;
2871 last_fp_reg
= regno
;
2874 if (frame_pointer_needed
)
2876 /* FP and LR are placed in the linkage record. */
2877 cfun
->machine
->frame
.reg_offset
[R29_REGNUM
] = 0;
2878 cfun
->machine
->frame
.wb_candidate1
= R29_REGNUM
;
2879 cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] = UNITS_PER_WORD
;
2880 cfun
->machine
->frame
.wb_candidate2
= R30_REGNUM
;
2881 offset
+= 2 * UNITS_PER_WORD
;
2884 /* Now assign stack slots for them. */
2885 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
2886 if (cfun
->machine
->frame
.reg_offset
[regno
] == SLOT_REQUIRED
)
2888 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
2889 if (cfun
->machine
->frame
.wb_candidate1
== INVALID_REGNUM
)
2890 cfun
->machine
->frame
.wb_candidate1
= regno
;
2891 else if (cfun
->machine
->frame
.wb_candidate2
== INVALID_REGNUM
)
2892 cfun
->machine
->frame
.wb_candidate2
= regno
;
2893 offset
+= UNITS_PER_WORD
;
2896 HOST_WIDE_INT max_int_offset
= offset
;
2897 offset
= ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
2898 bool has_align_gap
= offset
!= max_int_offset
;
2900 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
2901 if (cfun
->machine
->frame
.reg_offset
[regno
] == SLOT_REQUIRED
)
2903 /* If there is an alignment gap between integer and fp callee-saves,
2904 allocate the last fp register to it if possible. */
2905 if (regno
== last_fp_reg
&& has_align_gap
&& (offset
& 8) == 0)
2907 cfun
->machine
->frame
.reg_offset
[regno
] = max_int_offset
;
2911 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
2912 if (cfun
->machine
->frame
.wb_candidate1
== INVALID_REGNUM
)
2913 cfun
->machine
->frame
.wb_candidate1
= regno
;
2914 else if (cfun
->machine
->frame
.wb_candidate2
== INVALID_REGNUM
2915 && cfun
->machine
->frame
.wb_candidate1
>= V0_REGNUM
)
2916 cfun
->machine
->frame
.wb_candidate2
= regno
;
2917 offset
+= UNITS_PER_WORD
;
2920 offset
= ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
2922 cfun
->machine
->frame
.saved_regs_size
= offset
;
2924 HOST_WIDE_INT varargs_and_saved_regs_size
2925 = offset
+ cfun
->machine
->frame
.saved_varargs_size
;
2927 cfun
->machine
->frame
.hard_fp_offset
2928 = ROUND_UP (varargs_and_saved_regs_size
+ get_frame_size (),
2929 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2931 cfun
->machine
->frame
.frame_size
2932 = ROUND_UP (cfun
->machine
->frame
.hard_fp_offset
2933 + crtl
->outgoing_args_size
,
2934 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2936 cfun
->machine
->frame
.locals_offset
= cfun
->machine
->frame
.saved_varargs_size
;
2938 cfun
->machine
->frame
.initial_adjust
= 0;
2939 cfun
->machine
->frame
.final_adjust
= 0;
2940 cfun
->machine
->frame
.callee_adjust
= 0;
2941 cfun
->machine
->frame
.callee_offset
= 0;
2943 HOST_WIDE_INT max_push_offset
= 0;
2944 if (cfun
->machine
->frame
.wb_candidate2
!= INVALID_REGNUM
)
2945 max_push_offset
= 512;
2946 else if (cfun
->machine
->frame
.wb_candidate1
!= INVALID_REGNUM
)
2947 max_push_offset
= 256;
2949 if (cfun
->machine
->frame
.frame_size
< max_push_offset
2950 && crtl
->outgoing_args_size
== 0)
2952 /* Simple, small frame with no outgoing arguments:
2953 stp reg1, reg2, [sp, -frame_size]!
2954 stp reg3, reg4, [sp, 16] */
2955 cfun
->machine
->frame
.callee_adjust
= cfun
->machine
->frame
.frame_size
;
2957 else if ((crtl
->outgoing_args_size
2958 + cfun
->machine
->frame
.saved_regs_size
< 512)
2959 && !(cfun
->calls_alloca
2960 && cfun
->machine
->frame
.hard_fp_offset
< max_push_offset
))
2962 /* Frame with small outgoing arguments:
2963 sub sp, sp, frame_size
2964 stp reg1, reg2, [sp, outgoing_args_size]
2965 stp reg3, reg4, [sp, outgoing_args_size + 16] */
2966 cfun
->machine
->frame
.initial_adjust
= cfun
->machine
->frame
.frame_size
;
2967 cfun
->machine
->frame
.callee_offset
2968 = cfun
->machine
->frame
.frame_size
- cfun
->machine
->frame
.hard_fp_offset
;
2970 else if (cfun
->machine
->frame
.hard_fp_offset
< max_push_offset
)
2972 /* Frame with large outgoing arguments but a small local area:
2973 stp reg1, reg2, [sp, -hard_fp_offset]!
2974 stp reg3, reg4, [sp, 16]
2975 sub sp, sp, outgoing_args_size */
2976 cfun
->machine
->frame
.callee_adjust
= cfun
->machine
->frame
.hard_fp_offset
;
2977 cfun
->machine
->frame
.final_adjust
2978 = cfun
->machine
->frame
.frame_size
- cfun
->machine
->frame
.callee_adjust
;
2980 else if (!frame_pointer_needed
2981 && varargs_and_saved_regs_size
< max_push_offset
)
2983 /* Frame with large local area and outgoing arguments (this pushes the
2984 callee-saves first, followed by the locals and outgoing area):
2985 stp reg1, reg2, [sp, -varargs_and_saved_regs_size]!
2986 stp reg3, reg4, [sp, 16]
2987 sub sp, sp, frame_size - varargs_and_saved_regs_size */
2988 cfun
->machine
->frame
.callee_adjust
= varargs_and_saved_regs_size
;
2989 cfun
->machine
->frame
.final_adjust
2990 = cfun
->machine
->frame
.frame_size
- cfun
->machine
->frame
.callee_adjust
;
2991 cfun
->machine
->frame
.hard_fp_offset
= cfun
->machine
->frame
.callee_adjust
;
2992 cfun
->machine
->frame
.locals_offset
= cfun
->machine
->frame
.hard_fp_offset
;
2996 /* Frame with large local area and outgoing arguments using frame pointer:
2997 sub sp, sp, hard_fp_offset
2998 stp x29, x30, [sp, 0]
3000 stp reg3, reg4, [sp, 16]
3001 sub sp, sp, outgoing_args_size */
3002 cfun
->machine
->frame
.initial_adjust
= cfun
->machine
->frame
.hard_fp_offset
;
3003 cfun
->machine
->frame
.final_adjust
3004 = cfun
->machine
->frame
.frame_size
- cfun
->machine
->frame
.initial_adjust
;
3007 cfun
->machine
->frame
.laid_out
= true;
3010 /* Return true if the register REGNO is saved on entry to
3011 the current function. */
3014 aarch64_register_saved_on_entry (int regno
)
3016 return cfun
->machine
->frame
.reg_offset
[regno
] >= 0;
/* Return the next register up from REGNO up to LIMIT for the callee
   to save; returns LIMIT + 1 if no further saved register exists.  */

static unsigned
aarch64_next_callee_save (unsigned regno, unsigned limit)
{
  while (regno <= limit && !aarch64_register_saved_on_entry (regno))
    regno++;
  return regno;
}
3030 /* Push the register number REGNO of mode MODE to the stack with write-back
3031 adjusting the stack by ADJUSTMENT. */
3034 aarch64_pushwb_single_reg (machine_mode mode
, unsigned regno
,
3035 HOST_WIDE_INT adjustment
)
3037 rtx base_rtx
= stack_pointer_rtx
;
3040 reg
= gen_rtx_REG (mode
, regno
);
3041 mem
= gen_rtx_PRE_MODIFY (Pmode
, base_rtx
,
3042 plus_constant (Pmode
, base_rtx
, -adjustment
));
3043 mem
= gen_rtx_MEM (mode
, mem
);
3045 insn
= emit_move_insn (mem
, reg
);
3046 RTX_FRAME_RELATED_P (insn
) = 1;
3049 /* Generate and return an instruction to store the pair of registers
3050 REG and REG2 of mode MODE to location BASE with write-back adjusting
3051 the stack location BASE by ADJUSTMENT. */
3054 aarch64_gen_storewb_pair (machine_mode mode
, rtx base
, rtx reg
, rtx reg2
,
3055 HOST_WIDE_INT adjustment
)
3060 return gen_storewb_pairdi_di (base
, base
, reg
, reg2
,
3061 GEN_INT (-adjustment
),
3062 GEN_INT (UNITS_PER_WORD
- adjustment
));
3064 return gen_storewb_pairdf_di (base
, base
, reg
, reg2
,
3065 GEN_INT (-adjustment
),
3066 GEN_INT (UNITS_PER_WORD
- adjustment
));
3072 /* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
3073 stack pointer by ADJUSTMENT. */
3076 aarch64_push_regs (unsigned regno1
, unsigned regno2
, HOST_WIDE_INT adjustment
)
3079 machine_mode mode
= (regno1
<= R30_REGNUM
) ? DImode
: DFmode
;
3081 if (regno2
== INVALID_REGNUM
)
3082 return aarch64_pushwb_single_reg (mode
, regno1
, adjustment
);
3084 rtx reg1
= gen_rtx_REG (mode
, regno1
);
3085 rtx reg2
= gen_rtx_REG (mode
, regno2
);
3087 insn
= emit_insn (aarch64_gen_storewb_pair (mode
, stack_pointer_rtx
, reg1
,
3089 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
3090 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
3091 RTX_FRAME_RELATED_P (insn
) = 1;
3094 /* Load the pair of register REG, REG2 of mode MODE from stack location BASE,
3095 adjusting it by ADJUSTMENT afterwards. */
3098 aarch64_gen_loadwb_pair (machine_mode mode
, rtx base
, rtx reg
, rtx reg2
,
3099 HOST_WIDE_INT adjustment
)
3104 return gen_loadwb_pairdi_di (base
, base
, reg
, reg2
, GEN_INT (adjustment
),
3105 GEN_INT (UNITS_PER_WORD
));
3107 return gen_loadwb_pairdf_di (base
, base
, reg
, reg2
, GEN_INT (adjustment
),
3108 GEN_INT (UNITS_PER_WORD
));
3114 /* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
3115 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
3119 aarch64_pop_regs (unsigned regno1
, unsigned regno2
, HOST_WIDE_INT adjustment
,
3122 machine_mode mode
= (regno1
<= R30_REGNUM
) ? DImode
: DFmode
;
3123 rtx reg1
= gen_rtx_REG (mode
, regno1
);
3125 *cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, reg1
, *cfi_ops
);
3127 if (regno2
== INVALID_REGNUM
)
3129 rtx mem
= plus_constant (Pmode
, stack_pointer_rtx
, adjustment
);
3130 mem
= gen_rtx_POST_MODIFY (Pmode
, stack_pointer_rtx
, mem
);
3131 emit_move_insn (reg1
, gen_rtx_MEM (mode
, mem
));
3135 rtx reg2
= gen_rtx_REG (mode
, regno2
);
3136 *cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, reg2
, *cfi_ops
);
3137 emit_insn (aarch64_gen_loadwb_pair (mode
, stack_pointer_rtx
, reg1
,
3142 /* Generate and return a store pair instruction of mode MODE to store
3143 register REG1 to MEM1 and register REG2 to MEM2. */
3146 aarch64_gen_store_pair (machine_mode mode
, rtx mem1
, rtx reg1
, rtx mem2
,
3152 return gen_store_pairdi (mem1
, reg1
, mem2
, reg2
);
3155 return gen_store_pairdf (mem1
, reg1
, mem2
, reg2
);
3162 /* Generate and regurn a load pair isntruction of mode MODE to load register
3163 REG1 from MEM1 and register REG2 from MEM2. */
3166 aarch64_gen_load_pair (machine_mode mode
, rtx reg1
, rtx mem1
, rtx reg2
,
3172 return gen_load_pairdi (reg1
, mem1
, reg2
, mem2
);
3175 return gen_load_pairdf (reg1
, mem1
, reg2
, mem2
);
3182 /* Return TRUE if return address signing should be enabled for the current
3183 function, otherwise return FALSE. */
3186 aarch64_return_address_signing_enabled (void)
3188 /* This function should only be called after frame laid out. */
3189 gcc_assert (cfun
->machine
->frame
.laid_out
);
3191 /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
3192 if it's LR is pushed onto stack. */
3193 return (aarch64_ra_sign_scope
== AARCH64_FUNCTION_ALL
3194 || (aarch64_ra_sign_scope
== AARCH64_FUNCTION_NON_LEAF
3195 && cfun
->machine
->frame
.reg_offset
[LR_REGNUM
] >= 0));
3198 /* Emit code to save the callee-saved registers from register number START
3199 to LIMIT to the stack at the location starting at offset START_OFFSET,
3200 skipping any write-back candidates if SKIP_WB is true. */
3203 aarch64_save_callee_saves (machine_mode mode
, HOST_WIDE_INT start_offset
,
3204 unsigned start
, unsigned limit
, bool skip_wb
)
3207 rtx (*gen_mem_ref
) (machine_mode
, rtx
) = (frame_pointer_needed
3208 ? gen_frame_mem
: gen_rtx_MEM
);
3212 for (regno
= aarch64_next_callee_save (start
, limit
);
3214 regno
= aarch64_next_callee_save (regno
+ 1, limit
))
3217 HOST_WIDE_INT offset
;
3220 && (regno
== cfun
->machine
->frame
.wb_candidate1
3221 || regno
== cfun
->machine
->frame
.wb_candidate2
))
3224 if (cfun
->machine
->reg_is_wrapped_separately
[regno
])
3227 reg
= gen_rtx_REG (mode
, regno
);
3228 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno
];
3229 mem
= gen_mem_ref (mode
, plus_constant (Pmode
, stack_pointer_rtx
,
3232 regno2
= aarch64_next_callee_save (regno
+ 1, limit
);
3235 && !cfun
->machine
->reg_is_wrapped_separately
[regno2
]
3236 && ((cfun
->machine
->frame
.reg_offset
[regno
] + UNITS_PER_WORD
)
3237 == cfun
->machine
->frame
.reg_offset
[regno2
]))
3240 rtx reg2
= gen_rtx_REG (mode
, regno2
);
3243 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno2
];
3244 mem2
= gen_mem_ref (mode
, plus_constant (Pmode
, stack_pointer_rtx
,
3246 insn
= emit_insn (aarch64_gen_store_pair (mode
, mem
, reg
, mem2
,
3249 /* The first part of a frame-related parallel insn is
3250 always assumed to be relevant to the frame
3251 calculations; subsequent parts, are only
3252 frame-related if explicitly marked. */
3253 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
3257 insn
= emit_move_insn (mem
, reg
);
3259 RTX_FRAME_RELATED_P (insn
) = 1;
3263 /* Emit code to restore the callee registers of mode MODE from register
3264 number START up to and including LIMIT. Restore from the stack offset
3265 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
3266 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
3269 aarch64_restore_callee_saves (machine_mode mode
,
3270 HOST_WIDE_INT start_offset
, unsigned start
,
3271 unsigned limit
, bool skip_wb
, rtx
*cfi_ops
)
3273 rtx base_rtx
= stack_pointer_rtx
;
3274 rtx (*gen_mem_ref
) (machine_mode
, rtx
) = (frame_pointer_needed
3275 ? gen_frame_mem
: gen_rtx_MEM
);
3278 HOST_WIDE_INT offset
;
3280 for (regno
= aarch64_next_callee_save (start
, limit
);
3282 regno
= aarch64_next_callee_save (regno
+ 1, limit
))
3284 if (cfun
->machine
->reg_is_wrapped_separately
[regno
])
3290 && (regno
== cfun
->machine
->frame
.wb_candidate1
3291 || regno
== cfun
->machine
->frame
.wb_candidate2
))
3294 reg
= gen_rtx_REG (mode
, regno
);
3295 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno
];
3296 mem
= gen_mem_ref (mode
, plus_constant (Pmode
, base_rtx
, offset
));
3298 regno2
= aarch64_next_callee_save (regno
+ 1, limit
);
3301 && !cfun
->machine
->reg_is_wrapped_separately
[regno2
]
3302 && ((cfun
->machine
->frame
.reg_offset
[regno
] + UNITS_PER_WORD
)
3303 == cfun
->machine
->frame
.reg_offset
[regno2
]))
3305 rtx reg2
= gen_rtx_REG (mode
, regno2
);
3308 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno2
];
3309 mem2
= gen_mem_ref (mode
, plus_constant (Pmode
, base_rtx
, offset
));
3310 emit_insn (aarch64_gen_load_pair (mode
, reg
, mem
, reg2
, mem2
));
3312 *cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, reg2
, *cfi_ops
);
3316 emit_move_insn (reg
, mem
);
3317 *cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, reg
, *cfi_ops
);
3322 offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED
,
3323 HOST_WIDE_INT offset
)
3325 return offset
>= -256 && offset
< 256;
3329 offset_12bit_unsigned_scaled_p (machine_mode mode
, HOST_WIDE_INT offset
)
3332 && offset
< 4096 * GET_MODE_SIZE (mode
)
3333 && offset
% GET_MODE_SIZE (mode
) == 0);
3337 aarch64_offset_7bit_signed_scaled_p (machine_mode mode
, HOST_WIDE_INT offset
)
3339 return (offset
>= -64 * GET_MODE_SIZE (mode
)
3340 && offset
< 64 * GET_MODE_SIZE (mode
)
3341 && offset
% GET_MODE_SIZE (mode
) == 0);
3344 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
3347 aarch64_get_separate_components (void)
3349 aarch64_layout_frame ();
3351 sbitmap components
= sbitmap_alloc (LAST_SAVED_REGNUM
+ 1);
3352 bitmap_clear (components
);
3354 /* The registers we need saved to the frame. */
3355 for (unsigned regno
= 0; regno
<= LAST_SAVED_REGNUM
; regno
++)
3356 if (aarch64_register_saved_on_entry (regno
))
3358 HOST_WIDE_INT offset
= cfun
->machine
->frame
.reg_offset
[regno
];
3359 if (!frame_pointer_needed
)
3360 offset
+= cfun
->machine
->frame
.frame_size
3361 - cfun
->machine
->frame
.hard_fp_offset
;
3362 /* Check that we can access the stack slot of the register with one
3363 direct load with no adjustments needed. */
3364 if (offset_12bit_unsigned_scaled_p (DImode
, offset
))
3365 bitmap_set_bit (components
, regno
);
3368 /* Don't mess with the hard frame pointer. */
3369 if (frame_pointer_needed
)
3370 bitmap_clear_bit (components
, HARD_FRAME_POINTER_REGNUM
);
3372 unsigned reg1
= cfun
->machine
->frame
.wb_candidate1
;
3373 unsigned reg2
= cfun
->machine
->frame
.wb_candidate2
;
3374 /* If aarch64_layout_frame has chosen registers to store/restore with
3375 writeback don't interfere with them to avoid having to output explicit
3376 stack adjustment instructions. */
3377 if (reg2
!= INVALID_REGNUM
)
3378 bitmap_clear_bit (components
, reg2
);
3379 if (reg1
!= INVALID_REGNUM
)
3380 bitmap_clear_bit (components
, reg1
);
3382 bitmap_clear_bit (components
, LR_REGNUM
);
3383 bitmap_clear_bit (components
, SP_REGNUM
);
3388 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
3391 aarch64_components_for_bb (basic_block bb
)
3393 bitmap in
= DF_LIVE_IN (bb
);
3394 bitmap gen
= &DF_LIVE_BB_INFO (bb
)->gen
;
3395 bitmap kill
= &DF_LIVE_BB_INFO (bb
)->kill
;
3397 sbitmap components
= sbitmap_alloc (LAST_SAVED_REGNUM
+ 1);
3398 bitmap_clear (components
);
3400 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
3401 for (unsigned regno
= 0; regno
<= LAST_SAVED_REGNUM
; regno
++)
3402 if ((!call_used_regs
[regno
])
3403 && (bitmap_bit_p (in
, regno
)
3404 || bitmap_bit_p (gen
, regno
)
3405 || bitmap_bit_p (kill
, regno
)))
3406 bitmap_set_bit (components
, regno
);
3411 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
3412 Nothing to do for aarch64. */
3415 aarch64_disqualify_components (sbitmap
, edge
, sbitmap
, bool)
3419 /* Return the next set bit in BMP from START onwards. Return the total number
3420 of bits in BMP if no set bit is found at or after START. */
3423 aarch64_get_next_set_bit (sbitmap bmp
, unsigned int start
)
3425 unsigned int nbits
= SBITMAP_SIZE (bmp
);
3429 gcc_assert (start
< nbits
);
3430 for (unsigned int i
= start
; i
< nbits
; i
++)
3431 if (bitmap_bit_p (bmp
, i
))
3437 /* Do the work for aarch64_emit_prologue_components and
3438 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
3439 to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
3440 for these components or the epilogue sequence. That is, it determines
3441 whether we should emit stores or loads and what kind of CFA notes to attach
3442 to the insns. Otherwise the logic for the two sequences is very
3446 aarch64_process_components (sbitmap components
, bool prologue_p
)
3448 rtx ptr_reg
= gen_rtx_REG (Pmode
, frame_pointer_needed
3449 ? HARD_FRAME_POINTER_REGNUM
3450 : STACK_POINTER_REGNUM
);
3452 unsigned last_regno
= SBITMAP_SIZE (components
);
3453 unsigned regno
= aarch64_get_next_set_bit (components
, R0_REGNUM
);
3454 rtx_insn
*insn
= NULL
;
3456 while (regno
!= last_regno
)
3458 /* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved
3459 so DFmode for the vector registers is enough. */
3460 machine_mode mode
= GP_REGNUM_P (regno
) ? DImode
: DFmode
;
3461 rtx reg
= gen_rtx_REG (mode
, regno
);
3462 HOST_WIDE_INT offset
= cfun
->machine
->frame
.reg_offset
[regno
];
3463 if (!frame_pointer_needed
)
3464 offset
+= cfun
->machine
->frame
.frame_size
3465 - cfun
->machine
->frame
.hard_fp_offset
;
3466 rtx addr
= plus_constant (Pmode
, ptr_reg
, offset
);
3467 rtx mem
= gen_frame_mem (mode
, addr
);
3469 rtx set
= prologue_p
? gen_rtx_SET (mem
, reg
) : gen_rtx_SET (reg
, mem
);
3470 unsigned regno2
= aarch64_get_next_set_bit (components
, regno
+ 1);
3471 /* No more registers to handle after REGNO.
3472 Emit a single save/restore and exit. */
3473 if (regno2
== last_regno
)
3475 insn
= emit_insn (set
);
3476 RTX_FRAME_RELATED_P (insn
) = 1;
3478 add_reg_note (insn
, REG_CFA_OFFSET
, copy_rtx (set
));
3480 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
3484 HOST_WIDE_INT offset2
= cfun
->machine
->frame
.reg_offset
[regno2
];
3485 /* The next register is not of the same class or its offset is not
3486 mergeable with the current one into a pair. */
3487 if (!satisfies_constraint_Ump (mem
)
3488 || GP_REGNUM_P (regno
) != GP_REGNUM_P (regno2
)
3489 || (offset2
- cfun
->machine
->frame
.reg_offset
[regno
])
3490 != GET_MODE_SIZE (mode
))
3492 insn
= emit_insn (set
);
3493 RTX_FRAME_RELATED_P (insn
) = 1;
3495 add_reg_note (insn
, REG_CFA_OFFSET
, copy_rtx (set
));
3497 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
3503 /* REGNO2 can be saved/restored in a pair with REGNO. */
3504 rtx reg2
= gen_rtx_REG (mode
, regno2
);
3505 if (!frame_pointer_needed
)
3506 offset2
+= cfun
->machine
->frame
.frame_size
3507 - cfun
->machine
->frame
.hard_fp_offset
;
3508 rtx addr2
= plus_constant (Pmode
, ptr_reg
, offset2
);
3509 rtx mem2
= gen_frame_mem (mode
, addr2
);
3510 rtx set2
= prologue_p
? gen_rtx_SET (mem2
, reg2
)
3511 : gen_rtx_SET (reg2
, mem2
);
3514 insn
= emit_insn (aarch64_gen_store_pair (mode
, mem
, reg
, mem2
, reg2
));
3516 insn
= emit_insn (aarch64_gen_load_pair (mode
, reg
, mem
, reg2
, mem2
));
3518 RTX_FRAME_RELATED_P (insn
) = 1;
3521 add_reg_note (insn
, REG_CFA_OFFSET
, set
);
3522 add_reg_note (insn
, REG_CFA_OFFSET
, set2
);
3526 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
3527 add_reg_note (insn
, REG_CFA_RESTORE
, reg2
);
3530 regno
= aarch64_get_next_set_bit (components
, regno2
+ 1);
3534 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
3537 aarch64_emit_prologue_components (sbitmap components
)
3539 aarch64_process_components (components
, true);
3542 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
3545 aarch64_emit_epilogue_components (sbitmap components
)
3547 aarch64_process_components (components
, false);
3550 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
3553 aarch64_set_handled_components (sbitmap components
)
3555 for (unsigned regno
= 0; regno
<= LAST_SAVED_REGNUM
; regno
++)
3556 if (bitmap_bit_p (components
, regno
))
3557 cfun
->machine
->reg_is_wrapped_separately
[regno
] = true;
3560 /* AArch64 stack frames generated by this compiler look like:
3562 +-------------------------------+
3564 | incoming stack arguments |
3566 +-------------------------------+
3567 | | <-- incoming stack pointer (aligned)
3568 | callee-allocated save area |
3569 | for register varargs |
3571 +-------------------------------+
3572 | local variables | <-- frame_pointer_rtx
3574 +-------------------------------+
3576 +-------------------------------+ |
3577 | callee-saved registers | | frame.saved_regs_size
3578 +-------------------------------+ |
3580 +-------------------------------+ |
3581 | FP' | / <- hard_frame_pointer_rtx (aligned)
3582 +-------------------------------+
3583 | dynamic allocation |
3584 +-------------------------------+
3586 +-------------------------------+
3587 | outgoing stack arguments | <-- arg_pointer
3589 +-------------------------------+
3590 | | <-- stack_pointer_rtx (aligned)
3592 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
3593 but leave frame_pointer_rtx and hard_frame_pointer_rtx
3596 /* Generate the prologue instructions for entry into a function.
3597 Establish the stack frame by decreasing the stack pointer with a
3598 properly calculated size and, if necessary, create a frame record
3599 filled with the values of LR and previous frame pointer. The
3600 current FP is also set up if it is in use. */
3603 aarch64_expand_prologue (void)
3605 aarch64_layout_frame ();
3607 HOST_WIDE_INT frame_size
= cfun
->machine
->frame
.frame_size
;
3608 HOST_WIDE_INT initial_adjust
= cfun
->machine
->frame
.initial_adjust
;
3609 HOST_WIDE_INT callee_adjust
= cfun
->machine
->frame
.callee_adjust
;
3610 HOST_WIDE_INT final_adjust
= cfun
->machine
->frame
.final_adjust
;
3611 HOST_WIDE_INT callee_offset
= cfun
->machine
->frame
.callee_offset
;
3612 unsigned reg1
= cfun
->machine
->frame
.wb_candidate1
;
3613 unsigned reg2
= cfun
->machine
->frame
.wb_candidate2
;
3616 /* Sign return address for functions. */
3617 if (aarch64_return_address_signing_enabled ())
3619 insn
= emit_insn (gen_pacisp ());
3620 add_reg_note (insn
, REG_CFA_TOGGLE_RA_MANGLE
, const0_rtx
);
3621 RTX_FRAME_RELATED_P (insn
) = 1;
3624 if (flag_stack_usage_info
)
3625 current_function_static_stack_size
= frame_size
;
3627 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
3629 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
3631 if (frame_size
> PROBE_INTERVAL
&& frame_size
> STACK_CHECK_PROTECT
)
3632 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT
,
3633 frame_size
- STACK_CHECK_PROTECT
);
3635 else if (frame_size
> 0)
3636 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT
, frame_size
);
3639 aarch64_sub_sp (IP0_REGNUM
, initial_adjust
, true);
3641 if (callee_adjust
!= 0)
3642 aarch64_push_regs (reg1
, reg2
, callee_adjust
);
3644 if (frame_pointer_needed
)
3646 if (callee_adjust
== 0)
3647 aarch64_save_callee_saves (DImode
, callee_offset
, R29_REGNUM
,
3649 insn
= emit_insn (gen_add3_insn (hard_frame_pointer_rtx
,
3651 GEN_INT (callee_offset
)));
3652 RTX_FRAME_RELATED_P (insn
) = 1;
3653 emit_insn (gen_stack_tie (stack_pointer_rtx
, hard_frame_pointer_rtx
));
3656 aarch64_save_callee_saves (DImode
, callee_offset
, R0_REGNUM
, R30_REGNUM
,
3657 callee_adjust
!= 0 || frame_pointer_needed
);
3658 aarch64_save_callee_saves (DFmode
, callee_offset
, V0_REGNUM
, V31_REGNUM
,
3659 callee_adjust
!= 0 || frame_pointer_needed
);
3660 aarch64_sub_sp (IP1_REGNUM
, final_adjust
, !frame_pointer_needed
);
3663 /* Return TRUE if we can use a simple_return insn.
3665 This function checks whether the callee saved stack is empty, which
3666 means no restore actions are need. The pro_and_epilogue will use
3667 this to check whether shrink-wrapping opt is feasible. */
3670 aarch64_use_return_insn_p (void)
3672 if (!reload_completed
)
3678 aarch64_layout_frame ();
3680 return cfun
->machine
->frame
.frame_size
== 0;
3683 /* Generate the epilogue instructions for returning from a function.
3684 This is almost exactly the reverse of the prolog sequence, except
3685 that we need to insert barriers to avoid scheduling loads that read
3686 from a deallocated stack, and we optimize the unwind records by
3687 emitting them all together if possible. */
3689 aarch64_expand_epilogue (bool for_sibcall
)
3691 aarch64_layout_frame ();
3693 HOST_WIDE_INT initial_adjust
= cfun
->machine
->frame
.initial_adjust
;
3694 HOST_WIDE_INT callee_adjust
= cfun
->machine
->frame
.callee_adjust
;
3695 HOST_WIDE_INT final_adjust
= cfun
->machine
->frame
.final_adjust
;
3696 HOST_WIDE_INT callee_offset
= cfun
->machine
->frame
.callee_offset
;
3697 unsigned reg1
= cfun
->machine
->frame
.wb_candidate1
;
3698 unsigned reg2
= cfun
->machine
->frame
.wb_candidate2
;
3702 /* We need to add memory barrier to prevent read from deallocated stack. */
3703 bool need_barrier_p
= (get_frame_size ()
3704 + cfun
->machine
->frame
.saved_varargs_size
) != 0;
3706 /* Emit a barrier to prevent loads from a deallocated stack. */
3707 if (final_adjust
> crtl
->outgoing_args_size
|| cfun
->calls_alloca
3708 || crtl
->calls_eh_return
)
3710 emit_insn (gen_stack_tie (stack_pointer_rtx
, stack_pointer_rtx
));
3711 need_barrier_p
= false;
3714 /* Restore the stack pointer from the frame pointer if it may not
3715 be the same as the stack pointer. */
3716 if (frame_pointer_needed
&& (final_adjust
|| cfun
->calls_alloca
))
3718 insn
= emit_insn (gen_add3_insn (stack_pointer_rtx
,
3719 hard_frame_pointer_rtx
,
3720 GEN_INT (-callee_offset
)));
3721 /* If writeback is used when restoring callee-saves, the CFA
3722 is restored on the instruction doing the writeback. */
3723 RTX_FRAME_RELATED_P (insn
) = callee_adjust
== 0;
3726 aarch64_add_sp (IP1_REGNUM
, final_adjust
, df_regs_ever_live_p (IP1_REGNUM
));
3728 aarch64_restore_callee_saves (DImode
, callee_offset
, R0_REGNUM
, R30_REGNUM
,
3729 callee_adjust
!= 0, &cfi_ops
);
3730 aarch64_restore_callee_saves (DFmode
, callee_offset
, V0_REGNUM
, V31_REGNUM
,
3731 callee_adjust
!= 0, &cfi_ops
);
3734 emit_insn (gen_stack_tie (stack_pointer_rtx
, stack_pointer_rtx
));
3736 if (callee_adjust
!= 0)
3737 aarch64_pop_regs (reg1
, reg2
, callee_adjust
, &cfi_ops
);
3739 if (callee_adjust
!= 0 || initial_adjust
> 65536)
3741 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
3742 insn
= get_last_insn ();
3743 rtx new_cfa
= plus_constant (Pmode
, stack_pointer_rtx
, initial_adjust
);
3744 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_DEF_CFA
, new_cfa
, cfi_ops
);
3745 RTX_FRAME_RELATED_P (insn
) = 1;
3749 aarch64_add_sp (IP0_REGNUM
, initial_adjust
, df_regs_ever_live_p (IP0_REGNUM
));
3753 /* Emit delayed restores and reset the CFA to be SP. */
3754 insn
= get_last_insn ();
3755 cfi_ops
= alloc_reg_note (REG_CFA_DEF_CFA
, stack_pointer_rtx
, cfi_ops
);
3756 REG_NOTES (insn
) = cfi_ops
;
3757 RTX_FRAME_RELATED_P (insn
) = 1;
3760 /* We prefer to emit the combined return/authenticate instruction RETAA,
3761 however there are three cases in which we must instead emit an explicit
3762 authentication instruction.
3764 1) Sibcalls don't return in a normal way, so if we're about to call one
3765 we must authenticate.
3767 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
3768 generating code for !TARGET_ARMV8_3 we can't use it and must
3769 explicitly authenticate.
3771 3) On an eh_return path we make extra stack adjustments to update the
3772 canonical frame address to be the exception handler's CFA. We want
3773 to authenticate using the CFA of the function which calls eh_return.
3775 if (aarch64_return_address_signing_enabled ()
3776 && (for_sibcall
|| !TARGET_ARMV8_3
|| crtl
->calls_eh_return
))
3778 insn
= emit_insn (gen_autisp ());
3779 add_reg_note (insn
, REG_CFA_TOGGLE_RA_MANGLE
, const0_rtx
);
3780 RTX_FRAME_RELATED_P (insn
) = 1;
3783 /* Stack adjustment for exception handler. */
3784 if (crtl
->calls_eh_return
)
3786 /* We need to unwind the stack by the offset computed by
3787 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
3788 to be SP; letting the CFA move during this adjustment
3789 is just as correct as retaining the CFA from the body
3790 of the function. Therefore, do nothing special. */
3791 emit_insn (gen_add2_insn (stack_pointer_rtx
, EH_RETURN_STACKADJ_RTX
));
3794 emit_use (gen_rtx_REG (DImode
, LR_REGNUM
));
3796 emit_jump_insn (ret_rtx
);
3799 /* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
3800 normally or return to a previous frame after unwinding.
3802 An EH return uses a single shared return sequence. The epilogue is
3803 exactly like a normal epilogue except that it has an extra input
3804 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
3805 that must be applied after the frame has been destroyed. An extra label
3806 is inserted before the epilogue which initializes this register to zero,
3807 and this is the entry point for a normal return.
3809 An actual EH return updates the return address, initializes the stack
3810 adjustment and jumps directly into the epilogue (bypassing the zeroing
3811 of the adjustment). Since the return address is typically saved on the
3812 stack when a function makes a call, the saved LR must be updated outside
3815 This poses problems as the store is generated well before the epilogue,
3816 so the offset of LR is not known yet. Also optimizations will remove the
3817 store as it appears dead, even after the epilogue is generated (as the
3818 base or offset for loading LR is different in many cases).
3820 To avoid these problems this implementation forces the frame pointer
3821 in eh_return functions so that the location of LR is fixed and known early.
3822 It also marks the store volatile, so no optimization is permitted to
3823 remove the store. */
3825 aarch64_eh_return_handler_rtx (void)
3827 rtx tmp
= gen_frame_mem (Pmode
,
3828 plus_constant (Pmode
, hard_frame_pointer_rtx
, UNITS_PER_WORD
));
3830 /* Mark the store volatile, so no optimization is permitted to remove it. */
3831 MEM_VOLATILE_P (tmp
) = true;
3835 /* Output code to add DELTA to the first argument, and then jump
3836 to FUNCTION. Used for C++ multiple inheritance. */
3838 aarch64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
3839 HOST_WIDE_INT delta
,
3840 HOST_WIDE_INT vcall_offset
,
3843 /* The this pointer is always in x0. Note that this differs from
3844 Arm where the this pointer maybe bumped to r1 if r0 is required
3845 to return a pointer to an aggregate. On AArch64 a result value
3846 pointer will be in x8. */
3847 int this_regno
= R0_REGNUM
;
3848 rtx this_rtx
, temp0
, temp1
, addr
, funexp
;
3851 reload_completed
= 1;
3852 emit_note (NOTE_INSN_PROLOGUE_END
);
3854 if (vcall_offset
== 0)
3855 aarch64_add_constant (Pmode
, this_regno
, IP1_REGNUM
, delta
);
3858 gcc_assert ((vcall_offset
& (POINTER_BYTES
- 1)) == 0);
3860 this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
3861 temp0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
3862 temp1
= gen_rtx_REG (Pmode
, IP1_REGNUM
);
3867 if (delta
>= -256 && delta
< 256)
3868 addr
= gen_rtx_PRE_MODIFY (Pmode
, this_rtx
,
3869 plus_constant (Pmode
, this_rtx
, delta
));
3871 aarch64_add_constant (Pmode
, this_regno
, IP1_REGNUM
, delta
);
3874 if (Pmode
== ptr_mode
)
3875 aarch64_emit_move (temp0
, gen_rtx_MEM (ptr_mode
, addr
));
3877 aarch64_emit_move (temp0
,
3878 gen_rtx_ZERO_EXTEND (Pmode
,
3879 gen_rtx_MEM (ptr_mode
, addr
)));
3881 if (vcall_offset
>= -256 && vcall_offset
< 4096 * POINTER_BYTES
)
3882 addr
= plus_constant (Pmode
, temp0
, vcall_offset
);
3885 aarch64_internal_mov_immediate (temp1
, GEN_INT (vcall_offset
), true,
3887 addr
= gen_rtx_PLUS (Pmode
, temp0
, temp1
);
3890 if (Pmode
== ptr_mode
)
3891 aarch64_emit_move (temp1
, gen_rtx_MEM (ptr_mode
,addr
));
3893 aarch64_emit_move (temp1
,
3894 gen_rtx_SIGN_EXTEND (Pmode
,
3895 gen_rtx_MEM (ptr_mode
, addr
)));
3897 emit_insn (gen_add2_insn (this_rtx
, temp1
));
3900 /* Generate a tail call to the target function. */
3901 if (!TREE_USED (function
))
3903 assemble_external (function
);
3904 TREE_USED (function
) = 1;
3906 funexp
= XEXP (DECL_RTL (function
), 0);
3907 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
3908 insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
3909 SIBLING_CALL_P (insn
) = 1;
3911 insn
= get_insns ();
3912 shorten_branches (insn
);
3913 final_start_function (insn
, file
, 1);
3914 final (insn
, file
, 1);
3915 final_end_function ();
3917 /* Stop pretending to be a post-reload pass. */
3918 reload_completed
= 0;
3922 aarch64_tls_referenced_p (rtx x
)
3924 if (!TARGET_HAVE_TLS
)
3926 subrtx_iterator::array_type array
;
3927 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
3929 const_rtx x
= *iter
;
3930 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
3932 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3933 TLS offsets, not real symbol references. */
3934 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
3935 iter
.skip_subrtxes ();
3941 /* Return true if val can be encoded as a 12-bit unsigned immediate with
3942 a left shift of 0 or 12 bits. */
3944 aarch64_uimm12_shift (HOST_WIDE_INT val
)
3946 return ((val
& (((HOST_WIDE_INT
) 0xfff) << 0)) == val
3947 || (val
& (((HOST_WIDE_INT
) 0xfff) << 12)) == val
3952 /* Return true if val is an immediate that can be loaded into a
3953 register by a MOVZ instruction. */
3955 aarch64_movw_imm (HOST_WIDE_INT val
, machine_mode mode
)
3957 if (GET_MODE_SIZE (mode
) > 4)
3959 if ((val
& (((HOST_WIDE_INT
) 0xffff) << 32)) == val
3960 || (val
& (((HOST_WIDE_INT
) 0xffff) << 48)) == val
)
3965 /* Ignore sign extension. */
3966 val
&= (HOST_WIDE_INT
) 0xffffffff;
3968 return ((val
& (((HOST_WIDE_INT
) 0xffff) << 0)) == val
3969 || (val
& (((HOST_WIDE_INT
) 0xffff) << 16)) == val
);
3972 /* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
3974 static const unsigned HOST_WIDE_INT bitmask_imm_mul
[] =
3976 0x0000000100000001ull
,
3977 0x0001000100010001ull
,
3978 0x0101010101010101ull
,
3979 0x1111111111111111ull
,
3980 0x5555555555555555ull
,
3984 /* Return true if val is a valid bitmask immediate. */
3987 aarch64_bitmask_imm (HOST_WIDE_INT val_in
, machine_mode mode
)
3989 unsigned HOST_WIDE_INT val
, tmp
, mask
, first_one
, next_one
;
3992 /* Check for a single sequence of one bits and return quickly if so.
3993 The special cases of all ones and all zeroes returns false. */
3994 val
= (unsigned HOST_WIDE_INT
) val_in
;
3995 tmp
= val
+ (val
& -val
);
3997 if (tmp
== (tmp
& -tmp
))
3998 return (val
+ 1) > 1;
4000 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
4002 val
= (val
<< 32) | (val
& 0xffffffff);
4004 /* Invert if the immediate doesn't start with a zero bit - this means we
4005 only need to search for sequences of one bits. */
4009 /* Find the first set bit and set tmp to val with the first sequence of one
4010 bits removed. Return success if there is a single sequence of ones. */
4011 first_one
= val
& -val
;
4012 tmp
= val
& (val
+ first_one
);
4017 /* Find the next set bit and compute the difference in bit position. */
4018 next_one
= tmp
& -tmp
;
4019 bits
= clz_hwi (first_one
) - clz_hwi (next_one
);
4022 /* Check the bit position difference is a power of 2, and that the first
4023 sequence of one bits fits within 'bits' bits. */
4024 if ((mask
>> bits
) != 0 || bits
!= (bits
& -bits
))
4027 /* Check the sequence of one bits is repeated 64/bits times. */
4028 return val
== mask
* bitmask_imm_mul
[__builtin_clz (bits
) - 26];
4031 /* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
4032 Assumed precondition: VAL_IN Is not zero. */
4034 unsigned HOST_WIDE_INT
4035 aarch64_and_split_imm1 (HOST_WIDE_INT val_in
)
4037 int lowest_bit_set
= ctz_hwi (val_in
);
4038 int highest_bit_set
= floor_log2 (val_in
);
4039 gcc_assert (val_in
!= 0);
4041 return ((HOST_WIDE_INT_UC (2) << highest_bit_set
) -
4042 (HOST_WIDE_INT_1U
<< lowest_bit_set
));
4045 /* Create constant where bits outside of lowest bit set to highest bit set
4048 unsigned HOST_WIDE_INT
4049 aarch64_and_split_imm2 (HOST_WIDE_INT val_in
)
4051 return val_in
| ~aarch64_and_split_imm1 (val_in
);
4054 /* Return true if VAL_IN is a valid 'and' bitmask immediate. */
4057 aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in
, machine_mode mode
)
4059 if (aarch64_bitmask_imm (val_in
, mode
))
4062 if (aarch64_move_imm (val_in
, mode
))
4065 unsigned HOST_WIDE_INT imm2
= aarch64_and_split_imm2 (val_in
);
4067 return aarch64_bitmask_imm (imm2
, mode
);
4070 /* Return true if val is an immediate that can be loaded into a
4071 register in a single instruction. */
4073 aarch64_move_imm (HOST_WIDE_INT val
, machine_mode mode
)
4075 if (aarch64_movw_imm (val
, mode
) || aarch64_movw_imm (~val
, mode
))
4077 return aarch64_bitmask_imm (val
, mode
);
4081 aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
4085 if (GET_CODE (x
) == HIGH
)
4088 split_const (x
, &base
, &offset
);
4089 if (GET_CODE (base
) == SYMBOL_REF
|| GET_CODE (base
) == LABEL_REF
)
4091 if (aarch64_classify_symbol (base
, offset
)
4092 != SYMBOL_FORCE_TO_MEM
)
4095 /* Avoid generating a 64-bit relocation in ILP32; leave
4096 to aarch64_expand_mov_immediate to handle it properly. */
4097 return mode
!= ptr_mode
;
4100 return aarch64_tls_referenced_p (x
);
4103 /* Implement TARGET_CASE_VALUES_THRESHOLD.
4104 The expansion for a table switch is quite expensive due to the number
4105 of instructions, the table lookup and hard to predict indirect jump.
4106 When optimizing for speed, and -O3 enabled, use the per-core tuning if
4107 set, otherwise use tables for > 16 cases as a tradeoff between size and
4108 performance. When optimizing for size, use the default setting. */
4111 aarch64_case_values_threshold (void)
4113 /* Use the specified limit for the number of cases before using jump
4114 tables at higher optimization levels. */
4116 && selected_cpu
->tune
->max_case_values
!= 0)
4117 return selected_cpu
->tune
->max_case_values
;
4119 return optimize_size
? default_case_values_threshold () : 17;
4122 /* Return true if register REGNO is a valid index register.
4123 STRICT_P is true if REG_OK_STRICT is in effect. */
4126 aarch64_regno_ok_for_index_p (int regno
, bool strict_p
)
4128 if (!HARD_REGISTER_NUM_P (regno
))
4136 regno
= reg_renumber
[regno
];
4138 return GP_REGNUM_P (regno
);
4141 /* Return true if register REGNO is a valid base register for mode MODE.
4142 STRICT_P is true if REG_OK_STRICT is in effect. */
4145 aarch64_regno_ok_for_base_p (int regno
, bool strict_p
)
4147 if (!HARD_REGISTER_NUM_P (regno
))
4155 regno
= reg_renumber
[regno
];
4158 /* The fake registers will be eliminated to either the stack or
4159 hard frame pointer, both of which are usually valid base registers.
4160 Reload deals with the cases where the eliminated form isn't valid. */
4161 return (GP_REGNUM_P (regno
)
4162 || regno
== SP_REGNUM
4163 || regno
== FRAME_POINTER_REGNUM
4164 || regno
== ARG_POINTER_REGNUM
);
4167 /* Return true if X is a valid base register for mode MODE.
4168 STRICT_P is true if REG_OK_STRICT is in effect. */
4171 aarch64_base_register_rtx_p (rtx x
, bool strict_p
)
4173 if (!strict_p
&& GET_CODE (x
) == SUBREG
)
4176 return (REG_P (x
) && aarch64_regno_ok_for_base_p (REGNO (x
), strict_p
));
4179 /* Return true if address offset is a valid index. If it is, fill in INFO
4180 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
4183 aarch64_classify_index (struct aarch64_address_info
*info
, rtx x
,
4184 machine_mode mode
, bool strict_p
)
4186 enum aarch64_address_type type
;
4191 if ((REG_P (x
) || GET_CODE (x
) == SUBREG
)
4192 && GET_MODE (x
) == Pmode
)
4194 type
= ADDRESS_REG_REG
;
4198 /* (sign_extend:DI (reg:SI)) */
4199 else if ((GET_CODE (x
) == SIGN_EXTEND
4200 || GET_CODE (x
) == ZERO_EXTEND
)
4201 && GET_MODE (x
) == DImode
4202 && GET_MODE (XEXP (x
, 0)) == SImode
)
4204 type
= (GET_CODE (x
) == SIGN_EXTEND
)
4205 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
4206 index
= XEXP (x
, 0);
4209 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
4210 else if (GET_CODE (x
) == MULT
4211 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
4212 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
4213 && GET_MODE (XEXP (x
, 0)) == DImode
4214 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
4215 && CONST_INT_P (XEXP (x
, 1)))
4217 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
4218 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
4219 index
= XEXP (XEXP (x
, 0), 0);
4220 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
4222 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
4223 else if (GET_CODE (x
) == ASHIFT
4224 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
4225 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
4226 && GET_MODE (XEXP (x
, 0)) == DImode
4227 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
4228 && CONST_INT_P (XEXP (x
, 1)))
4230 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
4231 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
4232 index
= XEXP (XEXP (x
, 0), 0);
4233 shift
= INTVAL (XEXP (x
, 1));
4235 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
4236 else if ((GET_CODE (x
) == SIGN_EXTRACT
4237 || GET_CODE (x
) == ZERO_EXTRACT
)
4238 && GET_MODE (x
) == DImode
4239 && GET_CODE (XEXP (x
, 0)) == MULT
4240 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
4241 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
4243 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
4244 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
4245 index
= XEXP (XEXP (x
, 0), 0);
4246 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
4247 if (INTVAL (XEXP (x
, 1)) != 32 + shift
4248 || INTVAL (XEXP (x
, 2)) != 0)
4251 /* (and:DI (mult:DI (reg:DI) (const_int scale))
4252 (const_int 0xffffffff<<shift)) */
4253 else if (GET_CODE (x
) == AND
4254 && GET_MODE (x
) == DImode
4255 && GET_CODE (XEXP (x
, 0)) == MULT
4256 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
4257 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
4258 && CONST_INT_P (XEXP (x
, 1)))
4260 type
= ADDRESS_REG_UXTW
;
4261 index
= XEXP (XEXP (x
, 0), 0);
4262 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
4263 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
4266 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
4267 else if ((GET_CODE (x
) == SIGN_EXTRACT
4268 || GET_CODE (x
) == ZERO_EXTRACT
)
4269 && GET_MODE (x
) == DImode
4270 && GET_CODE (XEXP (x
, 0)) == ASHIFT
4271 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
4272 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
4274 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
4275 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
4276 index
= XEXP (XEXP (x
, 0), 0);
4277 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
4278 if (INTVAL (XEXP (x
, 1)) != 32 + shift
4279 || INTVAL (XEXP (x
, 2)) != 0)
4282 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
4283 (const_int 0xffffffff<<shift)) */
4284 else if (GET_CODE (x
) == AND
4285 && GET_MODE (x
) == DImode
4286 && GET_CODE (XEXP (x
, 0)) == ASHIFT
4287 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
4288 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
4289 && CONST_INT_P (XEXP (x
, 1)))
4291 type
= ADDRESS_REG_UXTW
;
4292 index
= XEXP (XEXP (x
, 0), 0);
4293 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
4294 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
4297 /* (mult:P (reg:P) (const_int scale)) */
4298 else if (GET_CODE (x
) == MULT
4299 && GET_MODE (x
) == Pmode
4300 && GET_MODE (XEXP (x
, 0)) == Pmode
4301 && CONST_INT_P (XEXP (x
, 1)))
4303 type
= ADDRESS_REG_REG
;
4304 index
= XEXP (x
, 0);
4305 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
4307 /* (ashift:P (reg:P) (const_int shift)) */
4308 else if (GET_CODE (x
) == ASHIFT
4309 && GET_MODE (x
) == Pmode
4310 && GET_MODE (XEXP (x
, 0)) == Pmode
4311 && CONST_INT_P (XEXP (x
, 1)))
4313 type
= ADDRESS_REG_REG
;
4314 index
= XEXP (x
, 0);
4315 shift
= INTVAL (XEXP (x
, 1));
4320 if (GET_CODE (index
) == SUBREG
)
4321 index
= SUBREG_REG (index
);
4324 (shift
> 0 && shift
<= 3
4325 && (1 << shift
) == GET_MODE_SIZE (mode
)))
4327 && aarch64_regno_ok_for_index_p (REGNO (index
), strict_p
))
4330 info
->offset
= index
;
4331 info
->shift
= shift
;
4338 /* Return true if MODE is one of the modes for which we
4339 support LDP/STP operations. */
4342 aarch64_mode_valid_for_sched_fusion_p (machine_mode mode
)
4344 return mode
== SImode
|| mode
== DImode
4345 || mode
== SFmode
|| mode
== DFmode
4346 || (aarch64_vector_mode_supported_p (mode
)
4347 && GET_MODE_SIZE (mode
) == 8);
4350 /* Return true if REGNO is a virtual pointer register, or an eliminable
4351 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
4352 include stack_pointer or hard_frame_pointer. */
4354 virt_or_elim_regno_p (unsigned regno
)
4356 return ((regno
>= FIRST_VIRTUAL_REGISTER
4357 && regno
<= LAST_VIRTUAL_POINTER_REGISTER
)
4358 || regno
== FRAME_POINTER_REGNUM
4359 || regno
== ARG_POINTER_REGNUM
);
4362 /* Return true if X is a valid address for machine mode MODE. If it is,
4363 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
4364 effect. OUTER_CODE is PARALLEL for a load/store pair. */
4367 aarch64_classify_address (struct aarch64_address_info
*info
,
4368 rtx x
, machine_mode mode
,
4369 RTX_CODE outer_code
, bool strict_p
)
4371 enum rtx_code code
= GET_CODE (x
);
4374 /* On BE, we use load/store pair for all large int mode load/stores.
4375 TI/TFmode may also use a load/store pair. */
4376 bool load_store_pair_p
= (outer_code
== PARALLEL
4379 || (BYTES_BIG_ENDIAN
4380 && aarch64_vect_struct_mode_p (mode
)));
4382 bool allow_reg_index_p
=
4384 && (GET_MODE_SIZE (mode
) != 16 || aarch64_vector_mode_supported_p (mode
))
4385 && !aarch64_vect_struct_mode_p (mode
);
4387 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
4389 if (aarch64_vect_struct_mode_p (mode
) && !BYTES_BIG_ENDIAN
4390 && (code
!= POST_INC
&& code
!= REG
))
4397 info
->type
= ADDRESS_REG_IMM
;
4399 info
->offset
= const0_rtx
;
4400 return aarch64_base_register_rtx_p (x
, strict_p
);
4408 && virt_or_elim_regno_p (REGNO (op0
))
4409 && CONST_INT_P (op1
))
4411 info
->type
= ADDRESS_REG_IMM
;
4418 if (GET_MODE_SIZE (mode
) != 0
4419 && CONST_INT_P (op1
)
4420 && aarch64_base_register_rtx_p (op0
, strict_p
))
4422 HOST_WIDE_INT offset
= INTVAL (op1
);
4424 info
->type
= ADDRESS_REG_IMM
;
4428 /* TImode and TFmode values are allowed in both pairs of X
4429 registers and individual Q registers. The available
4431 X,X: 7-bit signed scaled offset
4432 Q: 9-bit signed offset
4433 We conservatively require an offset representable in either mode.
4434 When performing the check for pairs of X registers i.e. LDP/STP
4435 pass down DImode since that is the natural size of the LDP/STP
4436 instruction memory accesses. */
4437 if (mode
== TImode
|| mode
== TFmode
)
4438 return (aarch64_offset_7bit_signed_scaled_p (DImode
, offset
)
4439 && (offset_9bit_signed_unscaled_p (mode
, offset
)
4440 || offset_12bit_unsigned_scaled_p (mode
, offset
)));
4442 /* A 7bit offset check because OImode will emit a ldp/stp
4443 instruction (only big endian will get here).
4444 For ldp/stp instructions, the offset is scaled for the size of a
4445 single element of the pair. */
4447 return aarch64_offset_7bit_signed_scaled_p (TImode
, offset
);
4449 /* Three 9/12 bit offsets checks because CImode will emit three
4450 ldr/str instructions (only big endian will get here). */
4452 return (aarch64_offset_7bit_signed_scaled_p (TImode
, offset
)
4453 && (offset_9bit_signed_unscaled_p (V16QImode
, offset
+ 32)
4454 || offset_12bit_unsigned_scaled_p (V16QImode
,
4457 /* Two 7bit offsets checks because XImode will emit two ldp/stp
4458 instructions (only big endian will get here). */
4460 return (aarch64_offset_7bit_signed_scaled_p (TImode
, offset
)
4461 && aarch64_offset_7bit_signed_scaled_p (TImode
,
4464 if (load_store_pair_p
)
4465 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
4466 && aarch64_offset_7bit_signed_scaled_p (mode
, offset
));
4468 return (offset_9bit_signed_unscaled_p (mode
, offset
)
4469 || offset_12bit_unsigned_scaled_p (mode
, offset
));
4472 if (allow_reg_index_p
)
4474 /* Look for base + (scaled/extended) index register. */
4475 if (aarch64_base_register_rtx_p (op0
, strict_p
)
4476 && aarch64_classify_index (info
, op1
, mode
, strict_p
))
4481 if (aarch64_base_register_rtx_p (op1
, strict_p
)
4482 && aarch64_classify_index (info
, op0
, mode
, strict_p
))
4495 info
->type
= ADDRESS_REG_WB
;
4496 info
->base
= XEXP (x
, 0);
4497 info
->offset
= NULL_RTX
;
4498 return aarch64_base_register_rtx_p (info
->base
, strict_p
);
4502 info
->type
= ADDRESS_REG_WB
;
4503 info
->base
= XEXP (x
, 0);
4504 if (GET_CODE (XEXP (x
, 1)) == PLUS
4505 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
4506 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), info
->base
)
4507 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
4509 HOST_WIDE_INT offset
;
4510 info
->offset
= XEXP (XEXP (x
, 1), 1);
4511 offset
= INTVAL (info
->offset
);
4513 /* TImode and TFmode values are allowed in both pairs of X
4514 registers and individual Q registers. The available
4516 X,X: 7-bit signed scaled offset
4517 Q: 9-bit signed offset
4518 We conservatively require an offset representable in either mode.
4520 if (mode
== TImode
|| mode
== TFmode
)
4521 return (aarch64_offset_7bit_signed_scaled_p (mode
, offset
)
4522 && offset_9bit_signed_unscaled_p (mode
, offset
));
4524 if (load_store_pair_p
)
4525 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
4526 && aarch64_offset_7bit_signed_scaled_p (mode
, offset
));
4528 return offset_9bit_signed_unscaled_p (mode
, offset
);
4535 /* load literal: pc-relative constant pool entry. Only supported
4536 for SI mode or larger. */
4537 info
->type
= ADDRESS_SYMBOLIC
;
4539 if (!load_store_pair_p
&& GET_MODE_SIZE (mode
) >= 4)
4543 split_const (x
, &sym
, &addend
);
4544 return ((GET_CODE (sym
) == LABEL_REF
4545 || (GET_CODE (sym
) == SYMBOL_REF
4546 && CONSTANT_POOL_ADDRESS_P (sym
)
4547 && aarch64_pcrelative_literal_loads
)));
4552 info
->type
= ADDRESS_LO_SUM
;
4553 info
->base
= XEXP (x
, 0);
4554 info
->offset
= XEXP (x
, 1);
4555 if (allow_reg_index_p
4556 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
4559 split_const (info
->offset
, &sym
, &offs
);
4560 if (GET_CODE (sym
) == SYMBOL_REF
4561 && (aarch64_classify_symbol (sym
, offs
) == SYMBOL_SMALL_ABSOLUTE
))
4563 /* The symbol and offset must be aligned to the access size. */
4565 unsigned int ref_size
;
4567 if (CONSTANT_POOL_ADDRESS_P (sym
))
4568 align
= GET_MODE_ALIGNMENT (get_pool_mode (sym
));
4569 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym
))
4571 tree exp
= SYMBOL_REF_DECL (sym
);
4572 align
= TYPE_ALIGN (TREE_TYPE (exp
));
4573 align
= CONSTANT_ALIGNMENT (exp
, align
);
4575 else if (SYMBOL_REF_DECL (sym
))
4576 align
= DECL_ALIGN (SYMBOL_REF_DECL (sym
));
4577 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym
)
4578 && SYMBOL_REF_BLOCK (sym
) != NULL
)
4579 align
= SYMBOL_REF_BLOCK (sym
)->alignment
;
4581 align
= BITS_PER_UNIT
;
4583 ref_size
= GET_MODE_SIZE (mode
);
4585 ref_size
= GET_MODE_SIZE (DImode
);
4587 return ((INTVAL (offs
) & (ref_size
- 1)) == 0
4588 && ((align
/ BITS_PER_UNIT
) & (ref_size
- 1)) == 0);
4599 aarch64_symbolic_address_p (rtx x
)
4603 split_const (x
, &x
, &offset
);
4604 return GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
;
4607 /* Classify the base of symbolic expression X. */
4609 enum aarch64_symbol_type
4610 aarch64_classify_symbolic_expression (rtx x
)
4614 split_const (x
, &x
, &offset
);
4615 return aarch64_classify_symbol (x
, offset
);
4619 /* Return TRUE if X is a legitimate address for accessing memory in
4622 aarch64_legitimate_address_hook_p (machine_mode mode
, rtx x
, bool strict_p
)
4624 struct aarch64_address_info addr
;
4626 return aarch64_classify_address (&addr
, x
, mode
, MEM
, strict_p
);
4629 /* Return TRUE if X is a legitimate address for accessing memory in
4630 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
4633 aarch64_legitimate_address_p (machine_mode mode
, rtx x
,
4634 RTX_CODE outer_code
, bool strict_p
)
4636 struct aarch64_address_info addr
;
4638 return aarch64_classify_address (&addr
, x
, mode
, outer_code
, strict_p
);
4641 /* Split an out-of-range address displacement into a base and offset.
4642 Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
4643 to increase opportunities for sharing the base address of different sizes.
4644 For unaligned accesses and TI/TF mode use the signed 9-bit range. */
4646 aarch64_legitimize_address_displacement (rtx
*disp
, rtx
*off
, machine_mode mode
)
4648 HOST_WIDE_INT offset
= INTVAL (*disp
);
4649 HOST_WIDE_INT base
= offset
& ~(GET_MODE_SIZE (mode
) < 4 ? 0xfff : 0x3ffc);
4651 if (mode
== TImode
|| mode
== TFmode
4652 || (offset
& (GET_MODE_SIZE (mode
) - 1)) != 0)
4653 base
= (offset
+ 0x100) & ~0x1ff;
4655 *off
= GEN_INT (base
);
4656 *disp
= GEN_INT (offset
- base
);
4660 /* Return TRUE if rtx X is immediate constant 0.0 */
4662 aarch64_float_const_zero_rtx_p (rtx x
)
4664 if (GET_MODE (x
) == VOIDmode
)
4667 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x
)))
4668 return !HONOR_SIGNED_ZEROS (GET_MODE (x
));
4669 return real_equal (CONST_DOUBLE_REAL_VALUE (x
), &dconst0
);
4672 /* Return the fixed registers used for condition codes. */
4675 aarch64_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
4678 *p2
= INVALID_REGNUM
;
4682 /* Emit call insn with PAT and do aarch64-specific handling. */
4685 aarch64_emit_call_insn (rtx pat
)
4687 rtx insn
= emit_call_insn (pat
);
4689 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
4690 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP0_REGNUM
));
4691 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP1_REGNUM
));
4695 aarch64_select_cc_mode (RTX_CODE code
, rtx x
, rtx y
)
4697 /* All floating point compares return CCFP if it is an equality
4698 comparison, and CCFPE otherwise. */
4699 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
4726 /* Equality comparisons of short modes against zero can be performed
4727 using the TST instruction with the appropriate bitmask. */
4728 if (y
== const0_rtx
&& REG_P (x
)
4729 && (code
== EQ
|| code
== NE
)
4730 && (GET_MODE (x
) == HImode
|| GET_MODE (x
) == QImode
))
4733 /* Similarly, comparisons of zero_extends from shorter modes can
4734 be performed using an ANDS with an immediate mask. */
4735 if (y
== const0_rtx
&& GET_CODE (x
) == ZERO_EXTEND
4736 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
4737 && (GET_MODE (XEXP (x
, 0)) == HImode
|| GET_MODE (XEXP (x
, 0)) == QImode
)
4738 && (code
== EQ
|| code
== NE
))
4741 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
4743 && (code
== EQ
|| code
== NE
|| code
== LT
|| code
== GE
)
4744 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
|| GET_CODE (x
) == AND
4745 || GET_CODE (x
) == NEG
4746 || (GET_CODE (x
) == ZERO_EXTRACT
&& CONST_INT_P (XEXP (x
, 1))
4747 && CONST_INT_P (XEXP (x
, 2)))))
4750 /* A compare with a shifted operand. Because of canonicalization,
4751 the comparison will have to be swapped when we emit the assembly
4753 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
4754 && (REG_P (y
) || GET_CODE (y
) == SUBREG
)
4755 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
4756 || GET_CODE (x
) == LSHIFTRT
4757 || GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
))
4760 /* Similarly for a negated operand, but we can only do this for
4762 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
4763 && (REG_P (y
) || GET_CODE (y
) == SUBREG
)
4764 && (code
== EQ
|| code
== NE
)
4765 && GET_CODE (x
) == NEG
)
4768 /* A test for unsigned overflow. */
4769 if ((GET_MODE (x
) == DImode
|| GET_MODE (x
) == TImode
)
4771 && GET_CODE (x
) == PLUS
4772 && GET_CODE (y
) == ZERO_EXTEND
)
4775 /* For everything else, return CCmode. */
4780 aarch64_get_condition_code_1 (enum machine_mode
, enum rtx_code
);
4783 aarch64_get_condition_code (rtx x
)
4785 machine_mode mode
= GET_MODE (XEXP (x
, 0));
4786 enum rtx_code comp_code
= GET_CODE (x
);
4788 if (GET_MODE_CLASS (mode
) != MODE_CC
)
4789 mode
= SELECT_CC_MODE (comp_code
, XEXP (x
, 0), XEXP (x
, 1));
4790 return aarch64_get_condition_code_1 (mode
, comp_code
);
4794 aarch64_get_condition_code_1 (enum machine_mode mode
, enum rtx_code comp_code
)
4802 case GE
: return AARCH64_GE
;
4803 case GT
: return AARCH64_GT
;
4804 case LE
: return AARCH64_LS
;
4805 case LT
: return AARCH64_MI
;
4806 case NE
: return AARCH64_NE
;
4807 case EQ
: return AARCH64_EQ
;
4808 case ORDERED
: return AARCH64_VC
;
4809 case UNORDERED
: return AARCH64_VS
;
4810 case UNLT
: return AARCH64_LT
;
4811 case UNLE
: return AARCH64_LE
;
4812 case UNGT
: return AARCH64_HI
;
4813 case UNGE
: return AARCH64_PL
;
4821 case NE
: return AARCH64_NE
;
4822 case EQ
: return AARCH64_EQ
;
4823 case GE
: return AARCH64_GE
;
4824 case GT
: return AARCH64_GT
;
4825 case LE
: return AARCH64_LE
;
4826 case LT
: return AARCH64_LT
;
4827 case GEU
: return AARCH64_CS
;
4828 case GTU
: return AARCH64_HI
;
4829 case LEU
: return AARCH64_LS
;
4830 case LTU
: return AARCH64_CC
;
4838 case NE
: return AARCH64_NE
;
4839 case EQ
: return AARCH64_EQ
;
4840 case GE
: return AARCH64_LE
;
4841 case GT
: return AARCH64_LT
;
4842 case LE
: return AARCH64_GE
;
4843 case LT
: return AARCH64_GT
;
4844 case GEU
: return AARCH64_LS
;
4845 case GTU
: return AARCH64_CC
;
4846 case LEU
: return AARCH64_CS
;
4847 case LTU
: return AARCH64_HI
;
4855 case NE
: return AARCH64_NE
;
4856 case EQ
: return AARCH64_EQ
;
4857 case GE
: return AARCH64_PL
;
4858 case LT
: return AARCH64_MI
;
4866 case NE
: return AARCH64_NE
;
4867 case EQ
: return AARCH64_EQ
;
4875 case NE
: return AARCH64_CS
;
4876 case EQ
: return AARCH64_CC
;
4889 aarch64_const_vec_all_same_in_range_p (rtx x
,
4890 HOST_WIDE_INT minval
,
4891 HOST_WIDE_INT maxval
)
4893 HOST_WIDE_INT firstval
;
4896 if (GET_CODE (x
) != CONST_VECTOR
4897 || GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_INT
)
4900 firstval
= INTVAL (CONST_VECTOR_ELT (x
, 0));
4901 if (firstval
< minval
|| firstval
> maxval
)
4904 count
= CONST_VECTOR_NUNITS (x
);
4905 for (i
= 1; i
< count
; i
++)
4906 if (INTVAL (CONST_VECTOR_ELT (x
, i
)) != firstval
)
4913 aarch64_const_vec_all_same_int_p (rtx x
, HOST_WIDE_INT val
)
4915 return aarch64_const_vec_all_same_in_range_p (x
, val
, val
);
/* Bit positions of the N/Z/C/V flags in an NZCV immediate.  */
#define AARCH64_CC_V 1
#define AARCH64_CC_C (1 << 1)
#define AARCH64_CC_Z (1 << 2)
#define AARCH64_CC_N (1 << 3)

/* N Z C V flags for ccmp.  Indexed by AARCH64_COND_CODE.  Each entry
   is the NZCV immediate that makes the condition FALSE, as required
   by the conditional-compare instructions.  */
static const int aarch64_nzcv_codes[] =
{
  0,		/* EQ, Z == 1.  */
  AARCH64_CC_Z,	/* NE, Z == 0.  */
  0,		/* CS, C == 1.  */
  AARCH64_CC_C,	/* CC, C == 0.  */
  0,		/* MI, N == 1.  */
  AARCH64_CC_N, /* PL, N == 0.  */
  0,		/* VS, V == 1.  */
  AARCH64_CC_V, /* VC, V == 0.  */
  0,		/* HI, C ==1 && Z == 0.  */
  AARCH64_CC_C,	/* LS, !(C == 1 && Z == 0).  */
  AARCH64_CC_V,	/* GE, N == V.  */
  0,		/* LT, N != V.  */
  AARCH64_CC_Z, /* GT, Z == 0 && N == V.  */
  0,		/* LE, !(Z == 0 && N == V).  */
  0,		/* AL, Any.  */
  0		/* NV, Any.  */
};
4947 aarch64_print_operand (FILE *f
, rtx x
, int code
)
4951 /* An integer or symbol address without a preceding # sign. */
4953 switch (GET_CODE (x
))
4956 fprintf (f
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
4960 output_addr_const (f
, x
);
4964 if (GET_CODE (XEXP (x
, 0)) == PLUS
4965 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
4967 output_addr_const (f
, x
);
4973 output_operand_lossage ("Unsupported operand for code '%c'", code
);
4978 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4982 if (!CONST_INT_P (x
)
4983 || (n
= exact_log2 (INTVAL (x
) & ~7)) <= 0)
4985 output_operand_lossage ("invalid operand for '%%%c'", code
);
5001 output_operand_lossage ("invalid operand for '%%%c'", code
);
5011 /* Print N such that 2^N == X. */
5012 if (!CONST_INT_P (x
) || (n
= exact_log2 (INTVAL (x
))) < 0)
5014 output_operand_lossage ("invalid operand for '%%%c'", code
);
5018 asm_fprintf (f
, "%d", n
);
5023 /* Print the number of non-zero bits in X (a const_int). */
5024 if (!CONST_INT_P (x
))
5026 output_operand_lossage ("invalid operand for '%%%c'", code
);
5030 asm_fprintf (f
, "%u", popcount_hwi (INTVAL (x
)));
5034 /* Print the higher numbered register of a pair (TImode) of regs. */
5035 if (!REG_P (x
) || !GP_REGNUM_P (REGNO (x
) + 1))
5037 output_operand_lossage ("invalid operand for '%%%c'", code
);
5041 asm_fprintf (f
, "%s", reg_names
[REGNO (x
) + 1]);
5048 /* Print a condition (eq, ne, etc) or its inverse. */
5050 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
5051 if (x
== const_true_rtx
)
5058 if (!COMPARISON_P (x
))
5060 output_operand_lossage ("invalid operand for '%%%c'", code
);
5064 cond_code
= aarch64_get_condition_code (x
);
5065 gcc_assert (cond_code
>= 0);
5067 cond_code
= AARCH64_INVERSE_CONDITION_CODE (cond_code
);
5068 fputs (aarch64_condition_codes
[cond_code
], f
);
5077 /* Print a scalar FP/SIMD register name. */
5078 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
5080 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
5083 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - V0_REGNUM
);
5090 /* Print the first FP/SIMD register name in a list. */
5091 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
5093 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
5096 asm_fprintf (f
, "v%d", REGNO (x
) - V0_REGNUM
+ (code
- 'S'));
5100 /* Print a scalar FP/SIMD register name + 1. */
5101 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
5103 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
5106 asm_fprintf (f
, "q%d", REGNO (x
) - V0_REGNUM
+ 1);
5110 /* Print bottom 16 bits of integer constant in hex. */
5111 if (!CONST_INT_P (x
))
5113 output_operand_lossage ("invalid operand for '%%%c'", code
);
5116 asm_fprintf (f
, "0x%wx", UINTVAL (x
) & 0xffff);
5121 /* Print a general register name or the zero register (32-bit or
5124 || (CONST_DOUBLE_P (x
) && aarch64_float_const_zero_rtx_p (x
)))
5126 asm_fprintf (f
, "%czr", code
);
5130 if (REG_P (x
) && GP_REGNUM_P (REGNO (x
)))
5132 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - R0_REGNUM
);
5136 if (REG_P (x
) && REGNO (x
) == SP_REGNUM
)
5138 asm_fprintf (f
, "%ssp", code
== 'w' ? "w" : "");
5145 /* Print a normal operand, if it's a general register, then we
5149 output_operand_lossage ("missing operand");
5153 switch (GET_CODE (x
))
5156 asm_fprintf (f
, "%s", reg_names
[REGNO (x
)]);
5160 output_address (GET_MODE (x
), XEXP (x
, 0));
5166 output_addr_const (asm_out_file
, x
);
5170 asm_fprintf (f
, "%wd", INTVAL (x
));
5174 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_INT
)
5177 aarch64_const_vec_all_same_in_range_p (x
,
5179 HOST_WIDE_INT_MAX
));
5180 asm_fprintf (f
, "%wd", INTVAL (CONST_VECTOR_ELT (x
, 0)));
5182 else if (aarch64_simd_imm_zero_p (x
, GET_MODE (x
)))
5191 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
5192 be getting CONST_DOUBLEs holding integers. */
5193 gcc_assert (GET_MODE (x
) != VOIDmode
);
5194 if (aarch64_float_const_zero_rtx_p (x
))
5199 else if (aarch64_float_const_representable_p (x
))
5202 char float_buf
[buf_size
] = {'\0'};
5203 real_to_decimal_for_mode (float_buf
,
5204 CONST_DOUBLE_REAL_VALUE (x
),
5207 asm_fprintf (asm_out_file
, "%s", float_buf
);
5211 output_operand_lossage ("invalid constant");
5214 output_operand_lossage ("invalid operand");
5220 if (GET_CODE (x
) == HIGH
)
5223 switch (aarch64_classify_symbolic_expression (x
))
5225 case SYMBOL_SMALL_GOT_4G
:
5226 asm_fprintf (asm_out_file
, ":got:");
5229 case SYMBOL_SMALL_TLSGD
:
5230 asm_fprintf (asm_out_file
, ":tlsgd:");
5233 case SYMBOL_SMALL_TLSDESC
:
5234 asm_fprintf (asm_out_file
, ":tlsdesc:");
5237 case SYMBOL_SMALL_TLSIE
:
5238 asm_fprintf (asm_out_file
, ":gottprel:");
5241 case SYMBOL_TLSLE24
:
5242 asm_fprintf (asm_out_file
, ":tprel:");
5245 case SYMBOL_TINY_GOT
:
5252 output_addr_const (asm_out_file
, x
);
5256 switch (aarch64_classify_symbolic_expression (x
))
5258 case SYMBOL_SMALL_GOT_4G
:
5259 asm_fprintf (asm_out_file
, ":lo12:");
5262 case SYMBOL_SMALL_TLSGD
:
5263 asm_fprintf (asm_out_file
, ":tlsgd_lo12:");
5266 case SYMBOL_SMALL_TLSDESC
:
5267 asm_fprintf (asm_out_file
, ":tlsdesc_lo12:");
5270 case SYMBOL_SMALL_TLSIE
:
5271 asm_fprintf (asm_out_file
, ":gottprel_lo12:");
5274 case SYMBOL_TLSLE12
:
5275 asm_fprintf (asm_out_file
, ":tprel_lo12:");
5278 case SYMBOL_TLSLE24
:
5279 asm_fprintf (asm_out_file
, ":tprel_lo12_nc:");
5282 case SYMBOL_TINY_GOT
:
5283 asm_fprintf (asm_out_file
, ":got:");
5286 case SYMBOL_TINY_TLSIE
:
5287 asm_fprintf (asm_out_file
, ":gottprel:");
5293 output_addr_const (asm_out_file
, x
);
5298 switch (aarch64_classify_symbolic_expression (x
))
5300 case SYMBOL_TLSLE24
:
5301 asm_fprintf (asm_out_file
, ":tprel_hi12:");
5306 output_addr_const (asm_out_file
, x
);
5311 HOST_WIDE_INT cond_code
;
5314 if (!CONST_INT_P (x
))
5316 output_operand_lossage ("invalid operand for '%%%c'", code
);
5320 cond_code
= INTVAL (x
);
5321 gcc_assert (cond_code
>= 0 && cond_code
<= AARCH64_NV
);
5322 asm_fprintf (f
, "%d", aarch64_nzcv_codes
[cond_code
]);
5327 output_operand_lossage ("invalid operand prefix '%%%c'", code
);
5333 aarch64_print_operand_address (FILE *f
, machine_mode mode
, rtx x
)
5335 struct aarch64_address_info addr
;
5337 if (aarch64_classify_address (&addr
, x
, mode
, MEM
, true))
5340 case ADDRESS_REG_IMM
:
5341 if (addr
.offset
== const0_rtx
)
5342 asm_fprintf (f
, "[%s]", reg_names
[REGNO (addr
.base
)]);
5344 asm_fprintf (f
, "[%s, %wd]", reg_names
[REGNO (addr
.base
)],
5345 INTVAL (addr
.offset
));
5348 case ADDRESS_REG_REG
:
5349 if (addr
.shift
== 0)
5350 asm_fprintf (f
, "[%s, %s]", reg_names
[REGNO (addr
.base
)],
5351 reg_names
[REGNO (addr
.offset
)]);
5353 asm_fprintf (f
, "[%s, %s, lsl %u]", reg_names
[REGNO (addr
.base
)],
5354 reg_names
[REGNO (addr
.offset
)], addr
.shift
);
5357 case ADDRESS_REG_UXTW
:
5358 if (addr
.shift
== 0)
5359 asm_fprintf (f
, "[%s, w%d, uxtw]", reg_names
[REGNO (addr
.base
)],
5360 REGNO (addr
.offset
) - R0_REGNUM
);
5362 asm_fprintf (f
, "[%s, w%d, uxtw %u]", reg_names
[REGNO (addr
.base
)],
5363 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
5366 case ADDRESS_REG_SXTW
:
5367 if (addr
.shift
== 0)
5368 asm_fprintf (f
, "[%s, w%d, sxtw]", reg_names
[REGNO (addr
.base
)],
5369 REGNO (addr
.offset
) - R0_REGNUM
);
5371 asm_fprintf (f
, "[%s, w%d, sxtw %u]", reg_names
[REGNO (addr
.base
)],
5372 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
5375 case ADDRESS_REG_WB
:
5376 switch (GET_CODE (x
))
5379 asm_fprintf (f
, "[%s, %d]!", reg_names
[REGNO (addr
.base
)],
5380 GET_MODE_SIZE (mode
));
5383 asm_fprintf (f
, "[%s], %d", reg_names
[REGNO (addr
.base
)],
5384 GET_MODE_SIZE (mode
));
5387 asm_fprintf (f
, "[%s, -%d]!", reg_names
[REGNO (addr
.base
)],
5388 GET_MODE_SIZE (mode
));
5391 asm_fprintf (f
, "[%s], -%d", reg_names
[REGNO (addr
.base
)],
5392 GET_MODE_SIZE (mode
));
5395 asm_fprintf (f
, "[%s, %wd]!", reg_names
[REGNO (addr
.base
)],
5396 INTVAL (addr
.offset
));
5399 asm_fprintf (f
, "[%s], %wd", reg_names
[REGNO (addr
.base
)],
5400 INTVAL (addr
.offset
));
5407 case ADDRESS_LO_SUM
:
5408 asm_fprintf (f
, "[%s, #:lo12:", reg_names
[REGNO (addr
.base
)]);
5409 output_addr_const (f
, addr
.offset
);
5410 asm_fprintf (f
, "]");
5413 case ADDRESS_SYMBOLIC
:
5417 output_addr_const (f
, x
);
5421 aarch64_label_mentioned_p (rtx x
)
5426 if (GET_CODE (x
) == LABEL_REF
)
5429 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
5430 referencing instruction, but they are constant offsets, not
5432 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
5435 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
5436 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
5442 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
5443 if (aarch64_label_mentioned_p (XVECEXP (x
, i
, j
)))
5446 else if (fmt
[i
] == 'e' && aarch64_label_mentioned_p (XEXP (x
, i
)))
5453 /* Implement REGNO_REG_CLASS. */
5456 aarch64_regno_regclass (unsigned regno
)
5458 if (GP_REGNUM_P (regno
))
5459 return GENERAL_REGS
;
5461 if (regno
== SP_REGNUM
)
5464 if (regno
== FRAME_POINTER_REGNUM
5465 || regno
== ARG_POINTER_REGNUM
)
5466 return POINTER_REGS
;
5468 if (FP_REGNUM_P (regno
))
5469 return FP_LO_REGNUM_P (regno
) ? FP_LO_REGS
: FP_REGS
;
5475 aarch64_legitimize_address (rtx x
, rtx
/* orig_x */, machine_mode mode
)
5477 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
5478 where mask is selected by alignment and size of the offset.
5479 We try to pick as large a range for the offset as possible to
5480 maximize the chance of a CSE. However, for aligned addresses
5481 we limit the range to 4k so that structures with different sized
5482 elements are likely to use the same base. We need to be careful
5483 not to split a CONST for some forms of address expression, otherwise
5484 it will generate sub-optimal code. */
5486 if (GET_CODE (x
) == PLUS
&& CONST_INT_P (XEXP (x
, 1)))
5488 rtx base
= XEXP (x
, 0);
5489 rtx offset_rtx
= XEXP (x
, 1);
5490 HOST_WIDE_INT offset
= INTVAL (offset_rtx
);
5492 if (GET_CODE (base
) == PLUS
)
5494 rtx op0
= XEXP (base
, 0);
5495 rtx op1
= XEXP (base
, 1);
5497 /* Force any scaling into a temp for CSE. */
5498 op0
= force_reg (Pmode
, op0
);
5499 op1
= force_reg (Pmode
, op1
);
5501 /* Let the pointer register be in op0. */
5502 if (REG_POINTER (op1
))
5503 std::swap (op0
, op1
);
5505 /* If the pointer is virtual or frame related, then we know that
5506 virtual register instantiation or register elimination is going
5507 to apply a second constant. We want the two constants folded
5508 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
5509 if (virt_or_elim_regno_p (REGNO (op0
)))
5511 base
= expand_binop (Pmode
, add_optab
, op0
, offset_rtx
,
5512 NULL_RTX
, true, OPTAB_DIRECT
);
5513 return gen_rtx_PLUS (Pmode
, base
, op1
);
5516 /* Otherwise, in order to encourage CSE (and thence loop strength
5517 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
5518 base
= expand_binop (Pmode
, add_optab
, op0
, op1
,
5519 NULL_RTX
, true, OPTAB_DIRECT
);
5520 x
= gen_rtx_PLUS (Pmode
, base
, offset_rtx
);
5523 /* Does it look like we'll need a 16-byte load/store-pair operation? */
5524 HOST_WIDE_INT base_offset
;
5525 if (GET_MODE_SIZE (mode
) > 16)
5526 base_offset
= (offset
+ 0x400) & ~0x7f0;
5527 /* For offsets aren't a multiple of the access size, the limit is
5529 else if (offset
& (GET_MODE_SIZE (mode
) - 1))
5531 base_offset
= (offset
+ 0x100) & ~0x1ff;
5533 /* BLKmode typically uses LDP of X-registers. */
5534 if (mode
== BLKmode
)
5535 base_offset
= (offset
+ 512) & ~0x3ff;
5537 /* Small negative offsets are supported. */
5538 else if (IN_RANGE (offset
, -256, 0))
5540 else if (mode
== TImode
|| mode
== TFmode
)
5541 base_offset
= (offset
+ 0x100) & ~0x1ff;
5542 /* Use 12-bit offset by access size. */
5544 base_offset
= offset
& (~0xfff * GET_MODE_SIZE (mode
));
5546 if (base_offset
!= 0)
5548 base
= plus_constant (Pmode
, base
, base_offset
);
5549 base
= force_operand (base
, NULL_RTX
);
5550 return plus_constant (Pmode
, base
, offset
- base_offset
);
5557 /* Return the reload icode required for a constant pool in mode. */
5558 static enum insn_code
5559 aarch64_constant_pool_reload_icode (machine_mode mode
)
5564 return CODE_FOR_aarch64_reload_movcpsfdi
;
5567 return CODE_FOR_aarch64_reload_movcpdfdi
;
5570 return CODE_FOR_aarch64_reload_movcptfdi
;
5573 return CODE_FOR_aarch64_reload_movcpv8qidi
;
5576 return CODE_FOR_aarch64_reload_movcpv16qidi
;
5579 return CODE_FOR_aarch64_reload_movcpv4hidi
;
5582 return CODE_FOR_aarch64_reload_movcpv8hidi
;
5585 return CODE_FOR_aarch64_reload_movcpv2sidi
;
5588 return CODE_FOR_aarch64_reload_movcpv4sidi
;
5591 return CODE_FOR_aarch64_reload_movcpv2didi
;
5594 return CODE_FOR_aarch64_reload_movcpv2dfdi
;
5603 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED
, rtx x
,
5606 secondary_reload_info
*sri
)
5609 /* If we have to disable direct literal pool loads and stores because the
5610 function is too big, then we need a scratch register. */
5611 if (MEM_P (x
) && GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
)
5612 && (SCALAR_FLOAT_MODE_P (GET_MODE (x
))
5613 || targetm
.vector_mode_supported_p (GET_MODE (x
)))
5614 && !aarch64_pcrelative_literal_loads
)
5616 sri
->icode
= aarch64_constant_pool_reload_icode (mode
);
5620 /* Without the TARGET_SIMD instructions we cannot move a Q register
5621 to a Q register directly. We need a scratch. */
5622 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
5623 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
5624 && reg_class_subset_p (rclass
, FP_REGS
))
5627 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
5628 else if (mode
== TImode
)
5629 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
5633 /* A TFmode or TImode memory access should be handled via an FP_REGS
5634 because AArch64 has richer addressing modes for LDR/STR instructions
5635 than LDP/STP instructions. */
5636 if (TARGET_FLOAT
&& rclass
== GENERAL_REGS
5637 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
5640 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
5641 return GENERAL_REGS
;
5647 aarch64_can_eliminate (const int from
, const int to
)
5649 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
5650 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
5652 if (frame_pointer_needed
)
5654 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5656 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
5658 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
5659 && !cfun
->calls_alloca
)
5661 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5668 /* If we decided that we didn't need a leaf frame pointer but then used
5669 LR in the function, then we'll want a frame pointer after all, so
5670 prevent this elimination to ensure a frame pointer is used. */
5671 if (to
== STACK_POINTER_REGNUM
5672 && flag_omit_leaf_frame_pointer
5673 && df_regs_ever_live_p (LR_REGNUM
))
5681 aarch64_initial_elimination_offset (unsigned from
, unsigned to
)
5683 aarch64_layout_frame ();
5685 if (to
== HARD_FRAME_POINTER_REGNUM
)
5687 if (from
== ARG_POINTER_REGNUM
)
5688 return cfun
->machine
->frame
.hard_fp_offset
;
5690 if (from
== FRAME_POINTER_REGNUM
)
5691 return cfun
->machine
->frame
.hard_fp_offset
5692 - cfun
->machine
->frame
.locals_offset
;
5695 if (to
== STACK_POINTER_REGNUM
)
5697 if (from
== FRAME_POINTER_REGNUM
)
5698 return cfun
->machine
->frame
.frame_size
5699 - cfun
->machine
->frame
.locals_offset
;
5702 return cfun
->machine
->frame
.frame_size
;
5705 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
5709 aarch64_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
5713 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
5718 aarch64_asm_trampoline_template (FILE *f
)
5722 asm_fprintf (f
, "\tldr\tw%d, .+16\n", IP1_REGNUM
- R0_REGNUM
);
5723 asm_fprintf (f
, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM
- R0_REGNUM
);
5727 asm_fprintf (f
, "\tldr\t%s, .+16\n", reg_names
[IP1_REGNUM
]);
5728 asm_fprintf (f
, "\tldr\t%s, .+20\n", reg_names
[STATIC_CHAIN_REGNUM
]);
5730 asm_fprintf (f
, "\tbr\t%s\n", reg_names
[IP1_REGNUM
]);
5731 assemble_aligned_integer (4, const0_rtx
);
5732 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
5733 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
5737 aarch64_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
5739 rtx fnaddr
, mem
, a_tramp
;
5740 const int tramp_code_sz
= 16;
5742 /* Don't need to copy the trailing D-words, we fill those in below. */
5743 emit_block_move (m_tramp
, assemble_trampoline_template (),
5744 GEN_INT (tramp_code_sz
), BLOCK_OP_NORMAL
);
5745 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
);
5746 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
5747 if (GET_MODE (fnaddr
) != ptr_mode
)
5748 fnaddr
= convert_memory_address (ptr_mode
, fnaddr
);
5749 emit_move_insn (mem
, fnaddr
);
5751 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
+ POINTER_BYTES
);
5752 emit_move_insn (mem
, chain_value
);
5754 /* XXX We should really define a "clear_cache" pattern and use
5755 gen_clear_cache(). */
5756 a_tramp
= XEXP (m_tramp
, 0);
5757 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
5758 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, ptr_mode
,
5759 plus_constant (ptr_mode
, a_tramp
, TRAMPOLINE_SIZE
),
5763 static unsigned char
5764 aarch64_class_max_nregs (reg_class_t regclass
, machine_mode mode
)
5768 case CALLER_SAVE_REGS
:
5775 aarch64_vector_mode_p (mode
)
5776 ? (GET_MODE_SIZE (mode
) + UNITS_PER_VREG
- 1) / UNITS_PER_VREG
5777 : (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5791 aarch64_preferred_reload_class (rtx x
, reg_class_t regclass
)
5793 if (regclass
== POINTER_REGS
)
5794 return GENERAL_REGS
;
5796 if (regclass
== STACK_REG
)
5799 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x
)), POINTER_REGS
))
5805 /* If it's an integer immediate that MOVI can't handle, then
5806 FP_REGS is not an option, so we return NO_REGS instead. */
5807 if (CONST_INT_P (x
) && reg_class_subset_p (regclass
, FP_REGS
)
5808 && !aarch64_simd_imm_scalar_p (x
, GET_MODE (x
)))
5811 /* Register eliminiation can result in a request for
5812 SP+constant->FP_REGS. We cannot support such operations which
5813 use SP as source and an FP_REG as destination, so reject out
5815 if (! reg_class_subset_p (regclass
, GENERAL_REGS
) && GET_CODE (x
) == PLUS
)
5817 rtx lhs
= XEXP (x
, 0);
5819 /* Look through a possible SUBREG introduced by ILP32. */
5820 if (GET_CODE (lhs
) == SUBREG
)
5821 lhs
= SUBREG_REG (lhs
);
5823 gcc_assert (REG_P (lhs
));
5824 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs
)),
5833 aarch64_asm_output_labelref (FILE* f
, const char *name
)
5835 asm_fprintf (f
, "%U%s", name
);
5839 aarch64_elf_asm_constructor (rtx symbol
, int priority
)
5841 if (priority
== DEFAULT_INIT_PRIORITY
)
5842 default_ctor_section_asm_out_constructor (symbol
, priority
);
5846 /* While priority is known to be in range [0, 65535], so 18 bytes
5847 would be enough, the compiler might not know that. To avoid
5848 -Wformat-truncation false positive, use a larger size. */
5850 snprintf (buf
, sizeof (buf
), ".init_array.%.5u", priority
);
5851 s
= get_section (buf
, SECTION_WRITE
, NULL
);
5852 switch_to_section (s
);
5853 assemble_align (POINTER_SIZE
);
5854 assemble_aligned_integer (POINTER_BYTES
, symbol
);
5859 aarch64_elf_asm_destructor (rtx symbol
, int priority
)
5861 if (priority
== DEFAULT_INIT_PRIORITY
)
5862 default_dtor_section_asm_out_destructor (symbol
, priority
);
5866 /* While priority is known to be in range [0, 65535], so 18 bytes
5867 would be enough, the compiler might not know that. To avoid
5868 -Wformat-truncation false positive, use a larger size. */
5870 snprintf (buf
, sizeof (buf
), ".fini_array.%.5u", priority
);
5871 s
= get_section (buf
, SECTION_WRITE
, NULL
);
5872 switch_to_section (s
);
5873 assemble_align (POINTER_SIZE
);
5874 assemble_aligned_integer (POINTER_BYTES
, symbol
);
5879 aarch64_output_casesi (rtx
*operands
)
5883 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
5885 static const char *const patterns
[4][2] =
5888 "ldrb\t%w3, [%0,%w1,uxtw]",
5889 "add\t%3, %4, %w3, sxtb #2"
5892 "ldrh\t%w3, [%0,%w1,uxtw #1]",
5893 "add\t%3, %4, %w3, sxth #2"
5896 "ldr\t%w3, [%0,%w1,uxtw #2]",
5897 "add\t%3, %4, %w3, sxtw #2"
5899 /* We assume that DImode is only generated when not optimizing and
5900 that we don't really need 64-bit address offsets. That would
5901 imply an object file with 8GB of code in a single function! */
5903 "ldr\t%w3, [%0,%w1,uxtw #2]",
5904 "add\t%3, %4, %w3, sxtw #2"
5908 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
5910 index
= exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec
)));
5912 gcc_assert (index
>= 0 && index
<= 3);
5914 /* Need to implement table size reduction, by chaning the code below. */
5915 output_asm_insn (patterns
[index
][0], operands
);
5916 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
5917 snprintf (buf
, sizeof (buf
),
5918 "adr\t%%4, %s", targetm
.strip_name_encoding (label
));
5919 output_asm_insn (buf
, operands
);
5920 output_asm_insn (patterns
[index
][1], operands
);
5921 output_asm_insn ("br\t%3", operands
);
5922 assemble_label (asm_out_file
, label
);
5927 /* Return size in bits of an arithmetic operand which is shifted/scaled and
5928 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5932 aarch64_uxt_size (int shift
, HOST_WIDE_INT mask
)
5934 if (shift
>= 0 && shift
<= 3)
5937 for (size
= 8; size
<= 32; size
*= 2)
5939 HOST_WIDE_INT bits
= ((HOST_WIDE_INT
)1U << size
) - 1;
5940 if (mask
== bits
<< shift
)
5947 /* Constant pools are per function only when PC relative
5948 literal loads are true or we are in the large memory
5952 aarch64_can_use_per_function_literal_pools_p (void)
5954 return (aarch64_pcrelative_literal_loads
5955 || aarch64_cmodel
== AARCH64_CMODEL_LARGE
);
5959 aarch64_use_blocks_for_constant_p (machine_mode
, const_rtx
)
5961 /* Fixme:: In an ideal world this would work similar
5962 to the logic in aarch64_select_rtx_section but this
5963 breaks bootstrap in gcc go. For now we workaround
5964 this by returning false here. */
5968 /* Select appropriate section for constants depending
5969 on where we place literal pools. */
5972 aarch64_select_rtx_section (machine_mode mode
,
5974 unsigned HOST_WIDE_INT align
)
5976 if (aarch64_can_use_per_function_literal_pools_p ())
5977 return function_section (current_function_decl
);
5979 return default_elf_select_rtx_section (mode
, x
, align
);
5982 /* Implement ASM_OUTPUT_POOL_EPILOGUE. */
5984 aarch64_asm_output_pool_epilogue (FILE *f
, const char *, tree
,
5985 HOST_WIDE_INT offset
)
5987 /* When using per-function literal pools, we must ensure that any code
5988 section is aligned to the minimal instruction length, lest we get
5989 errors from the assembler re "unaligned instructions". */
5990 if ((offset
& 3) && aarch64_can_use_per_function_literal_pools_p ())
5991 ASM_OUTPUT_ALIGN (f
, 2);
5996 /* Helper function for rtx cost calculation. Strip a shift expression
5997 from X. Returns the inner operand if successful, or the original
5998 expression on failure. */
6000 aarch64_strip_shift (rtx x
)
6004 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
6005 we can convert both to ROR during final output. */
6006 if ((GET_CODE (op
) == ASHIFT
6007 || GET_CODE (op
) == ASHIFTRT
6008 || GET_CODE (op
) == LSHIFTRT
6009 || GET_CODE (op
) == ROTATERT
6010 || GET_CODE (op
) == ROTATE
)
6011 && CONST_INT_P (XEXP (op
, 1)))
6012 return XEXP (op
, 0);
6014 if (GET_CODE (op
) == MULT
6015 && CONST_INT_P (XEXP (op
, 1))
6016 && ((unsigned) exact_log2 (INTVAL (XEXP (op
, 1)))) < 64)
6017 return XEXP (op
, 0);
6022 /* Helper function for rtx cost calculation. Strip an extend
6023 expression from X. Returns the inner operand if successful, or the
6024 original expression on failure. We deal with a number of possible
6025 canonicalization variations here. */
6027 aarch64_strip_extend (rtx x
)
6031 /* Zero and sign extraction of a widened value. */
6032 if ((GET_CODE (op
) == ZERO_EXTRACT
|| GET_CODE (op
) == SIGN_EXTRACT
)
6033 && XEXP (op
, 2) == const0_rtx
6034 && GET_CODE (XEXP (op
, 0)) == MULT
6035 && aarch64_is_extend_from_extract (GET_MODE (op
), XEXP (XEXP (op
, 0), 1),
6037 return XEXP (XEXP (op
, 0), 0);
6039 /* It can also be represented (for zero-extend) as an AND with an
6041 if (GET_CODE (op
) == AND
6042 && GET_CODE (XEXP (op
, 0)) == MULT
6043 && CONST_INT_P (XEXP (XEXP (op
, 0), 1))
6044 && CONST_INT_P (XEXP (op
, 1))
6045 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op
, 0), 1))),
6046 INTVAL (XEXP (op
, 1))) != 0)
6047 return XEXP (XEXP (op
, 0), 0);
6049 /* Now handle extended register, as this may also have an optional
6050 left shift by 1..4. */
6051 if (GET_CODE (op
) == ASHIFT
6052 && CONST_INT_P (XEXP (op
, 1))
6053 && ((unsigned HOST_WIDE_INT
) INTVAL (XEXP (op
, 1))) <= 4)
6056 if (GET_CODE (op
) == ZERO_EXTEND
6057 || GET_CODE (op
) == SIGN_EXTEND
)
6066 /* Return true iff CODE is a shift supported in combination
6067 with arithmetic instructions. */
6070 aarch64_shift_p (enum rtx_code code
)
6072 return code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
;
6075 /* Helper function for rtx cost calculation. Calculate the cost of
6076 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
6077 Return the calculated cost of the expression, recursing manually in to
6078 operands where needed. */
6081 aarch64_rtx_mult_cost (rtx x
, enum rtx_code code
, int outer
, bool speed
)
6084 const struct cpu_cost_table
*extra_cost
6085 = aarch64_tune_params
.insn_extra_cost
;
6087 bool compound_p
= (outer
== PLUS
|| outer
== MINUS
);
6088 machine_mode mode
= GET_MODE (x
);
6090 gcc_checking_assert (code
== MULT
);
6095 if (VECTOR_MODE_P (mode
))
6096 mode
= GET_MODE_INNER (mode
);
6098 /* Integer multiply/fma. */
6099 if (GET_MODE_CLASS (mode
) == MODE_INT
)
6101 /* The multiply will be canonicalized as a shift, cost it as such. */
6102 if (aarch64_shift_p (GET_CODE (x
))
6103 || (CONST_INT_P (op1
)
6104 && exact_log2 (INTVAL (op1
)) > 0))
6106 bool is_extend
= GET_CODE (op0
) == ZERO_EXTEND
6107 || GET_CODE (op0
) == SIGN_EXTEND
;
6113 /* ARITH + shift-by-register. */
6114 cost
+= extra_cost
->alu
.arith_shift_reg
;
6116 /* ARITH + extended register. We don't have a cost field
6117 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
6118 cost
+= extra_cost
->alu
.extend_arith
;
6120 /* ARITH + shift-by-immediate. */
6121 cost
+= extra_cost
->alu
.arith_shift
;
6124 /* LSL (immediate). */
6125 cost
+= extra_cost
->alu
.shift
;
6128 /* Strip extends as we will have costed them in the case above. */
6130 op0
= aarch64_strip_extend (op0
);
6132 cost
+= rtx_cost (op0
, VOIDmode
, code
, 0, speed
);
6137 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
6138 compound and let the below cases handle it. After all, MNEG is a
6139 special-case alias of MSUB. */
6140 if (GET_CODE (op0
) == NEG
)
6142 op0
= XEXP (op0
, 0);
6146 /* Integer multiplies or FMAs have zero/sign extending variants. */
6147 if ((GET_CODE (op0
) == ZERO_EXTEND
6148 && GET_CODE (op1
) == ZERO_EXTEND
)
6149 || (GET_CODE (op0
) == SIGN_EXTEND
6150 && GET_CODE (op1
) == SIGN_EXTEND
))
6152 cost
+= rtx_cost (XEXP (op0
, 0), VOIDmode
, MULT
, 0, speed
);
6153 cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, MULT
, 1, speed
);
6158 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
6159 cost
+= extra_cost
->mult
[0].extend_add
;
6161 /* MUL/SMULL/UMULL. */
6162 cost
+= extra_cost
->mult
[0].extend
;
6168 /* This is either an integer multiply or a MADD. In both cases
6169 we want to recurse and cost the operands. */
6170 cost
+= rtx_cost (op0
, mode
, MULT
, 0, speed
);
6171 cost
+= rtx_cost (op1
, mode
, MULT
, 1, speed
);
6177 cost
+= extra_cost
->mult
[mode
== DImode
].add
;
6180 cost
+= extra_cost
->mult
[mode
== DImode
].simple
;
6189 /* Floating-point FMA/FMUL can also support negations of the
6190 operands, unless the rounding mode is upward or downward in
6191 which case FNMUL is different than FMUL with operand negation. */
6192 bool neg0
= GET_CODE (op0
) == NEG
;
6193 bool neg1
= GET_CODE (op1
) == NEG
;
6194 if (compound_p
|| !flag_rounding_math
|| (neg0
&& neg1
))
6197 op0
= XEXP (op0
, 0);
6199 op1
= XEXP (op1
, 0);
6203 /* FMADD/FNMADD/FNMSUB/FMSUB. */
6204 cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
6207 cost
+= extra_cost
->fp
[mode
== DFmode
].mult
;
6210 cost
+= rtx_cost (op0
, mode
, MULT
, 0, speed
);
6211 cost
+= rtx_cost (op1
, mode
, MULT
, 1, speed
);
6217 aarch64_address_cost (rtx x
,
6219 addr_space_t as ATTRIBUTE_UNUSED
,
6222 enum rtx_code c
= GET_CODE (x
);
6223 const struct cpu_addrcost_table
*addr_cost
= aarch64_tune_params
.addr_cost
;
6224 struct aarch64_address_info info
;
6228 if (!aarch64_classify_address (&info
, x
, mode
, c
, false))
6230 if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
)
6232 /* This is a CONST or SYMBOL ref which will be split
6233 in a different way depending on the code model in use.
6234 Cost it through the generic infrastructure. */
6235 int cost_symbol_ref
= rtx_cost (x
, Pmode
, MEM
, 1, speed
);
6236 /* Divide through by the cost of one instruction to
6237 bring it to the same units as the address costs. */
6238 cost_symbol_ref
/= COSTS_N_INSNS (1);
6239 /* The cost is then the cost of preparing the address,
6240 followed by an immediate (possibly 0) offset. */
6241 return cost_symbol_ref
+ addr_cost
->imm_offset
;
6245 /* This is most likely a jump table from a case
6247 return addr_cost
->register_offset
;
6253 case ADDRESS_LO_SUM
:
6254 case ADDRESS_SYMBOLIC
:
6255 case ADDRESS_REG_IMM
:
6256 cost
+= addr_cost
->imm_offset
;
6259 case ADDRESS_REG_WB
:
6260 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== PRE_MODIFY
)
6261 cost
+= addr_cost
->pre_modify
;
6262 else if (c
== POST_INC
|| c
== POST_DEC
|| c
== POST_MODIFY
)
6263 cost
+= addr_cost
->post_modify
;
6269 case ADDRESS_REG_REG
:
6270 cost
+= addr_cost
->register_offset
;
6273 case ADDRESS_REG_SXTW
:
6274 cost
+= addr_cost
->register_sextend
;
6277 case ADDRESS_REG_UXTW
:
6278 cost
+= addr_cost
->register_zextend
;
6288 /* For the sake of calculating the cost of the shifted register
6289 component, we can treat same sized modes in the same way. */
6290 switch (GET_MODE_BITSIZE (mode
))
6293 cost
+= addr_cost
->addr_scale_costs
.hi
;
6297 cost
+= addr_cost
->addr_scale_costs
.si
;
6301 cost
+= addr_cost
->addr_scale_costs
.di
;
6304 /* We can't tell, or this is a 128-bit vector. */
6306 cost
+= addr_cost
->addr_scale_costs
.ti
;
6314 /* Return the cost of a branch. If SPEED_P is true then the compiler is
6315 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
6319 aarch64_branch_cost (bool speed_p
, bool predictable_p
)
6321 /* When optimizing for speed, use the cost of unpredictable branches. */
6322 const struct cpu_branch_cost
*branch_costs
=
6323 aarch64_tune_params
.branch_costs
;
6325 if (!speed_p
|| predictable_p
)
6326 return branch_costs
->predictable
;
6328 return branch_costs
->unpredictable
;
6331 /* Return true if the RTX X in mode MODE is a zero or sign extract
6332 usable in an ADD or SUB (extended register) instruction. */
6334 aarch64_rtx_arith_op_extract_p (rtx x
, machine_mode mode
)
6336 /* Catch add with a sign extract.
6337 This is add_<optab><mode>_multp2. */
6338 if (GET_CODE (x
) == SIGN_EXTRACT
6339 || GET_CODE (x
) == ZERO_EXTRACT
)
6341 rtx op0
= XEXP (x
, 0);
6342 rtx op1
= XEXP (x
, 1);
6343 rtx op2
= XEXP (x
, 2);
6345 if (GET_CODE (op0
) == MULT
6346 && CONST_INT_P (op1
)
6347 && op2
== const0_rtx
6348 && CONST_INT_P (XEXP (op0
, 1))
6349 && aarch64_is_extend_from_extract (mode
,
6356 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
6358 else if (GET_CODE (x
) == SIGN_EXTEND
6359 || GET_CODE (x
) == ZERO_EXTEND
)
6360 return REG_P (XEXP (x
, 0));
6366 aarch64_frint_unspec_p (unsigned int u
)
6384 /* Return true iff X is an rtx that will match an extr instruction
6385 i.e. as described in the *extr<mode>5_insn family of patterns.
6386 OP0 and OP1 will be set to the operands of the shifts involved
6387 on success and will be NULL_RTX otherwise. */
6390 aarch64_extr_rtx_p (rtx x
, rtx
*res_op0
, rtx
*res_op1
)
6393 machine_mode mode
= GET_MODE (x
);
6395 *res_op0
= NULL_RTX
;
6396 *res_op1
= NULL_RTX
;
6398 if (GET_CODE (x
) != IOR
)
6404 if ((GET_CODE (op0
) == ASHIFT
&& GET_CODE (op1
) == LSHIFTRT
)
6405 || (GET_CODE (op1
) == ASHIFT
&& GET_CODE (op0
) == LSHIFTRT
))
6407 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
6408 if (GET_CODE (op1
) == ASHIFT
)
6409 std::swap (op0
, op1
);
6411 if (!CONST_INT_P (XEXP (op0
, 1)) || !CONST_INT_P (XEXP (op1
, 1)))
6414 unsigned HOST_WIDE_INT shft_amnt_0
= UINTVAL (XEXP (op0
, 1));
6415 unsigned HOST_WIDE_INT shft_amnt_1
= UINTVAL (XEXP (op1
, 1));
6417 if (shft_amnt_0
< GET_MODE_BITSIZE (mode
)
6418 && shft_amnt_0
+ shft_amnt_1
== GET_MODE_BITSIZE (mode
))
6420 *res_op0
= XEXP (op0
, 0);
6421 *res_op1
= XEXP (op1
, 0);
6429 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
6430 storing it in *COST. Result is true if the total cost of the operation
6431 has now been calculated. */
6433 aarch64_if_then_else_costs (rtx op0
, rtx op1
, rtx op2
, int *cost
, bool speed
)
6437 enum rtx_code cmpcode
;
6439 if (COMPARISON_P (op0
))
6441 inner
= XEXP (op0
, 0);
6442 comparator
= XEXP (op0
, 1);
6443 cmpcode
= GET_CODE (op0
);
6448 comparator
= const0_rtx
;
6452 if (GET_CODE (op1
) == PC
|| GET_CODE (op2
) == PC
)
6454 /* Conditional branch. */
6455 if (GET_MODE_CLASS (GET_MODE (inner
)) == MODE_CC
)
6459 if (cmpcode
== NE
|| cmpcode
== EQ
)
6461 if (comparator
== const0_rtx
)
6463 /* TBZ/TBNZ/CBZ/CBNZ. */
6464 if (GET_CODE (inner
) == ZERO_EXTRACT
)
6466 *cost
+= rtx_cost (XEXP (inner
, 0), VOIDmode
,
6467 ZERO_EXTRACT
, 0, speed
);
6470 *cost
+= rtx_cost (inner
, VOIDmode
, cmpcode
, 0, speed
);
6475 else if (cmpcode
== LT
|| cmpcode
== GE
)
6478 if (comparator
== const0_rtx
)
6483 else if (GET_MODE_CLASS (GET_MODE (inner
)) == MODE_CC
)
6486 if (GET_CODE (op1
) == COMPARE
)
6488 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
6489 if (XEXP (op1
, 1) == const0_rtx
)
6493 machine_mode mode
= GET_MODE (XEXP (op1
, 0));
6494 const struct cpu_cost_table
*extra_cost
6495 = aarch64_tune_params
.insn_extra_cost
;
6497 if (GET_MODE_CLASS (mode
) == MODE_INT
)
6498 *cost
+= extra_cost
->alu
.arith
;
6500 *cost
+= extra_cost
->fp
[mode
== DFmode
].compare
;
6505 /* It's a conditional operation based on the status flags,
6506 so it must be some flavor of CSEL. */
6508 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
6509 if (GET_CODE (op1
) == NEG
6510 || GET_CODE (op1
) == NOT
6511 || (GET_CODE (op1
) == PLUS
&& XEXP (op1
, 1) == const1_rtx
))
6512 op1
= XEXP (op1
, 0);
6513 else if (GET_CODE (op1
) == ZERO_EXTEND
&& GET_CODE (op2
) == ZERO_EXTEND
)
6515 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
6516 op1
= XEXP (op1
, 0);
6517 op2
= XEXP (op2
, 0);
6520 *cost
+= rtx_cost (op1
, VOIDmode
, IF_THEN_ELSE
, 1, speed
);
6521 *cost
+= rtx_cost (op2
, VOIDmode
, IF_THEN_ELSE
, 2, speed
);
6525 /* We don't know what this is, cost all operands. */
6529 /* Check whether X is a bitfield operation of the form shift + extend that
6530 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
6531 operand to which the bitfield operation is applied. Otherwise return
6535 aarch64_extend_bitfield_pattern_p (rtx x
)
6537 rtx_code outer_code
= GET_CODE (x
);
6538 machine_mode outer_mode
= GET_MODE (x
);
6540 if (outer_code
!= ZERO_EXTEND
&& outer_code
!= SIGN_EXTEND
6541 && outer_mode
!= SImode
&& outer_mode
!= DImode
)
6544 rtx inner
= XEXP (x
, 0);
6545 rtx_code inner_code
= GET_CODE (inner
);
6546 machine_mode inner_mode
= GET_MODE (inner
);
6552 if (CONST_INT_P (XEXP (inner
, 1))
6553 && (inner_mode
== QImode
|| inner_mode
== HImode
))
6554 op
= XEXP (inner
, 0);
6557 if (outer_code
== ZERO_EXTEND
&& CONST_INT_P (XEXP (inner
, 1))
6558 && (inner_mode
== QImode
|| inner_mode
== HImode
))
6559 op
= XEXP (inner
, 0);
6562 if (outer_code
== SIGN_EXTEND
&& CONST_INT_P (XEXP (inner
, 1))
6563 && (inner_mode
== QImode
|| inner_mode
== HImode
))
6564 op
= XEXP (inner
, 0);
6573 /* Return true if the mask and a shift amount from an RTX of the form
6574 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
6575 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
6578 aarch64_mask_and_shift_for_ubfiz_p (machine_mode mode
, rtx mask
, rtx shft_amnt
)
6580 return CONST_INT_P (mask
) && CONST_INT_P (shft_amnt
)
6581 && INTVAL (shft_amnt
) < GET_MODE_BITSIZE (mode
)
6582 && exact_log2 ((INTVAL (mask
) >> INTVAL (shft_amnt
)) + 1) >= 0
6583 && (INTVAL (mask
) & ((1 << INTVAL (shft_amnt
)) - 1)) == 0;
6586 /* Calculate the cost of calculating X, storing it in *COST. Result
6587 is true if the total cost of the operation has now been calculated. */
6589 aarch64_rtx_costs (rtx x
, machine_mode mode
, int outer ATTRIBUTE_UNUSED
,
6590 int param ATTRIBUTE_UNUSED
, int *cost
, bool speed
)
6593 const struct cpu_cost_table
*extra_cost
6594 = aarch64_tune_params
.insn_extra_cost
;
6595 int code
= GET_CODE (x
);
6597 /* By default, assume that everything has equivalent cost to the
6598 cheapest instruction. Any additional costs are applied as a delta
6599 above this default. */
6600 *cost
= COSTS_N_INSNS (1);
6605 /* The cost depends entirely on the operands to SET. */
6610 switch (GET_CODE (op0
))
6615 rtx address
= XEXP (op0
, 0);
6616 if (VECTOR_MODE_P (mode
))
6617 *cost
+= extra_cost
->ldst
.storev
;
6618 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
6619 *cost
+= extra_cost
->ldst
.store
;
6620 else if (mode
== SFmode
)
6621 *cost
+= extra_cost
->ldst
.storef
;
6622 else if (mode
== DFmode
)
6623 *cost
+= extra_cost
->ldst
.stored
;
6626 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
6630 *cost
+= rtx_cost (op1
, mode
, SET
, 1, speed
);
6634 if (! REG_P (SUBREG_REG (op0
)))
6635 *cost
+= rtx_cost (SUBREG_REG (op0
), VOIDmode
, SET
, 0, speed
);
6639 /* The cost is one per vector-register copied. */
6640 if (VECTOR_MODE_P (GET_MODE (op0
)) && REG_P (op1
))
6642 int n_minus_1
= (GET_MODE_SIZE (GET_MODE (op0
)) - 1)
6643 / GET_MODE_SIZE (V4SImode
);
6644 *cost
= COSTS_N_INSNS (n_minus_1
+ 1);
6646 /* const0_rtx is in general free, but we will use an
6647 instruction to set a register to 0. */
6648 else if (REG_P (op1
) || op1
== const0_rtx
)
6650 /* The cost is 1 per register copied. */
6651 int n_minus_1
= (GET_MODE_SIZE (GET_MODE (op0
)) - 1)
6653 *cost
= COSTS_N_INSNS (n_minus_1
+ 1);
6656 /* Cost is just the cost of the RHS of the set. */
6657 *cost
+= rtx_cost (op1
, mode
, SET
, 1, speed
);
6662 /* Bit-field insertion. Strip any redundant widening of
6663 the RHS to meet the width of the target. */
6664 if (GET_CODE (op1
) == SUBREG
)
6665 op1
= SUBREG_REG (op1
);
6666 if ((GET_CODE (op1
) == ZERO_EXTEND
6667 || GET_CODE (op1
) == SIGN_EXTEND
)
6668 && CONST_INT_P (XEXP (op0
, 1))
6669 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1
, 0)))
6670 >= INTVAL (XEXP (op0
, 1))))
6671 op1
= XEXP (op1
, 0);
6673 if (CONST_INT_P (op1
))
6675 /* MOV immediate is assumed to always be cheap. */
6676 *cost
= COSTS_N_INSNS (1);
6682 *cost
+= extra_cost
->alu
.bfi
;
6683 *cost
+= rtx_cost (op1
, VOIDmode
, (enum rtx_code
) code
, 1, speed
);
6689 /* We can't make sense of this, assume default cost. */
6690 *cost
= COSTS_N_INSNS (1);
6696 /* If an instruction can incorporate a constant within the
6697 instruction, the instruction's expression avoids calling
6698 rtx_cost() on the constant. If rtx_cost() is called on a
6699 constant, then it is usually because the constant must be
6700 moved into a register by one or more instructions.
6702 The exception is constant 0, which can be expressed
6703 as XZR/WZR and is therefore free. The exception to this is
6704 if we have (set (reg) (const0_rtx)) in which case we must cost
6705 the move. However, we can catch that when we cost the SET, so
6706 we don't need to consider that here. */
6707 if (x
== const0_rtx
)
6711 /* To an approximation, building any other constant is
6712 proportionally expensive to the number of instructions
6713 required to build that constant. This is true whether we
6714 are compiling for SPEED or otherwise. */
6715 *cost
= COSTS_N_INSNS (aarch64_internal_mov_immediate
6716 (NULL_RTX
, x
, false, mode
));
6723 /* mov[df,sf]_aarch64. */
6724 if (aarch64_float_const_representable_p (x
))
6725 /* FMOV (scalar immediate). */
6726 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
6727 else if (!aarch64_float_const_zero_rtx_p (x
))
6729 /* This will be a load from memory. */
6731 *cost
+= extra_cost
->ldst
.loadd
;
6733 *cost
+= extra_cost
->ldst
.loadf
;
6736 /* Otherwise this is +0.0. We get this using MOVI d0, #0
6737 or MOV v0.s[0], wzr - neither of which are modeled by the
6738 cost tables. Just use the default cost. */
6748 /* For loads we want the base cost of a load, plus an
6749 approximation for the additional cost of the addressing
6751 rtx address
= XEXP (x
, 0);
6752 if (VECTOR_MODE_P (mode
))
6753 *cost
+= extra_cost
->ldst
.loadv
;
6754 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
6755 *cost
+= extra_cost
->ldst
.load
;
6756 else if (mode
== SFmode
)
6757 *cost
+= extra_cost
->ldst
.loadf
;
6758 else if (mode
== DFmode
)
6759 *cost
+= extra_cost
->ldst
.loadd
;
6762 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
6771 if (VECTOR_MODE_P (mode
))
6776 *cost
+= extra_cost
->vect
.alu
;
6781 if (GET_MODE_CLASS (mode
) == MODE_INT
)
6783 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
6784 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
6787 *cost
+= rtx_cost (XEXP (op0
, 0), VOIDmode
, NEG
, 0, speed
);
6791 /* Cost this as SUB wzr, X. */
6792 op0
= CONST0_RTX (mode
);
6797 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6799 /* Support (neg(fma...)) as a single instruction only if
6800 sign of zeros is unimportant. This matches the decision
6801 making in aarch64.md. */
6802 if (GET_CODE (op0
) == FMA
&& !HONOR_SIGNED_ZEROS (GET_MODE (op0
)))
6805 *cost
= rtx_cost (op0
, mode
, NEG
, 0, speed
);
6808 if (GET_CODE (op0
) == MULT
)
6811 *cost
= rtx_cost (op0
, mode
, NEG
, 0, speed
);
6816 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
6826 if (VECTOR_MODE_P (mode
))
6827 *cost
+= extra_cost
->vect
.alu
;
6829 *cost
+= extra_cost
->alu
.clz
;
6838 if (op1
== const0_rtx
6839 && GET_CODE (op0
) == AND
)
6842 mode
= GET_MODE (op0
);
6846 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
6848 /* TODO: A write to the CC flags possibly costs extra, this
6849 needs encoding in the cost tables. */
6851 mode
= GET_MODE (op0
);
6853 if (GET_CODE (op0
) == AND
)
6859 if (GET_CODE (op0
) == PLUS
)
6861 /* ADDS (and CMN alias). */
6866 if (GET_CODE (op0
) == MINUS
)
6873 if (GET_CODE (op0
) == ZERO_EXTRACT
&& op1
== const0_rtx
6874 && GET_MODE (x
) == CC_NZmode
&& CONST_INT_P (XEXP (op0
, 1))
6875 && CONST_INT_P (XEXP (op0
, 2)))
6877 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
6878 Handle it here directly rather than going to cost_logic
6879 since we know the immediate generated for the TST is valid
6880 so we can avoid creating an intermediate rtx for it only
6881 for costing purposes. */
6883 *cost
+= extra_cost
->alu
.logical
;
6885 *cost
+= rtx_cost (XEXP (op0
, 0), GET_MODE (op0
),
6886 ZERO_EXTRACT
, 0, speed
);
6890 if (GET_CODE (op1
) == NEG
)
6894 *cost
+= extra_cost
->alu
.arith
;
6896 *cost
+= rtx_cost (op0
, mode
, COMPARE
, 0, speed
);
6897 *cost
+= rtx_cost (XEXP (op1
, 0), mode
, NEG
, 1, speed
);
6903 Compare can freely swap the order of operands, and
6904 canonicalization puts the more complex operation first.
6905 But the integer MINUS logic expects the shift/extend
6906 operation in op1. */
6908 || (GET_CODE (op0
) == SUBREG
&& REG_P (SUBREG_REG (op0
)))))
6916 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
6920 *cost
+= extra_cost
->fp
[mode
== DFmode
].compare
;
6922 if (CONST_DOUBLE_P (op1
) && aarch64_float_const_zero_rtx_p (op1
))
6924 *cost
+= rtx_cost (op0
, VOIDmode
, COMPARE
, 0, speed
);
6925 /* FCMP supports constant 0.0 for no extra cost. */
6931 if (VECTOR_MODE_P (mode
))
6933 /* Vector compare. */
6935 *cost
+= extra_cost
->vect
.alu
;
6937 if (aarch64_float_const_zero_rtx_p (op1
))
6939 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
6953 *cost
+= rtx_cost (op0
, mode
, MINUS
, 0, speed
);
6955 /* Detect valid immediates. */
6956 if ((GET_MODE_CLASS (mode
) == MODE_INT
6957 || (GET_MODE_CLASS (mode
) == MODE_CC
6958 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
))
6959 && CONST_INT_P (op1
)
6960 && aarch64_uimm12_shift (INTVAL (op1
)))
6963 /* SUB(S) (immediate). */
6964 *cost
+= extra_cost
->alu
.arith
;
6968 /* Look for SUB (extended register). */
6969 if (aarch64_rtx_arith_op_extract_p (op1
, mode
))
6972 *cost
+= extra_cost
->alu
.extend_arith
;
6974 op1
= aarch64_strip_extend (op1
);
6975 *cost
+= rtx_cost (op1
, VOIDmode
,
6976 (enum rtx_code
) GET_CODE (op1
), 0, speed
);
6980 rtx new_op1
= aarch64_strip_extend (op1
);
6982 /* Cost this as an FMA-alike operation. */
6983 if ((GET_CODE (new_op1
) == MULT
6984 || aarch64_shift_p (GET_CODE (new_op1
)))
6987 *cost
+= aarch64_rtx_mult_cost (new_op1
, MULT
,
6988 (enum rtx_code
) code
,
6993 *cost
+= rtx_cost (new_op1
, VOIDmode
, MINUS
, 1, speed
);
6997 if (VECTOR_MODE_P (mode
))
7000 *cost
+= extra_cost
->vect
.alu
;
7002 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
7005 *cost
+= extra_cost
->alu
.arith
;
7007 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7010 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
7024 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
7025 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
7028 *cost
+= rtx_cost (XEXP (op0
, 0), mode
, PLUS
, 0, speed
);
7029 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed
);
7033 if (GET_MODE_CLASS (mode
) == MODE_INT
7034 && CONST_INT_P (op1
)
7035 && aarch64_uimm12_shift (INTVAL (op1
)))
7037 *cost
+= rtx_cost (op0
, mode
, PLUS
, 0, speed
);
7040 /* ADD (immediate). */
7041 *cost
+= extra_cost
->alu
.arith
;
7045 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed
);
7047 /* Look for ADD (extended register). */
7048 if (aarch64_rtx_arith_op_extract_p (op0
, mode
))
7051 *cost
+= extra_cost
->alu
.extend_arith
;
7053 op0
= aarch64_strip_extend (op0
);
7054 *cost
+= rtx_cost (op0
, VOIDmode
,
7055 (enum rtx_code
) GET_CODE (op0
), 0, speed
);
7059 /* Strip any extend, leave shifts behind as we will
7060 cost them through mult_cost. */
7061 new_op0
= aarch64_strip_extend (op0
);
7063 if (GET_CODE (new_op0
) == MULT
7064 || aarch64_shift_p (GET_CODE (new_op0
)))
7066 *cost
+= aarch64_rtx_mult_cost (new_op0
, MULT
, PLUS
,
7071 *cost
+= rtx_cost (new_op0
, VOIDmode
, PLUS
, 0, speed
);
7075 if (VECTOR_MODE_P (mode
))
7078 *cost
+= extra_cost
->vect
.alu
;
7080 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
7083 *cost
+= extra_cost
->alu
.arith
;
7085 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7088 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
7095 *cost
= COSTS_N_INSNS (1);
7099 if (VECTOR_MODE_P (mode
))
7100 *cost
+= extra_cost
->vect
.alu
;
7102 *cost
+= extra_cost
->alu
.rev
;
7107 if (aarch_rev16_p (x
))
7109 *cost
= COSTS_N_INSNS (1);
7113 if (VECTOR_MODE_P (mode
))
7114 *cost
+= extra_cost
->vect
.alu
;
7116 *cost
+= extra_cost
->alu
.rev
;
7121 if (aarch64_extr_rtx_p (x
, &op0
, &op1
))
7123 *cost
+= rtx_cost (op0
, mode
, IOR
, 0, speed
);
7124 *cost
+= rtx_cost (op1
, mode
, IOR
, 1, speed
);
7126 *cost
+= extra_cost
->alu
.shift
;
7137 if (VECTOR_MODE_P (mode
))
7140 *cost
+= extra_cost
->vect
.alu
;
7145 && GET_CODE (op0
) == MULT
7146 && CONST_INT_P (XEXP (op0
, 1))
7147 && CONST_INT_P (op1
)
7148 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0
, 1))),
7151 /* This is a UBFM/SBFM. */
7152 *cost
+= rtx_cost (XEXP (op0
, 0), mode
, ZERO_EXTRACT
, 0, speed
);
7154 *cost
+= extra_cost
->alu
.bfx
;
7158 if (GET_MODE_CLASS (mode
) == MODE_INT
)
7160 if (CONST_INT_P (op1
))
7162 /* We have a mask + shift version of a UBFIZ
7163 i.e. the *andim_ashift<mode>_bfiz pattern. */
7164 if (GET_CODE (op0
) == ASHIFT
7165 && aarch64_mask_and_shift_for_ubfiz_p (mode
, op1
,
7168 *cost
+= rtx_cost (XEXP (op0
, 0), mode
,
7169 (enum rtx_code
) code
, 0, speed
);
7171 *cost
+= extra_cost
->alu
.bfx
;
7175 else if (aarch64_bitmask_imm (INTVAL (op1
), mode
))
7177 /* We possibly get the immediate for free, this is not
7179 *cost
+= rtx_cost (op0
, mode
, (enum rtx_code
) code
, 0, speed
);
7181 *cost
+= extra_cost
->alu
.logical
;
7190 /* Handle ORN, EON, or BIC. */
7191 if (GET_CODE (op0
) == NOT
)
7192 op0
= XEXP (op0
, 0);
7194 new_op0
= aarch64_strip_shift (op0
);
7196 /* If we had a shift on op0 then this is a logical-shift-
7197 by-register/immediate operation. Otherwise, this is just
7198 a logical operation. */
7203 /* Shift by immediate. */
7204 if (CONST_INT_P (XEXP (op0
, 1)))
7205 *cost
+= extra_cost
->alu
.log_shift
;
7207 *cost
+= extra_cost
->alu
.log_shift_reg
;
7210 *cost
+= extra_cost
->alu
.logical
;
7213 /* In both cases we want to cost both operands. */
7214 *cost
+= rtx_cost (new_op0
, mode
, (enum rtx_code
) code
, 0, speed
);
7215 *cost
+= rtx_cost (op1
, mode
, (enum rtx_code
) code
, 1, speed
);
7224 op0
= aarch64_strip_shift (x
);
7226 if (VECTOR_MODE_P (mode
))
7229 *cost
+= extra_cost
->vect
.alu
;
7233 /* MVN-shifted-reg. */
7236 *cost
+= rtx_cost (op0
, mode
, (enum rtx_code
) code
, 0, speed
);
7239 *cost
+= extra_cost
->alu
.log_shift
;
7243 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
7244 Handle the second form here taking care that 'a' in the above can
7246 else if (GET_CODE (op0
) == XOR
)
7248 rtx newop0
= XEXP (op0
, 0);
7249 rtx newop1
= XEXP (op0
, 1);
7250 rtx op0_stripped
= aarch64_strip_shift (newop0
);
7252 *cost
+= rtx_cost (newop1
, mode
, (enum rtx_code
) code
, 1, speed
);
7253 *cost
+= rtx_cost (op0_stripped
, mode
, XOR
, 0, speed
);
7257 if (op0_stripped
!= newop0
)
7258 *cost
+= extra_cost
->alu
.log_shift
;
7260 *cost
+= extra_cost
->alu
.logical
;
7267 *cost
+= extra_cost
->alu
.logical
;
7274 /* If a value is written in SI mode, then zero extended to DI
7275 mode, the operation will in general be free as a write to
7276 a 'w' register implicitly zeroes the upper bits of an 'x'
7277 register. However, if this is
7279 (set (reg) (zero_extend (reg)))
7281 we must cost the explicit register move. */
7283 && GET_MODE (op0
) == SImode
7286 int op_cost
= rtx_cost (op0
, VOIDmode
, ZERO_EXTEND
, 0, speed
);
7288 /* If OP_COST is non-zero, then the cost of the zero extend
7289 is effectively the cost of the inner operation. Otherwise
7290 we have a MOV instruction and we take the cost from the MOV
7291 itself. This is true independently of whether we are
7292 optimizing for space or time. */
7298 else if (MEM_P (op0
))
7300 /* All loads can zero extend to any size for free. */
7301 *cost
= rtx_cost (op0
, VOIDmode
, ZERO_EXTEND
, param
, speed
);
7305 op0
= aarch64_extend_bitfield_pattern_p (x
);
7308 *cost
+= rtx_cost (op0
, mode
, ZERO_EXTEND
, 0, speed
);
7310 *cost
+= extra_cost
->alu
.bfx
;
7316 if (VECTOR_MODE_P (mode
))
7319 *cost
+= extra_cost
->vect
.alu
;
7323 /* We generate an AND instead of UXTB/UXTH. */
7324 *cost
+= extra_cost
->alu
.logical
;
7330 if (MEM_P (XEXP (x
, 0)))
7335 rtx address
= XEXP (XEXP (x
, 0), 0);
7336 *cost
+= extra_cost
->ldst
.load_sign_extend
;
7339 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
7345 op0
= aarch64_extend_bitfield_pattern_p (x
);
7348 *cost
+= rtx_cost (op0
, mode
, SIGN_EXTEND
, 0, speed
);
7350 *cost
+= extra_cost
->alu
.bfx
;
7356 if (VECTOR_MODE_P (mode
))
7357 *cost
+= extra_cost
->vect
.alu
;
7359 *cost
+= extra_cost
->alu
.extend
;
7367 if (CONST_INT_P (op1
))
7371 if (VECTOR_MODE_P (mode
))
7373 /* Vector shift (immediate). */
7374 *cost
+= extra_cost
->vect
.alu
;
7378 /* LSL (immediate), UBMF, UBFIZ and friends. These are all
7380 *cost
+= extra_cost
->alu
.shift
;
7384 /* We can incorporate zero/sign extend for free. */
7385 if (GET_CODE (op0
) == ZERO_EXTEND
7386 || GET_CODE (op0
) == SIGN_EXTEND
)
7387 op0
= XEXP (op0
, 0);
7389 *cost
+= rtx_cost (op0
, VOIDmode
, ASHIFT
, 0, speed
);
7396 if (VECTOR_MODE_P (mode
))
7398 /* Vector shift (register). */
7399 *cost
+= extra_cost
->vect
.alu
;
7404 *cost
+= extra_cost
->alu
.shift_reg
;
7407 return false; /* All arguments need to be in registers. */
7417 if (CONST_INT_P (op1
))
7419 /* ASR (immediate) and friends. */
7422 if (VECTOR_MODE_P (mode
))
7423 *cost
+= extra_cost
->vect
.alu
;
7425 *cost
+= extra_cost
->alu
.shift
;
7428 *cost
+= rtx_cost (op0
, mode
, (enum rtx_code
) code
, 0, speed
);
7434 /* ASR (register) and friends. */
7437 if (VECTOR_MODE_P (mode
))
7438 *cost
+= extra_cost
->vect
.alu
;
7440 *cost
+= extra_cost
->alu
.shift_reg
;
7442 return false; /* All arguments need to be in registers. */
7447 if (aarch64_cmodel
== AARCH64_CMODEL_LARGE
7448 || aarch64_cmodel
== AARCH64_CMODEL_SMALL_SPIC
)
7452 *cost
+= extra_cost
->ldst
.load
;
7454 else if (aarch64_cmodel
== AARCH64_CMODEL_SMALL
7455 || aarch64_cmodel
== AARCH64_CMODEL_SMALL_PIC
)
7457 /* ADRP, followed by ADD. */
7458 *cost
+= COSTS_N_INSNS (1);
7460 *cost
+= 2 * extra_cost
->alu
.arith
;
7462 else if (aarch64_cmodel
== AARCH64_CMODEL_TINY
7463 || aarch64_cmodel
== AARCH64_CMODEL_TINY_PIC
)
7467 *cost
+= extra_cost
->alu
.arith
;
7472 /* One extra load instruction, after accessing the GOT. */
7473 *cost
+= COSTS_N_INSNS (1);
7475 *cost
+= extra_cost
->ldst
.load
;
7481 /* ADRP/ADD (immediate). */
7483 *cost
+= extra_cost
->alu
.arith
;
7491 if (VECTOR_MODE_P (mode
))
7492 *cost
+= extra_cost
->vect
.alu
;
7494 *cost
+= extra_cost
->alu
.bfx
;
7497 /* We can trust that the immediates used will be correct (there
7498 are no by-register forms), so we need only cost op0. */
7499 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, (enum rtx_code
) code
, 0, speed
);
7503 *cost
+= aarch64_rtx_mult_cost (x
, MULT
, 0, speed
);
7504 /* aarch64_rtx_mult_cost always handles recursion to its
7509 /* We can expand signed mod by power of 2 using a NEGS, two parallel
7510 ANDs and a CSNEG. Assume here that CSNEG is the same as the cost of
7511 an unconditional negate. This case should only ever be reached through
7512 the set_smod_pow2_cheap check in expmed.c. */
7513 if (CONST_INT_P (XEXP (x
, 1))
7514 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
7515 && (mode
== SImode
|| mode
== DImode
))
7517 /* We expand to 4 instructions. Reset the baseline. */
7518 *cost
= COSTS_N_INSNS (4);
7521 *cost
+= 2 * extra_cost
->alu
.logical
7522 + 2 * extra_cost
->alu
.arith
;
7531 if (VECTOR_MODE_P (mode
))
7532 *cost
+= extra_cost
->vect
.alu
;
7533 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
7534 *cost
+= (extra_cost
->mult
[mode
== DImode
].add
7535 + extra_cost
->mult
[mode
== DImode
].idiv
);
7536 else if (mode
== DFmode
)
7537 *cost
+= (extra_cost
->fp
[1].mult
7538 + extra_cost
->fp
[1].div
);
7539 else if (mode
== SFmode
)
7540 *cost
+= (extra_cost
->fp
[0].mult
7541 + extra_cost
->fp
[0].div
);
7543 return false; /* All arguments need to be in registers. */
7550 if (VECTOR_MODE_P (mode
))
7551 *cost
+= extra_cost
->vect
.alu
;
7552 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
7553 /* There is no integer SQRT, so only DIV and UDIV can get
7555 *cost
+= extra_cost
->mult
[mode
== DImode
].idiv
;
7557 *cost
+= extra_cost
->fp
[mode
== DFmode
].div
;
7559 return false; /* All arguments need to be in registers. */
7562 return aarch64_if_then_else_costs (XEXP (x
, 0), XEXP (x
, 1),
7563 XEXP (x
, 2), cost
, speed
);
7576 return false; /* All arguments must be in registers. */
7585 if (VECTOR_MODE_P (mode
))
7586 *cost
+= extra_cost
->vect
.alu
;
7588 *cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
7591 /* FMSUB, FNMADD, and FNMSUB are free. */
7592 if (GET_CODE (op0
) == NEG
)
7593 op0
= XEXP (op0
, 0);
7595 if (GET_CODE (op2
) == NEG
)
7596 op2
= XEXP (op2
, 0);
7598 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
7599 and the by-element operand as operand 0. */
7600 if (GET_CODE (op1
) == NEG
)
7601 op1
= XEXP (op1
, 0);
7603 /* Catch vector-by-element operations. The by-element operand can
7604 either be (vec_duplicate (vec_select (x))) or just
7605 (vec_select (x)), depending on whether we are multiplying by
7606 a vector or a scalar.
7608 Canonicalization is not very good in these cases, FMA4 will put the
7609 by-element operand as operand 0, FNMA4 will have it as operand 1. */
7610 if (GET_CODE (op0
) == VEC_DUPLICATE
)
7611 op0
= XEXP (op0
, 0);
7612 else if (GET_CODE (op1
) == VEC_DUPLICATE
)
7613 op1
= XEXP (op1
, 0);
7615 if (GET_CODE (op0
) == VEC_SELECT
)
7616 op0
= XEXP (op0
, 0);
7617 else if (GET_CODE (op1
) == VEC_SELECT
)
7618 op1
= XEXP (op1
, 0);
7620 /* If the remaining parameters are not registers,
7621 get the cost to put them into registers. */
7622 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed
);
7623 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed
);
7624 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed
);
7628 case UNSIGNED_FLOAT
:
7630 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
7636 if (VECTOR_MODE_P (mode
))
7638 /*Vector truncate. */
7639 *cost
+= extra_cost
->vect
.alu
;
7642 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
7646 case FLOAT_TRUNCATE
:
7649 if (VECTOR_MODE_P (mode
))
7651 /*Vector conversion. */
7652 *cost
+= extra_cost
->vect
.alu
;
7655 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
7662 /* Strip the rounding part. They will all be implemented
7663 by the fcvt* family of instructions anyway. */
7664 if (GET_CODE (x
) == UNSPEC
)
7666 unsigned int uns_code
= XINT (x
, 1);
7668 if (uns_code
== UNSPEC_FRINTA
7669 || uns_code
== UNSPEC_FRINTM
7670 || uns_code
== UNSPEC_FRINTN
7671 || uns_code
== UNSPEC_FRINTP
7672 || uns_code
== UNSPEC_FRINTZ
)
7673 x
= XVECEXP (x
, 0, 0);
7678 if (VECTOR_MODE_P (mode
))
7679 *cost
+= extra_cost
->vect
.alu
;
7681 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].toint
;
7684 /* We can combine fmul by a power of 2 followed by a fcvt into a single
7685 fixed-point fcvt. */
7686 if (GET_CODE (x
) == MULT
7687 && ((VECTOR_MODE_P (mode
)
7688 && aarch64_vec_fpconst_pow_of_2 (XEXP (x
, 1)) > 0)
7689 || aarch64_fpconst_pow_of_2 (XEXP (x
, 1)) > 0))
7691 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, (rtx_code
) code
,
7696 *cost
+= rtx_cost (x
, VOIDmode
, (enum rtx_code
) code
, 0, speed
);
7700 if (VECTOR_MODE_P (mode
))
7704 *cost
+= extra_cost
->vect
.alu
;
7706 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7710 /* FABD, which is analogous to FADD. */
7711 if (GET_CODE (op0
) == MINUS
)
7713 *cost
+= rtx_cost (XEXP (op0
, 0), mode
, MINUS
, 0, speed
);
7714 *cost
+= rtx_cost (XEXP (op0
, 1), mode
, MINUS
, 1, speed
);
7716 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
7720 /* Simple FABS is analogous to FNEG. */
7722 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
7726 /* Integer ABS will either be split to
7727 two arithmetic instructions, or will be an ABS
7728 (scalar), which we don't model. */
7729 *cost
= COSTS_N_INSNS (2);
7731 *cost
+= 2 * extra_cost
->alu
.arith
;
7739 if (VECTOR_MODE_P (mode
))
7740 *cost
+= extra_cost
->vect
.alu
;
7743 /* FMAXNM/FMINNM/FMAX/FMIN.
7744 TODO: This may not be accurate for all implementations, but
7745 we do not model this in the cost tables. */
7746 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
7752 /* The floating point round to integer frint* instructions. */
7753 if (aarch64_frint_unspec_p (XINT (x
, 1)))
7756 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
7761 if (XINT (x
, 1) == UNSPEC_RBIT
)
7764 *cost
+= extra_cost
->alu
.rev
;
7772 /* Decompose <su>muldi3_highpart. */
7773 if (/* (truncate:DI */
7776 && GET_MODE (XEXP (x
, 0)) == TImode
7777 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
7779 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7780 /* (ANY_EXTEND:TI (reg:DI))
7781 (ANY_EXTEND:TI (reg:DI))) */
7782 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
7783 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == ZERO_EXTEND
)
7784 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
7785 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
))
7786 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0)) == DImode
7787 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0)) == DImode
7788 /* (const_int 64) */
7789 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7790 && UINTVAL (XEXP (XEXP (x
, 0), 1)) == 64)
7794 *cost
+= extra_cost
->mult
[mode
== DImode
].extend
;
7795 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0),
7796 mode
, MULT
, 0, speed
);
7797 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0),
7798 mode
, MULT
, 1, speed
);
7808 && flag_aarch64_verbose_cost
)
7810 "\nFailed to cost RTX. Assuming default cost.\n");
7815 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
7816 calculated for X. This cost is stored in *COST. Returns true
7817 if the total cost of X was calculated. */
7819 aarch64_rtx_costs_wrapper (rtx x
, machine_mode mode
, int outer
,
7820 int param
, int *cost
, bool speed
)
7822 bool result
= aarch64_rtx_costs (x
, mode
, outer
, param
, cost
, speed
);
7825 && flag_aarch64_verbose_cost
)
7827 print_rtl_single (dump_file
, x
);
7828 fprintf (dump_file
, "\n%s cost: %d (%s)\n",
7829 speed
? "Hot" : "Cold",
7830 *cost
, result
? "final" : "partial");
7837 aarch64_register_move_cost (machine_mode mode
,
7838 reg_class_t from_i
, reg_class_t to_i
)
7840 enum reg_class from
= (enum reg_class
) from_i
;
7841 enum reg_class to
= (enum reg_class
) to_i
;
7842 const struct cpu_regmove_cost
*regmove_cost
7843 = aarch64_tune_params
.regmove_cost
;
7845 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
7846 if (to
== CALLER_SAVE_REGS
|| to
== POINTER_REGS
)
7849 if (from
== CALLER_SAVE_REGS
|| from
== POINTER_REGS
)
7850 from
= GENERAL_REGS
;
7852 /* Moving between GPR and stack cost is the same as GP2GP. */
7853 if ((from
== GENERAL_REGS
&& to
== STACK_REG
)
7854 || (to
== GENERAL_REGS
&& from
== STACK_REG
))
7855 return regmove_cost
->GP2GP
;
7857 /* To/From the stack register, we move via the gprs. */
7858 if (to
== STACK_REG
|| from
== STACK_REG
)
7859 return aarch64_register_move_cost (mode
, from
, GENERAL_REGS
)
7860 + aarch64_register_move_cost (mode
, GENERAL_REGS
, to
);
7862 if (GET_MODE_SIZE (mode
) == 16)
7864 /* 128-bit operations on general registers require 2 instructions. */
7865 if (from
== GENERAL_REGS
&& to
== GENERAL_REGS
)
7866 return regmove_cost
->GP2GP
* 2;
7867 else if (from
== GENERAL_REGS
)
7868 return regmove_cost
->GP2FP
* 2;
7869 else if (to
== GENERAL_REGS
)
7870 return regmove_cost
->FP2GP
* 2;
7872 /* When AdvSIMD instructions are disabled it is not possible to move
7873 a 128-bit value directly between Q registers. This is handled in
7874 secondary reload. A general register is used as a scratch to move
7875 the upper DI value and the lower DI value is moved directly,
7876 hence the cost is the sum of three moves. */
7878 return regmove_cost
->GP2FP
+ regmove_cost
->FP2GP
+ regmove_cost
->FP2FP
;
7880 return regmove_cost
->FP2FP
;
7883 if (from
== GENERAL_REGS
&& to
== GENERAL_REGS
)
7884 return regmove_cost
->GP2GP
;
7885 else if (from
== GENERAL_REGS
)
7886 return regmove_cost
->GP2FP
;
7887 else if (to
== GENERAL_REGS
)
7888 return regmove_cost
->FP2GP
;
7890 return regmove_cost
->FP2FP
;
7894 aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
7895 reg_class_t rclass ATTRIBUTE_UNUSED
,
7896 bool in ATTRIBUTE_UNUSED
)
7898 return aarch64_tune_params
.memmov_cost
;
7901 /* Return true if it is safe and beneficial to use the approximate rsqrt optabs
7902 to optimize 1.0/sqrt. */
7905 use_rsqrt_p (machine_mode mode
)
7907 return (!flag_trapping_math
7908 && flag_unsafe_math_optimizations
7909 && ((aarch64_tune_params
.approx_modes
->recip_sqrt
7910 & AARCH64_APPROX_MODE (mode
))
7911 || flag_mrecip_low_precision_sqrt
));
7914 /* Function to decide when to use the approximate reciprocal square root
7918 aarch64_builtin_reciprocal (tree fndecl
)
7920 machine_mode mode
= TYPE_MODE (TREE_TYPE (fndecl
));
7922 if (!use_rsqrt_p (mode
))
7924 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl
));
7927 typedef rtx (*rsqrte_type
) (rtx
, rtx
);
7929 /* Select reciprocal square root initial estimate insn depending on machine
7933 get_rsqrte_type (machine_mode mode
)
7937 case DFmode
: return gen_aarch64_rsqrtedf
;
7938 case SFmode
: return gen_aarch64_rsqrtesf
;
7939 case V2DFmode
: return gen_aarch64_rsqrtev2df
;
7940 case V2SFmode
: return gen_aarch64_rsqrtev2sf
;
7941 case V4SFmode
: return gen_aarch64_rsqrtev4sf
;
7942 default: gcc_unreachable ();
7946 typedef rtx (*rsqrts_type
) (rtx
, rtx
, rtx
);
7948 /* Select reciprocal square root series step insn depending on machine mode. */
7951 get_rsqrts_type (machine_mode mode
)
7955 case DFmode
: return gen_aarch64_rsqrtsdf
;
7956 case SFmode
: return gen_aarch64_rsqrtssf
;
7957 case V2DFmode
: return gen_aarch64_rsqrtsv2df
;
7958 case V2SFmode
: return gen_aarch64_rsqrtsv2sf
;
7959 case V4SFmode
: return gen_aarch64_rsqrtsv4sf
;
7960 default: gcc_unreachable ();
7964 /* Emit instruction sequence to compute either the approximate square root
7965 or its approximate reciprocal, depending on the flag RECP, and return
7966 whether the sequence was emitted or not. */
7969 aarch64_emit_approx_sqrt (rtx dst
, rtx src
, bool recp
)
7971 machine_mode mode
= GET_MODE (dst
);
7973 if (GET_MODE_INNER (mode
) == HFmode
)
7976 machine_mode mmsk
= mode_for_vector
7977 (int_mode_for_mode (GET_MODE_INNER (mode
)),
7978 GET_MODE_NUNITS (mode
));
7979 bool use_approx_sqrt_p
= (!recp
7980 && (flag_mlow_precision_sqrt
7981 || (aarch64_tune_params
.approx_modes
->sqrt
7982 & AARCH64_APPROX_MODE (mode
))));
7983 bool use_approx_rsqrt_p
= (recp
7984 && (flag_mrecip_low_precision_sqrt
7985 || (aarch64_tune_params
.approx_modes
->recip_sqrt
7986 & AARCH64_APPROX_MODE (mode
))));
7988 if (!flag_finite_math_only
7989 || flag_trapping_math
7990 || !flag_unsafe_math_optimizations
7991 || !(use_approx_sqrt_p
|| use_approx_rsqrt_p
)
7992 || optimize_function_for_size_p (cfun
))
7995 rtx xmsk
= gen_reg_rtx (mmsk
);
7997 /* When calculating the approximate square root, compare the argument with
7998 0.0 and create a mask. */
7999 emit_insn (gen_rtx_SET (xmsk
, gen_rtx_NEG (mmsk
, gen_rtx_EQ (mmsk
, src
,
8000 CONST0_RTX (mode
)))));
8002 /* Estimate the approximate reciprocal square root. */
8003 rtx xdst
= gen_reg_rtx (mode
);
8004 emit_insn ((*get_rsqrte_type (mode
)) (xdst
, src
));
8006 /* Iterate over the series twice for SF and thrice for DF. */
8007 int iterations
= (GET_MODE_INNER (mode
) == DFmode
) ? 3 : 2;
8009 /* Optionally iterate over the series once less for faster performance
8010 while sacrificing the accuracy. */
8011 if ((recp
&& flag_mrecip_low_precision_sqrt
)
8012 || (!recp
&& flag_mlow_precision_sqrt
))
8015 /* Iterate over the series to calculate the approximate reciprocal square
8017 rtx x1
= gen_reg_rtx (mode
);
8018 while (iterations
--)
8020 rtx x2
= gen_reg_rtx (mode
);
8021 emit_set_insn (x2
, gen_rtx_MULT (mode
, xdst
, xdst
));
8023 emit_insn ((*get_rsqrts_type (mode
)) (x1
, src
, x2
));
8026 emit_set_insn (xdst
, gen_rtx_MULT (mode
, xdst
, x1
));
8031 /* Qualify the approximate reciprocal square root when the argument is
8032 0.0 by squashing the intermediary result to 0.0. */
8033 rtx xtmp
= gen_reg_rtx (mmsk
);
8034 emit_set_insn (xtmp
, gen_rtx_AND (mmsk
, gen_rtx_NOT (mmsk
, xmsk
),
8035 gen_rtx_SUBREG (mmsk
, xdst
, 0)));
8036 emit_move_insn (xdst
, gen_rtx_SUBREG (mode
, xtmp
, 0));
8038 /* Calculate the approximate square root. */
8039 emit_set_insn (xdst
, gen_rtx_MULT (mode
, xdst
, src
));
8042 /* Finalize the approximation. */
8043 emit_set_insn (dst
, gen_rtx_MULT (mode
, xdst
, x1
));
8048 typedef rtx (*recpe_type
) (rtx
, rtx
);
8050 /* Select reciprocal initial estimate insn depending on machine mode. */
8053 get_recpe_type (machine_mode mode
)
8057 case SFmode
: return (gen_aarch64_frecpesf
);
8058 case V2SFmode
: return (gen_aarch64_frecpev2sf
);
8059 case V4SFmode
: return (gen_aarch64_frecpev4sf
);
8060 case DFmode
: return (gen_aarch64_frecpedf
);
8061 case V2DFmode
: return (gen_aarch64_frecpev2df
);
8062 default: gcc_unreachable ();
8066 typedef rtx (*recps_type
) (rtx
, rtx
, rtx
);
8068 /* Select reciprocal series step insn depending on machine mode. */
8071 get_recps_type (machine_mode mode
)
8075 case SFmode
: return (gen_aarch64_frecpssf
);
8076 case V2SFmode
: return (gen_aarch64_frecpsv2sf
);
8077 case V4SFmode
: return (gen_aarch64_frecpsv4sf
);
8078 case DFmode
: return (gen_aarch64_frecpsdf
);
8079 case V2DFmode
: return (gen_aarch64_frecpsv2df
);
8080 default: gcc_unreachable ();
8084 /* Emit the instruction sequence to compute the approximation for the division
8085 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
8088 aarch64_emit_approx_div (rtx quo
, rtx num
, rtx den
)
8090 machine_mode mode
= GET_MODE (quo
);
8092 if (GET_MODE_INNER (mode
) == HFmode
)
8095 bool use_approx_division_p
= (flag_mlow_precision_div
8096 || (aarch64_tune_params
.approx_modes
->division
8097 & AARCH64_APPROX_MODE (mode
)));
8099 if (!flag_finite_math_only
8100 || flag_trapping_math
8101 || !flag_unsafe_math_optimizations
8102 || optimize_function_for_size_p (cfun
)
8103 || !use_approx_division_p
)
8106 /* Estimate the approximate reciprocal. */
8107 rtx xrcp
= gen_reg_rtx (mode
);
8108 emit_insn ((*get_recpe_type (mode
)) (xrcp
, den
));
8110 /* Iterate over the series twice for SF and thrice for DF. */
8111 int iterations
= (GET_MODE_INNER (mode
) == DFmode
) ? 3 : 2;
8113 /* Optionally iterate over the series once less for faster performance,
8114 while sacrificing the accuracy. */
8115 if (flag_mlow_precision_div
)
8118 /* Iterate over the series to calculate the approximate reciprocal. */
8119 rtx xtmp
= gen_reg_rtx (mode
);
8120 while (iterations
--)
8122 emit_insn ((*get_recps_type (mode
)) (xtmp
, xrcp
, den
));
8125 emit_set_insn (xrcp
, gen_rtx_MULT (mode
, xrcp
, xtmp
));
8128 if (num
!= CONST1_RTX (mode
))
8130 /* As the approximate reciprocal of DEN is already calculated, only
8131 calculate the approximate division when NUM is not 1.0. */
8132 rtx xnum
= force_reg (mode
, num
);
8133 emit_set_insn (xrcp
, gen_rtx_MULT (mode
, xrcp
, xnum
));
8136 /* Finalize the approximation. */
8137 emit_set_insn (quo
, gen_rtx_MULT (mode
, xrcp
, xtmp
));
8141 /* Return the number of instructions that can be issued per cycle. */
8143 aarch64_sched_issue_rate (void)
8145 return aarch64_tune_params
.issue_rate
;
8149 aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
8151 int issue_rate
= aarch64_sched_issue_rate ();
8153 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
8157 /* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
8158 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
8159 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
8162 aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
,
8165 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
8169 /* Vectorizer cost model target hooks. */
8171 /* Implement targetm.vectorize.builtin_vectorization_cost. */
8173 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
8175 int misalign ATTRIBUTE_UNUSED
)
8178 const cpu_vector_cost
*costs
= aarch64_tune_params
.vec_costs
;
8181 if (vectype
!= NULL
)
8182 fp
= FLOAT_TYPE_P (vectype
);
8184 switch (type_of_cost
)
8187 return fp
? costs
->scalar_fp_stmt_cost
: costs
->scalar_int_stmt_cost
;
8190 return costs
->scalar_load_cost
;
8193 return costs
->scalar_store_cost
;
8196 return fp
? costs
->vec_fp_stmt_cost
: costs
->vec_int_stmt_cost
;
8199 return costs
->vec_align_load_cost
;
8202 return costs
->vec_store_cost
;
8205 return costs
->vec_to_scalar_cost
;
8208 return costs
->scalar_to_vec_cost
;
8210 case unaligned_load
:
8211 return costs
->vec_unalign_load_cost
;
8213 case unaligned_store
:
8214 return costs
->vec_unalign_store_cost
;
8216 case cond_branch_taken
:
8217 return costs
->cond_taken_branch_cost
;
8219 case cond_branch_not_taken
:
8220 return costs
->cond_not_taken_branch_cost
;
8223 return costs
->vec_permute_cost
;
8225 case vec_promote_demote
:
8226 return fp
? costs
->vec_fp_stmt_cost
: costs
->vec_int_stmt_cost
;
8229 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
8230 return elements
/ 2 + 1;
8237 /* Implement targetm.vectorize.add_stmt_cost. */
8239 aarch64_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
8240 struct _stmt_vec_info
*stmt_info
, int misalign
,
8241 enum vect_cost_model_location where
)
8243 unsigned *cost
= (unsigned *) data
;
8244 unsigned retval
= 0;
8246 if (flag_vect_cost_model
)
8248 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
8250 aarch64_builtin_vectorization_cost (kind
, vectype
, misalign
);
8252 /* Statements in an inner loop relative to the loop being
8253 vectorized are weighted more heavily. The value here is
8254 arbitrary and could potentially be improved with analysis. */
8255 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
8256 count
*= 50; /* FIXME */
8258 retval
= (unsigned) (count
* stmt_cost
);
8259 cost
[where
] += retval
;
8265 static void initialize_aarch64_code_model (struct gcc_options
*);
8267 /* Parse the TO_PARSE string and put the architecture struct that it
8268 selects into RES and the architectural features into ISA_FLAGS.
8269 Return an aarch64_parse_opt_result describing the parse result.
8270 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
8272 static enum aarch64_parse_opt_result
8273 aarch64_parse_arch (const char *to_parse
, const struct processor
**res
,
8274 unsigned long *isa_flags
)
8277 const struct processor
*arch
;
8278 char *str
= (char *) alloca (strlen (to_parse
) + 1);
8281 strcpy (str
, to_parse
);
8283 ext
= strchr (str
, '+');
8291 return AARCH64_PARSE_MISSING_ARG
;
8294 /* Loop through the list of supported ARCHes to find a match. */
8295 for (arch
= all_architectures
; arch
->name
!= NULL
; arch
++)
8297 if (strlen (arch
->name
) == len
&& strncmp (arch
->name
, str
, len
) == 0)
8299 unsigned long isa_temp
= arch
->flags
;
8303 /* TO_PARSE string contains at least one extension. */
8304 enum aarch64_parse_opt_result ext_res
8305 = aarch64_parse_extension (ext
, &isa_temp
);
8307 if (ext_res
!= AARCH64_PARSE_OK
)
8310 /* Extension parsing was successful. Confirm the result
8311 arch and ISA flags. */
8313 *isa_flags
= isa_temp
;
8314 return AARCH64_PARSE_OK
;
8318 /* ARCH name not found in list. */
8319 return AARCH64_PARSE_INVALID_ARG
;
8322 /* Parse the TO_PARSE string and put the result tuning in RES and the
8323 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
8324 describing the parse result. If there is an error parsing, RES and
8325 ISA_FLAGS are left unchanged. */
8327 static enum aarch64_parse_opt_result
8328 aarch64_parse_cpu (const char *to_parse
, const struct processor
**res
,
8329 unsigned long *isa_flags
)
8332 const struct processor
*cpu
;
8333 char *str
= (char *) alloca (strlen (to_parse
) + 1);
8336 strcpy (str
, to_parse
);
8338 ext
= strchr (str
, '+');
8346 return AARCH64_PARSE_MISSING_ARG
;
8349 /* Loop through the list of supported CPUs to find a match. */
8350 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
8352 if (strlen (cpu
->name
) == len
&& strncmp (cpu
->name
, str
, len
) == 0)
8354 unsigned long isa_temp
= cpu
->flags
;
8359 /* TO_PARSE string contains at least one extension. */
8360 enum aarch64_parse_opt_result ext_res
8361 = aarch64_parse_extension (ext
, &isa_temp
);
8363 if (ext_res
!= AARCH64_PARSE_OK
)
8366 /* Extension parsing was successfull. Confirm the result
8367 cpu and ISA flags. */
8369 *isa_flags
= isa_temp
;
8370 return AARCH64_PARSE_OK
;
8374 /* CPU name not found in list. */
8375 return AARCH64_PARSE_INVALID_ARG
;
8378 /* Parse the TO_PARSE string and put the cpu it selects into RES.
8379 Return an aarch64_parse_opt_result describing the parse result.
8380 If the parsing fails the RES does not change. */
8382 static enum aarch64_parse_opt_result
8383 aarch64_parse_tune (const char *to_parse
, const struct processor
**res
)
8385 const struct processor
*cpu
;
8386 char *str
= (char *) alloca (strlen (to_parse
) + 1);
8388 strcpy (str
, to_parse
);
8390 /* Loop through the list of supported CPUs to find a match. */
8391 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
8393 if (strcmp (cpu
->name
, str
) == 0)
8396 return AARCH64_PARSE_OK
;
8400 /* CPU name not found in list. */
8401 return AARCH64_PARSE_INVALID_ARG
;
8404 /* Parse TOKEN, which has length LENGTH to see if it is an option
8405 described in FLAG. If it is, return the index bit for that fusion type.
8406 If not, error (printing OPTION_NAME) and return zero. */
8409 aarch64_parse_one_option_token (const char *token
,
8411 const struct aarch64_flag_desc
*flag
,
8412 const char *option_name
)
8414 for (; flag
->name
!= NULL
; flag
++)
8416 if (length
== strlen (flag
->name
)
8417 && !strncmp (flag
->name
, token
, length
))
8421 error ("unknown flag passed in -moverride=%s (%s)", option_name
, token
);
8425 /* Parse OPTION which is a comma-separated list of flags to enable.
8426 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
8427 default state we inherit from the CPU tuning structures. OPTION_NAME
8428 gives the top-level option we are parsing in the -moverride string,
8429 for use in error messages. */
8432 aarch64_parse_boolean_options (const char *option
,
8433 const struct aarch64_flag_desc
*flags
,
8434 unsigned int initial_state
,
8435 const char *option_name
)
8437 const char separator
= '.';
8438 const char* specs
= option
;
8439 const char* ntoken
= option
;
8440 unsigned int found_flags
= initial_state
;
8442 while ((ntoken
= strchr (specs
, separator
)))
8444 size_t token_length
= ntoken
- specs
;
8445 unsigned token_ops
= aarch64_parse_one_option_token (specs
,
8449 /* If we find "none" (or, for simplicity's sake, an error) anywhere
8450 in the token stream, reset the supported operations. So:
8452 adrp+add.cmp+branch.none.adrp+add
8454 would have the result of turning on only adrp+add fusion. */
8458 found_flags
|= token_ops
;
8462 /* We ended with a comma, print something. */
8465 error ("%s string ill-formed\n", option_name
);
8469 /* We still have one more token to parse. */
8470 size_t token_length
= strlen (specs
);
8471 unsigned token_ops
= aarch64_parse_one_option_token (specs
,
8478 found_flags
|= token_ops
;
8482 /* Support for overriding instruction fusion. */
8485 aarch64_parse_fuse_string (const char *fuse_string
,
8486 struct tune_params
*tune
)
8488 tune
->fusible_ops
= aarch64_parse_boolean_options (fuse_string
,
8489 aarch64_fusible_pairs
,
8494 /* Support for overriding other tuning flags. */
8497 aarch64_parse_tune_string (const char *tune_string
,
8498 struct tune_params
*tune
)
8500 tune
->extra_tuning_flags
8501 = aarch64_parse_boolean_options (tune_string
,
8502 aarch64_tuning_flags
,
8503 tune
->extra_tuning_flags
,
8507 /* Parse TOKEN, which has length LENGTH to see if it is a tuning option
8508 we understand. If it is, extract the option string and handoff to
8509 the appropriate function. */
8512 aarch64_parse_one_override_token (const char* token
,
8514 struct tune_params
*tune
)
8516 const struct aarch64_tuning_override_function
*fn
8517 = aarch64_tuning_override_functions
;
8519 const char *option_part
= strchr (token
, '=');
8522 error ("tuning string missing in option (%s)", token
);
8526 /* Get the length of the option name. */
8527 length
= option_part
- token
;
8528 /* Skip the '=' to get to the option string. */
8531 for (; fn
->name
!= NULL
; fn
++)
8533 if (!strncmp (fn
->name
, token
, length
))
8535 fn
->parse_override (option_part
, tune
);
8540 error ("unknown tuning option (%s)",token
);
8544 /* A checking mechanism for the implementation of the tls size. */
8547 initialize_aarch64_tls_size (struct gcc_options
*opts
)
8549 if (aarch64_tls_size
== 0)
8550 aarch64_tls_size
= 24;
8552 switch (opts
->x_aarch64_cmodel_var
)
8554 case AARCH64_CMODEL_TINY
:
8555 /* Both the default and maximum TLS size allowed under tiny is 1M which
8556 needs two instructions to address, so we clamp the size to 24. */
8557 if (aarch64_tls_size
> 24)
8558 aarch64_tls_size
= 24;
8560 case AARCH64_CMODEL_SMALL
:
8561 /* The maximum TLS size allowed under small is 4G. */
8562 if (aarch64_tls_size
> 32)
8563 aarch64_tls_size
= 32;
8565 case AARCH64_CMODEL_LARGE
:
8566 /* The maximum TLS size allowed under large is 16E.
8567 FIXME: 16E should be 64bit, we only support 48bit offset now. */
8568 if (aarch64_tls_size
> 48)
8569 aarch64_tls_size
= 48;
8578 /* Parse STRING looking for options in the format:
8579 string :: option:string
8580 option :: name=substring
8582 substring :: defined by option. */
8585 aarch64_parse_override_string (const char* input_string
,
8586 struct tune_params
* tune
)
8588 const char separator
= ':';
8589 size_t string_length
= strlen (input_string
) + 1;
8590 char *string_root
= (char *) xmalloc (sizeof (*string_root
) * string_length
);
8591 char *string
= string_root
;
8592 strncpy (string
, input_string
, string_length
);
8593 string
[string_length
- 1] = '\0';
8595 char* ntoken
= string
;
8597 while ((ntoken
= strchr (string
, separator
)))
8599 size_t token_length
= ntoken
- string
;
8600 /* Make this substring look like a string. */
8602 aarch64_parse_one_override_token (string
, token_length
, tune
);
8606 /* One last option to parse. */
8607 aarch64_parse_one_override_token (string
, strlen (string
), tune
);
8613 aarch64_override_options_after_change_1 (struct gcc_options
*opts
)
8615 /* The logic here is that if we are disabling all frame pointer generation
8616 then we do not need to disable leaf frame pointer generation as a
8617 separate operation. But if we are *only* disabling leaf frame pointer
8618 generation then we set flag_omit_frame_pointer to true, but in
8619 aarch64_frame_pointer_required we return false only for leaf functions.
8621 PR 70044: We have to be careful about being called multiple times for the
8622 same function. Once we have decided to set flag_omit_frame_pointer just
8623 so that we can omit leaf frame pointers, we must then not interpret a
8624 second call as meaning that all frame pointer generation should be
8625 omitted. We do this by setting flag_omit_frame_pointer to a special,
8627 if (opts
->x_flag_omit_frame_pointer
== 2)
8628 opts
->x_flag_omit_frame_pointer
= 0;
8630 if (opts
->x_flag_omit_frame_pointer
)
8631 opts
->x_flag_omit_leaf_frame_pointer
= false;
8632 else if (opts
->x_flag_omit_leaf_frame_pointer
)
8633 opts
->x_flag_omit_frame_pointer
= 2;
8635 /* If not optimizing for size, set the default
8636 alignment to what the target wants. */
8637 if (!opts
->x_optimize_size
)
8639 if (opts
->x_align_loops
<= 0)
8640 opts
->x_align_loops
= aarch64_tune_params
.loop_align
;
8641 if (opts
->x_align_jumps
<= 0)
8642 opts
->x_align_jumps
= aarch64_tune_params
.jump_align
;
8643 if (opts
->x_align_functions
<= 0)
8644 opts
->x_align_functions
= aarch64_tune_params
.function_align
;
8647 /* We default to no pc-relative literal loads. */
8649 aarch64_pcrelative_literal_loads
= false;
8651 /* If -mpc-relative-literal-loads is set on the command line, this
8652 implies that the user asked for PC relative literal loads. */
8653 if (opts
->x_pcrelative_literal_loads
== 1)
8654 aarch64_pcrelative_literal_loads
= true;
8656 /* This is PR70113. When building the Linux kernel with
8657 CONFIG_ARM64_ERRATUM_843419, support for relocations
8658 R_AARCH64_ADR_PREL_PG_HI21 and R_AARCH64_ADR_PREL_PG_HI21_NC is
8659 removed from the kernel to avoid loading objects with possibly
8660 offending sequences. Without -mpc-relative-literal-loads we would
8661 generate such relocations, preventing the kernel build from
8663 if (opts
->x_pcrelative_literal_loads
== 2
8664 && TARGET_FIX_ERR_A53_843419
)
8665 aarch64_pcrelative_literal_loads
= true;
8667 /* In the tiny memory model it makes no sense to disallow PC relative
8668 literal pool loads. */
8669 if (aarch64_cmodel
== AARCH64_CMODEL_TINY
8670 || aarch64_cmodel
== AARCH64_CMODEL_TINY_PIC
)
8671 aarch64_pcrelative_literal_loads
= true;
8673 /* When enabling the lower precision Newton series for the square root, also
8674 enable it for the reciprocal square root, since the latter is an
8675 intermediary step for the former. */
8676 if (flag_mlow_precision_sqrt
)
8677 flag_mrecip_low_precision_sqrt
= true;
8680 /* 'Unpack' up the internal tuning structs and update the options
8681 in OPTS. The caller must have set up selected_tune and selected_arch
8682 as all the other target-specific codegen decisions are
8683 derived from them. */
8686 aarch64_override_options_internal (struct gcc_options
*opts
)
8688 aarch64_tune_flags
= selected_tune
->flags
;
8689 aarch64_tune
= selected_tune
->sched_core
;
8690 /* Make a copy of the tuning parameters attached to the core, which
8691 we may later overwrite. */
8692 aarch64_tune_params
= *(selected_tune
->tune
);
8693 aarch64_architecture_version
= selected_arch
->architecture_version
;
8695 if (opts
->x_aarch64_override_tune_string
)
8696 aarch64_parse_override_string (opts
->x_aarch64_override_tune_string
,
8697 &aarch64_tune_params
);
8699 /* This target defaults to strict volatile bitfields. */
8700 if (opts
->x_flag_strict_volatile_bitfields
< 0 && abi_version_at_least (2))
8701 opts
->x_flag_strict_volatile_bitfields
= 1;
8703 initialize_aarch64_code_model (opts
);
8704 initialize_aarch64_tls_size (opts
);
8706 int queue_depth
= 0;
8707 switch (aarch64_tune_params
.autoprefetcher_model
)
8709 case tune_params::AUTOPREFETCHER_OFF
:
8712 case tune_params::AUTOPREFETCHER_WEAK
:
8715 case tune_params::AUTOPREFETCHER_STRONG
:
8716 queue_depth
= max_insn_queue_index
+ 1;
8722 /* We don't mind passing in global_options_set here as we don't use
8723 the *options_set structs anyway. */
8724 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
8726 opts
->x_param_values
,
8727 global_options_set
.x_param_values
);
8729 /* Set the L1 cache line size. */
8730 if (selected_cpu
->tune
->cache_line_size
!= 0)
8731 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
8732 selected_cpu
->tune
->cache_line_size
,
8733 opts
->x_param_values
,
8734 global_options_set
.x_param_values
);
8736 aarch64_override_options_after_change_1 (opts
);
8739 /* Print a hint with a suggestion for a core or architecture name that
8740 most closely resembles what the user passed in STR. ARCH is true if
8741 the user is asking for an architecture name. ARCH is false if the user
8742 is asking for a core name. */
8745 aarch64_print_hint_for_core_or_arch (const char *str
, bool arch
)
8747 auto_vec
<const char *> candidates
;
8748 const struct processor
*entry
= arch
? all_architectures
: all_cores
;
8749 for (; entry
->name
!= NULL
; entry
++)
8750 candidates
.safe_push (entry
->name
);
8752 const char *hint
= candidates_list_and_hint (str
, s
, candidates
);
8754 inform (input_location
, "valid arguments are: %s;"
8755 " did you mean %qs?", s
, hint
);
8759 /* Print a hint with a suggestion for a core name that most closely resembles
8760 what the user passed in STR. */
8763 aarch64_print_hint_for_core (const char *str
)
8765 aarch64_print_hint_for_core_or_arch (str
, false);
8768 /* Print a hint with a suggestion for an architecture name that most closely
8769 resembles what the user passed in STR. */
8772 aarch64_print_hint_for_arch (const char *str
)
8774 aarch64_print_hint_for_core_or_arch (str
, true);
8777 /* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
8778 specified in STR and throw errors if appropriate. Put the results if
8779 they are valid in RES and ISA_FLAGS. Return whether the option is
8783 aarch64_validate_mcpu (const char *str
, const struct processor
**res
,
8784 unsigned long *isa_flags
)
8786 enum aarch64_parse_opt_result parse_res
8787 = aarch64_parse_cpu (str
, res
, isa_flags
);
8789 if (parse_res
== AARCH64_PARSE_OK
)
8794 case AARCH64_PARSE_MISSING_ARG
:
8795 error ("missing cpu name in %<-mcpu=%s%>", str
);
8797 case AARCH64_PARSE_INVALID_ARG
:
8798 error ("unknown value %qs for -mcpu", str
);
8799 aarch64_print_hint_for_core (str
);
8801 case AARCH64_PARSE_INVALID_FEATURE
:
8802 error ("invalid feature modifier in %<-mcpu=%s%>", str
);
8811 /* Validate a command-line -march option. Parse the arch and extensions
8812 (if any) specified in STR and throw errors if appropriate. Put the
8813 results, if they are valid, in RES and ISA_FLAGS. Return whether the
8817 aarch64_validate_march (const char *str
, const struct processor
**res
,
8818 unsigned long *isa_flags
)
8820 enum aarch64_parse_opt_result parse_res
8821 = aarch64_parse_arch (str
, res
, isa_flags
);
8823 if (parse_res
== AARCH64_PARSE_OK
)
8828 case AARCH64_PARSE_MISSING_ARG
:
8829 error ("missing arch name in %<-march=%s%>", str
);
8831 case AARCH64_PARSE_INVALID_ARG
:
8832 error ("unknown value %qs for -march", str
);
8833 aarch64_print_hint_for_arch (str
);
8835 case AARCH64_PARSE_INVALID_FEATURE
:
8836 error ("invalid feature modifier in %<-march=%s%>", str
);
8845 /* Validate a command-line -mtune option. Parse the cpu
8846 specified in STR and throw errors if appropriate. Put the
8847 result, if it is valid, in RES. Return whether the option is
8851 aarch64_validate_mtune (const char *str
, const struct processor
**res
)
8853 enum aarch64_parse_opt_result parse_res
8854 = aarch64_parse_tune (str
, res
);
8856 if (parse_res
== AARCH64_PARSE_OK
)
8861 case AARCH64_PARSE_MISSING_ARG
:
8862 error ("missing cpu name in %<-mtune=%s%>", str
);
8864 case AARCH64_PARSE_INVALID_ARG
:
8865 error ("unknown value %qs for -mtune", str
);
8866 aarch64_print_hint_for_core (str
);
8874 /* Return the CPU corresponding to the enum CPU.
8875 If it doesn't specify a cpu, return the default. */
8877 static const struct processor
*
8878 aarch64_get_tune_cpu (enum aarch64_processor cpu
)
8880 if (cpu
!= aarch64_none
)
8881 return &all_cores
[cpu
];
8883 /* The & 0x3f is to extract the bottom 6 bits that encode the
8884 default cpu as selected by the --with-cpu GCC configure option
8886 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
8887 flags mechanism should be reworked to make it more sane. */
8888 return &all_cores
[TARGET_CPU_DEFAULT
& 0x3f];
8891 /* Return the architecture corresponding to the enum ARCH.
8892 If it doesn't specify a valid architecture, return the default. */
8894 static const struct processor
*
8895 aarch64_get_arch (enum aarch64_arch arch
)
8897 if (arch
!= aarch64_no_arch
)
8898 return &all_architectures
[arch
];
8900 const struct processor
*cpu
= &all_cores
[TARGET_CPU_DEFAULT
& 0x3f];
8902 return &all_architectures
[cpu
->arch
];
8905 /* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
8906 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
8907 tuning structs. In particular it must set selected_tune and
8908 aarch64_isa_flags that define the available ISA features and tuning
8909 decisions. It must also set selected_arch as this will be used to
8910 output the .arch asm tags for each function. */
8913 aarch64_override_options (void)
8915 unsigned long cpu_isa
= 0;
8916 unsigned long arch_isa
= 0;
8917 aarch64_isa_flags
= 0;
8919 bool valid_cpu
= true;
8920 bool valid_tune
= true;
8921 bool valid_arch
= true;
8923 selected_cpu
= NULL
;
8924 selected_arch
= NULL
;
8925 selected_tune
= NULL
;
8927 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
8928 If either of -march or -mtune is given, they override their
8929 respective component of -mcpu. */
8930 if (aarch64_cpu_string
)
8931 valid_cpu
= aarch64_validate_mcpu (aarch64_cpu_string
, &selected_cpu
,
8934 if (aarch64_arch_string
)
8935 valid_arch
= aarch64_validate_march (aarch64_arch_string
, &selected_arch
,
8938 if (aarch64_tune_string
)
8939 valid_tune
= aarch64_validate_mtune (aarch64_tune_string
, &selected_tune
);
8941 /* If the user did not specify a processor, choose the default
8942 one for them. This will be the CPU set during configuration using
8943 --with-cpu, otherwise it is "generic". */
8948 selected_cpu
= &all_cores
[selected_arch
->ident
];
8949 aarch64_isa_flags
= arch_isa
;
8950 explicit_arch
= selected_arch
->arch
;
8954 /* Get default configure-time CPU. */
8955 selected_cpu
= aarch64_get_tune_cpu (aarch64_none
);
8956 aarch64_isa_flags
= TARGET_CPU_DEFAULT
>> 6;
8960 explicit_tune_core
= selected_tune
->ident
;
8962 /* If both -mcpu and -march are specified check that they are architecturally
8963 compatible, warn if they're not and prefer the -march ISA flags. */
8964 else if (selected_arch
)
8966 if (selected_arch
->arch
!= selected_cpu
->arch
)
8968 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
8969 all_architectures
[selected_cpu
->arch
].name
,
8970 selected_arch
->name
);
8972 aarch64_isa_flags
= arch_isa
;
8973 explicit_arch
= selected_arch
->arch
;
8974 explicit_tune_core
= selected_tune
? selected_tune
->ident
8975 : selected_cpu
->ident
;
8979 /* -mcpu but no -march. */
8980 aarch64_isa_flags
= cpu_isa
;
8981 explicit_tune_core
= selected_tune
? selected_tune
->ident
8982 : selected_cpu
->ident
;
8983 gcc_assert (selected_cpu
);
8984 selected_arch
= &all_architectures
[selected_cpu
->arch
];
8985 explicit_arch
= selected_arch
->arch
;
8988 /* Set the arch as well as we will need it when outputing
8989 the .arch directive in assembly. */
8992 gcc_assert (selected_cpu
);
8993 selected_arch
= &all_architectures
[selected_cpu
->arch
];
8997 selected_tune
= selected_cpu
;
8999 #ifndef HAVE_AS_MABI_OPTION
9000 /* The compiler may have been configured with 2.23.* binutils, which does
9001 not have support for ILP32. */
9003 error ("Assembler does not support -mabi=ilp32");
9006 if (aarch64_ra_sign_scope
!= AARCH64_FUNCTION_NONE
&& TARGET_ILP32
)
9007 sorry ("Return address signing is only supported for -mabi=lp64");
9009 /* Make sure we properly set up the explicit options. */
9010 if ((aarch64_cpu_string
&& valid_cpu
)
9011 || (aarch64_tune_string
&& valid_tune
))
9012 gcc_assert (explicit_tune_core
!= aarch64_none
);
9014 if ((aarch64_cpu_string
&& valid_cpu
)
9015 || (aarch64_arch_string
&& valid_arch
))
9016 gcc_assert (explicit_arch
!= aarch64_no_arch
);
9018 aarch64_override_options_internal (&global_options
);
9020 /* Save these options as the default ones in case we push and pop them later
9021 while processing functions with potential target attributes. */
9022 target_option_default_node
= target_option_current_node
9023 = build_target_option_node (&global_options
);
9026 /* Implement targetm.override_options_after_change. */
9029 aarch64_override_options_after_change (void)
9031 aarch64_override_options_after_change_1 (&global_options
);
9034 static struct machine_function
*
9035 aarch64_init_machine_status (void)
9037 struct machine_function
*machine
;
9038 machine
= ggc_cleared_alloc
<machine_function
> ();
9043 aarch64_init_expanders (void)
9045 init_machine_status
= aarch64_init_machine_status
;
9048 /* A checking mechanism for the implementation of the various code models. */
9050 initialize_aarch64_code_model (struct gcc_options
*opts
)
9052 if (opts
->x_flag_pic
)
9054 switch (opts
->x_aarch64_cmodel_var
)
9056 case AARCH64_CMODEL_TINY
:
9057 aarch64_cmodel
= AARCH64_CMODEL_TINY_PIC
;
9059 case AARCH64_CMODEL_SMALL
:
9060 #ifdef HAVE_AS_SMALL_PIC_RELOCS
9061 aarch64_cmodel
= (flag_pic
== 2
9062 ? AARCH64_CMODEL_SMALL_PIC
9063 : AARCH64_CMODEL_SMALL_SPIC
);
9065 aarch64_cmodel
= AARCH64_CMODEL_SMALL_PIC
;
9068 case AARCH64_CMODEL_LARGE
:
9069 sorry ("code model %qs with -f%s", "large",
9070 opts
->x_flag_pic
> 1 ? "PIC" : "pic");
9077 aarch64_cmodel
= opts
->x_aarch64_cmodel_var
;
9080 /* Implement TARGET_OPTION_SAVE. */
9083 aarch64_option_save (struct cl_target_option
*ptr
, struct gcc_options
*opts
)
9085 ptr
->x_aarch64_override_tune_string
= opts
->x_aarch64_override_tune_string
;
9088 /* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
9089 using the information saved in PTR. */
9092 aarch64_option_restore (struct gcc_options
*opts
, struct cl_target_option
*ptr
)
9094 opts
->x_explicit_tune_core
= ptr
->x_explicit_tune_core
;
9095 selected_tune
= aarch64_get_tune_cpu (ptr
->x_explicit_tune_core
);
9096 opts
->x_explicit_arch
= ptr
->x_explicit_arch
;
9097 selected_arch
= aarch64_get_arch (ptr
->x_explicit_arch
);
9098 opts
->x_aarch64_override_tune_string
= ptr
->x_aarch64_override_tune_string
;
9100 aarch64_override_options_internal (opts
);
9103 /* Implement TARGET_OPTION_PRINT. */
9106 aarch64_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
9108 const struct processor
*cpu
9109 = aarch64_get_tune_cpu (ptr
->x_explicit_tune_core
);
9110 unsigned long isa_flags
= ptr
->x_aarch64_isa_flags
;
9111 const struct processor
*arch
= aarch64_get_arch (ptr
->x_explicit_arch
);
9112 std::string extension
9113 = aarch64_get_extension_string_for_isa_flags (isa_flags
, arch
->flags
);
9115 fprintf (file
, "%*sselected tune = %s\n", indent
, "", cpu
->name
);
9116 fprintf (file
, "%*sselected arch = %s%s\n", indent
, "",
9117 arch
->name
, extension
.c_str ());
9120 static GTY(()) tree aarch64_previous_fndecl
;
9123 aarch64_reset_previous_fndecl (void)
9125 aarch64_previous_fndecl
= NULL
;
9128 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
9129 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
9130 make sure optab availability predicates are recomputed when necessary. */
9133 aarch64_save_restore_target_globals (tree new_tree
)
9135 if (TREE_TARGET_GLOBALS (new_tree
))
9136 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
9137 else if (new_tree
== target_option_default_node
)
9138 restore_target_globals (&default_target_globals
);
9140 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
9143 /* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
9144 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
9145 of the function, if such exists. This function may be called multiple
9146 times on a single function so use aarch64_previous_fndecl to avoid
9147 setting up identical state. */
9150 aarch64_set_current_function (tree fndecl
)
9152 if (!fndecl
|| fndecl
== aarch64_previous_fndecl
)
9155 tree old_tree
= (aarch64_previous_fndecl
9156 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl
)
9159 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
9161 /* If current function has no attributes but the previous one did,
9162 use the default node. */
9163 if (!new_tree
&& old_tree
)
9164 new_tree
= target_option_default_node
;
9166 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
9167 the default have been handled by aarch64_save_restore_target_globals from
9168 aarch64_pragma_target_parse. */
9169 if (old_tree
== new_tree
)
9172 aarch64_previous_fndecl
= fndecl
;
9174 /* First set the target options. */
9175 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
9177 aarch64_save_restore_target_globals (new_tree
);
/* Enum describing the various ways we can handle attributes.
   In many cases we can reuse the generic option handling machinery.  */

enum aarch64_attr_opt_type
{
  aarch64_attr_mask,	/* Attribute should set a bit in target_flags.  */
  aarch64_attr_bool,	/* Attribute sets or unsets a boolean variable.  */
  aarch64_attr_enum,	/* Attribute sets an enum variable.  */
  aarch64_attr_custom	/* Attribute requires a custom handling function.  */
};
9191 /* All the information needed to handle a target attribute.
9192 NAME is the name of the attribute.
9193 ATTR_TYPE specifies the type of behavior of the attribute as described
9194 in the definition of enum aarch64_attr_opt_type.
9195 ALLOW_NEG is true if the attribute supports a "no-" form.
9196 HANDLER is the function that takes the attribute string and whether
9197 it is a pragma or attribute and handles the option. It is needed only
9198 when the ATTR_TYPE is aarch64_attr_custom.
9199 OPT_NUM is the enum specifying the option that the attribute modifies.
9200 This is needed for attributes that mirror the behavior of a command-line
9201 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
9202 aarch64_attr_enum. */
9204 struct aarch64_attribute_info
9207 enum aarch64_attr_opt_type attr_type
;
9209 bool (*handler
) (const char *, const char *);
9210 enum opt_code opt_num
;
9213 /* Handle the ARCH_STR argument to the arch= target attribute.
9214 PRAGMA_OR_ATTR is used in potential error messages. */
9217 aarch64_handle_attr_arch (const char *str
, const char *pragma_or_attr
)
9219 const struct processor
*tmp_arch
= NULL
;
9220 enum aarch64_parse_opt_result parse_res
9221 = aarch64_parse_arch (str
, &tmp_arch
, &aarch64_isa_flags
);
9223 if (parse_res
== AARCH64_PARSE_OK
)
9225 gcc_assert (tmp_arch
);
9226 selected_arch
= tmp_arch
;
9227 explicit_arch
= selected_arch
->arch
;
9233 case AARCH64_PARSE_MISSING_ARG
:
9234 error ("missing architecture name in 'arch' target %s", pragma_or_attr
);
9236 case AARCH64_PARSE_INVALID_ARG
:
9237 error ("unknown value %qs for 'arch' target %s", str
, pragma_or_attr
);
9238 aarch64_print_hint_for_arch (str
);
9240 case AARCH64_PARSE_INVALID_FEATURE
:
9241 error ("invalid feature modifier %qs for 'arch' target %s",
9242 str
, pragma_or_attr
);
9251 /* Handle the argument CPU_STR to the cpu= target attribute.
9252 PRAGMA_OR_ATTR is used in potential error messages. */
9255 aarch64_handle_attr_cpu (const char *str
, const char *pragma_or_attr
)
9257 const struct processor
*tmp_cpu
= NULL
;
9258 enum aarch64_parse_opt_result parse_res
9259 = aarch64_parse_cpu (str
, &tmp_cpu
, &aarch64_isa_flags
);
9261 if (parse_res
== AARCH64_PARSE_OK
)
9263 gcc_assert (tmp_cpu
);
9264 selected_tune
= tmp_cpu
;
9265 explicit_tune_core
= selected_tune
->ident
;
9267 selected_arch
= &all_architectures
[tmp_cpu
->arch
];
9268 explicit_arch
= selected_arch
->arch
;
9274 case AARCH64_PARSE_MISSING_ARG
:
9275 error ("missing cpu name in 'cpu' target %s", pragma_or_attr
);
9277 case AARCH64_PARSE_INVALID_ARG
:
9278 error ("unknown value %qs for 'cpu' target %s", str
, pragma_or_attr
);
9279 aarch64_print_hint_for_core (str
);
9281 case AARCH64_PARSE_INVALID_FEATURE
:
9282 error ("invalid feature modifier %qs for 'cpu' target %s",
9283 str
, pragma_or_attr
);
9292 /* Handle the argument STR to the tune= target attribute.
9293 PRAGMA_OR_ATTR is used in potential error messages. */
9296 aarch64_handle_attr_tune (const char *str
, const char *pragma_or_attr
)
9298 const struct processor
*tmp_tune
= NULL
;
9299 enum aarch64_parse_opt_result parse_res
9300 = aarch64_parse_tune (str
, &tmp_tune
);
9302 if (parse_res
== AARCH64_PARSE_OK
)
9304 gcc_assert (tmp_tune
);
9305 selected_tune
= tmp_tune
;
9306 explicit_tune_core
= selected_tune
->ident
;
9312 case AARCH64_PARSE_INVALID_ARG
:
9313 error ("unknown value %qs for 'tune' target %s", str
, pragma_or_attr
);
9314 aarch64_print_hint_for_core (str
);
9323 /* Parse an architecture extensions target attribute string specified in STR.
9324 For example "+fp+nosimd". Show any errors if needed. Return TRUE
9325 if successful. Update aarch64_isa_flags to reflect the ISA features
9327 PRAGMA_OR_ATTR is used in potential error messages. */
9330 aarch64_handle_attr_isa_flags (char *str
, const char *pragma_or_attr
)
9332 enum aarch64_parse_opt_result parse_res
;
9333 unsigned long isa_flags
= aarch64_isa_flags
;
9335 /* We allow "+nothing" in the beginning to clear out all architectural
9336 features if the user wants to handpick specific features. */
9337 if (strncmp ("+nothing", str
, 8) == 0)
9343 parse_res
= aarch64_parse_extension (str
, &isa_flags
);
9345 if (parse_res
== AARCH64_PARSE_OK
)
9347 aarch64_isa_flags
= isa_flags
;
9353 case AARCH64_PARSE_MISSING_ARG
:
9354 error ("missing feature modifier in target %s %qs",
9355 pragma_or_attr
, str
);
9358 case AARCH64_PARSE_INVALID_FEATURE
:
9359 error ("invalid feature modifier in target %s %qs",
9360 pragma_or_attr
, str
);
9370 /* The target attributes that we support. On top of these we also support just
9371 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
9372 handled explicitly in aarch64_process_one_target_attr. */
9374 static const struct aarch64_attribute_info aarch64_attributes
[] =
9376 { "general-regs-only", aarch64_attr_mask
, false, NULL
,
9377 OPT_mgeneral_regs_only
},
9378 { "fix-cortex-a53-835769", aarch64_attr_bool
, true, NULL
,
9379 OPT_mfix_cortex_a53_835769
},
9380 { "fix-cortex-a53-843419", aarch64_attr_bool
, true, NULL
,
9381 OPT_mfix_cortex_a53_843419
},
9382 { "cmodel", aarch64_attr_enum
, false, NULL
, OPT_mcmodel_
},
9383 { "strict-align", aarch64_attr_mask
, false, NULL
, OPT_mstrict_align
},
9384 { "omit-leaf-frame-pointer", aarch64_attr_bool
, true, NULL
,
9385 OPT_momit_leaf_frame_pointer
},
9386 { "tls-dialect", aarch64_attr_enum
, false, NULL
, OPT_mtls_dialect_
},
9387 { "arch", aarch64_attr_custom
, false, aarch64_handle_attr_arch
,
9389 { "cpu", aarch64_attr_custom
, false, aarch64_handle_attr_cpu
, OPT_mcpu_
},
9390 { "tune", aarch64_attr_custom
, false, aarch64_handle_attr_tune
,
9392 { "sign-return-address", aarch64_attr_enum
, false, NULL
,
9393 OPT_msign_return_address_
},
9394 { NULL
, aarch64_attr_custom
, false, NULL
, OPT____
}
9397 /* Parse ARG_STR which contains the definition of one target attribute.
9398 Show appropriate errors if any or return true if the attribute is valid.
9399 PRAGMA_OR_ATTR holds the string to use in error messages about whether
9400 we're processing a target attribute or pragma. */
9403 aarch64_process_one_target_attr (char *arg_str
, const char* pragma_or_attr
)
9405 bool invert
= false;
9407 size_t len
= strlen (arg_str
);
9411 error ("malformed target %s", pragma_or_attr
);
9415 char *str_to_check
= (char *) alloca (len
+ 1);
9416 strcpy (str_to_check
, arg_str
);
9418 /* Skip leading whitespace. */
9419 while (*str_to_check
== ' ' || *str_to_check
== '\t')
9422 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
9423 It is easier to detect and handle it explicitly here rather than going
9424 through the machinery for the rest of the target attributes in this
9426 if (*str_to_check
== '+')
9427 return aarch64_handle_attr_isa_flags (str_to_check
, pragma_or_attr
);
9429 if (len
> 3 && strncmp (str_to_check
, "no-", 3) == 0)
9434 char *arg
= strchr (str_to_check
, '=');
9436 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
9437 and point ARG to "foo". */
9443 const struct aarch64_attribute_info
*p_attr
;
9445 for (p_attr
= aarch64_attributes
; p_attr
->name
; p_attr
++)
9447 /* If the names don't match up, or the user has given an argument
9448 to an attribute that doesn't accept one, or didn't give an argument
9449 to an attribute that expects one, fail to match. */
9450 if (strcmp (str_to_check
, p_attr
->name
) != 0)
9454 bool attr_need_arg_p
= p_attr
->attr_type
== aarch64_attr_custom
9455 || p_attr
->attr_type
== aarch64_attr_enum
;
9457 if (attr_need_arg_p
^ (arg
!= NULL
))
9459 error ("target %s %qs does not accept an argument",
9460 pragma_or_attr
, str_to_check
);
9464 /* If the name matches but the attribute does not allow "no-" versions
9465 then we can't match. */
9466 if (invert
&& !p_attr
->allow_neg
)
9468 error ("target %s %qs does not allow a negated form",
9469 pragma_or_attr
, str_to_check
);
9473 switch (p_attr
->attr_type
)
9475 /* Has a custom handler registered.
9476 For example, cpu=, arch=, tune=. */
9477 case aarch64_attr_custom
:
9478 gcc_assert (p_attr
->handler
);
9479 if (!p_attr
->handler (arg
, pragma_or_attr
))
9483 /* Either set or unset a boolean option. */
9484 case aarch64_attr_bool
:
9486 struct cl_decoded_option decoded
;
9488 generate_option (p_attr
->opt_num
, NULL
, !invert
,
9489 CL_TARGET
, &decoded
);
9490 aarch64_handle_option (&global_options
, &global_options_set
,
9491 &decoded
, input_location
);
9494 /* Set or unset a bit in the target_flags. aarch64_handle_option
9495 should know what mask to apply given the option number. */
9496 case aarch64_attr_mask
:
9498 struct cl_decoded_option decoded
;
9499 /* We only need to specify the option number.
9500 aarch64_handle_option will know which mask to apply. */
9501 decoded
.opt_index
= p_attr
->opt_num
;
9502 decoded
.value
= !invert
;
9503 aarch64_handle_option (&global_options
, &global_options_set
,
9504 &decoded
, input_location
);
9507 /* Use the option setting machinery to set an option to an enum. */
9508 case aarch64_attr_enum
:
9513 valid
= opt_enum_arg_to_value (p_attr
->opt_num
, arg
,
9517 set_option (&global_options
, NULL
, p_attr
->opt_num
, value
,
9518 NULL
, DK_UNSPECIFIED
, input_location
,
9523 error ("target %s %s=%s is not valid",
9524 pragma_or_attr
, str_to_check
, arg
);
9533 /* If we reached here we either have found an attribute and validated
9534 it or didn't match any. If we matched an attribute but its arguments
9535 were malformed we will have returned false already. */
/* Count how many times the character C appears in
   NUL-terminated string STR.  (Name keeps the historical
   "occurences" spelling for ABI/grep stability.)  */

static unsigned int
num_occurences_in_str (char c, char *str)
{
  unsigned int res = 0;
  while (*str != '\0')
    {
      if (*str == c)
	res++;

      str++;
    }

  return res;
}
9557 /* Parse the tree in ARGS that contains the target attribute information
9558 and update the global target options space. PRAGMA_OR_ATTR is a string
9559 to be used in error messages, specifying whether this is processing
9560 a target attribute or a target pragma. */
9563 aarch64_process_target_attr (tree args
, const char* pragma_or_attr
)
9565 if (TREE_CODE (args
) == TREE_LIST
)
9569 tree head
= TREE_VALUE (args
);
9572 if (!aarch64_process_target_attr (head
, pragma_or_attr
))
9575 args
= TREE_CHAIN (args
);
9581 if (TREE_CODE (args
) != STRING_CST
)
9583 error ("attribute %<target%> argument not a string");
9587 size_t len
= strlen (TREE_STRING_POINTER (args
));
9588 char *str_to_check
= (char *) alloca (len
+ 1);
9589 strcpy (str_to_check
, TREE_STRING_POINTER (args
));
9593 error ("malformed target %s value", pragma_or_attr
);
9597 /* Used to catch empty spaces between commas i.e.
9598 attribute ((target ("attr1,,attr2"))). */
9599 unsigned int num_commas
= num_occurences_in_str (',', str_to_check
);
9601 /* Handle multiple target attributes separated by ','. */
9602 char *token
= strtok (str_to_check
, ",");
9604 unsigned int num_attrs
= 0;
9608 if (!aarch64_process_one_target_attr (token
, pragma_or_attr
))
9610 error ("target %s %qs is invalid", pragma_or_attr
, token
);
9614 token
= strtok (NULL
, ",");
9617 if (num_attrs
!= num_commas
+ 1)
9619 error ("malformed target %s list %qs",
9620 pragma_or_attr
, TREE_STRING_POINTER (args
));
9627 /* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
9628 process attribute ((target ("..."))). */
9631 aarch64_option_valid_attribute_p (tree fndecl
, tree
, tree args
, int)
9633 struct cl_target_option cur_target
;
9636 tree new_target
, new_optimize
;
9637 tree existing_target
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
9639 /* If what we're processing is the current pragma string then the
9640 target option node is already stored in target_option_current_node
9641 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
9642 having to re-parse the string. This is especially useful to keep
9643 arm_neon.h compile times down since that header contains a lot
9644 of intrinsics enclosed in pragmas. */
9645 if (!existing_target
&& args
== current_target_pragma
)
9647 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = target_option_current_node
;
9650 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
9652 old_optimize
= build_optimization_node (&global_options
);
9653 func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
9655 /* If the function changed the optimization levels as well as setting
9656 target options, start with the optimizations specified. */
9657 if (func_optimize
&& func_optimize
!= old_optimize
)
9658 cl_optimization_restore (&global_options
,
9659 TREE_OPTIMIZATION (func_optimize
));
9661 /* Save the current target options to restore at the end. */
9662 cl_target_option_save (&cur_target
, &global_options
);
9664 /* If fndecl already has some target attributes applied to it, unpack
9665 them so that we add this attribute on top of them, rather than
9666 overwriting them. */
9667 if (existing_target
)
9669 struct cl_target_option
*existing_options
9670 = TREE_TARGET_OPTION (existing_target
);
9672 if (existing_options
)
9673 cl_target_option_restore (&global_options
, existing_options
);
9676 cl_target_option_restore (&global_options
,
9677 TREE_TARGET_OPTION (target_option_current_node
));
9680 ret
= aarch64_process_target_attr (args
, "attribute");
9682 /* Set up any additional state. */
9685 aarch64_override_options_internal (&global_options
);
9686 /* Initialize SIMD builtins if we haven't already.
9687 Set current_target_pragma to NULL for the duration so that
9688 the builtin initialization code doesn't try to tag the functions
9689 being built with the attributes specified by any current pragma, thus
9690 going into an infinite recursion. */
9693 tree saved_current_target_pragma
= current_target_pragma
;
9694 current_target_pragma
= NULL
;
9695 aarch64_init_simd_builtins ();
9696 current_target_pragma
= saved_current_target_pragma
;
9698 new_target
= build_target_option_node (&global_options
);
9703 new_optimize
= build_optimization_node (&global_options
);
9707 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
9709 if (old_optimize
!= new_optimize
)
9710 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
9713 cl_target_option_restore (&global_options
, &cur_target
);
9715 if (old_optimize
!= new_optimize
)
9716 cl_optimization_restore (&global_options
,
9717 TREE_OPTIMIZATION (old_optimize
));
/* Helper for aarch64_can_inline_p.  In the case where CALLER and CALLEE are
   tri-bool options (yes, no, don't care) and the default value is
   DEF, determine whether to reject inlining.  Returns true if inlining
   is allowed.  */

static bool
aarch64_tribools_ok_for_inlining_p (int caller, int callee,
				    int dont_care, int def)
{
  /* If the callee doesn't care, always allow inlining.  */
  if (callee == dont_care)
    return true;

  /* If the caller doesn't care, always allow inlining.  */
  if (caller == dont_care)
    return true;

  /* Otherwise, allow inlining if either the callee and caller values
     agree, or if the callee is using the default value.  */
  return (callee == caller || callee == def);
}
9742 /* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
9743 to inline CALLEE into CALLER based on target-specific info.
9744 Make sure that the caller and callee have compatible architectural
9745 features. Then go through the other possible target attributes
9746 and see if they can block inlining. Try not to reject always_inline
9747 callees unless they are incompatible architecturally. */
9750 aarch64_can_inline_p (tree caller
, tree callee
)
9752 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
9753 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
9755 /* If callee has no option attributes, then it is ok to inline. */
9759 struct cl_target_option
*caller_opts
9760 = TREE_TARGET_OPTION (caller_tree
? caller_tree
9761 : target_option_default_node
);
9763 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
9766 /* Callee's ISA flags should be a subset of the caller's. */
9767 if ((caller_opts
->x_aarch64_isa_flags
& callee_opts
->x_aarch64_isa_flags
)
9768 != callee_opts
->x_aarch64_isa_flags
)
9771 /* Allow non-strict aligned functions inlining into strict
9773 if ((TARGET_STRICT_ALIGN_P (caller_opts
->x_target_flags
)
9774 != TARGET_STRICT_ALIGN_P (callee_opts
->x_target_flags
))
9775 && !(!TARGET_STRICT_ALIGN_P (callee_opts
->x_target_flags
)
9776 && TARGET_STRICT_ALIGN_P (caller_opts
->x_target_flags
)))
9779 bool always_inline
= lookup_attribute ("always_inline",
9780 DECL_ATTRIBUTES (callee
));
9782 /* If the architectural features match up and the callee is always_inline
9783 then the other attributes don't matter. */
9787 if (caller_opts
->x_aarch64_cmodel_var
9788 != callee_opts
->x_aarch64_cmodel_var
)
9791 if (caller_opts
->x_aarch64_tls_dialect
9792 != callee_opts
->x_aarch64_tls_dialect
)
9795 /* Honour explicit requests to workaround errata. */
9796 if (!aarch64_tribools_ok_for_inlining_p (
9797 caller_opts
->x_aarch64_fix_a53_err835769
,
9798 callee_opts
->x_aarch64_fix_a53_err835769
,
9799 2, TARGET_FIX_ERR_A53_835769_DEFAULT
))
9802 if (!aarch64_tribools_ok_for_inlining_p (
9803 caller_opts
->x_aarch64_fix_a53_err843419
,
9804 callee_opts
->x_aarch64_fix_a53_err843419
,
9805 2, TARGET_FIX_ERR_A53_843419
))
9808 /* If the user explicitly specified -momit-leaf-frame-pointer for the
9809 caller and calle and they don't match up, reject inlining. */
9810 if (!aarch64_tribools_ok_for_inlining_p (
9811 caller_opts
->x_flag_omit_leaf_frame_pointer
,
9812 callee_opts
->x_flag_omit_leaf_frame_pointer
,
9816 /* If the callee has specific tuning overrides, respect them. */
9817 if (callee_opts
->x_aarch64_override_tune_string
!= NULL
9818 && caller_opts
->x_aarch64_override_tune_string
== NULL
)
9821 /* If the user specified tuning override strings for the
9822 caller and callee and they don't match up, reject inlining.
9823 We just do a string compare here, we don't analyze the meaning
9824 of the string, as it would be too costly for little gain. */
9825 if (callee_opts
->x_aarch64_override_tune_string
9826 && caller_opts
->x_aarch64_override_tune_string
9827 && (strcmp (callee_opts
->x_aarch64_override_tune_string
,
9828 caller_opts
->x_aarch64_override_tune_string
) != 0))
9834 /* Return true if SYMBOL_REF X binds locally. */
9837 aarch64_symbol_binds_local_p (const_rtx x
)
9839 return (SYMBOL_REF_DECL (x
)
9840 ? targetm
.binds_local_p (SYMBOL_REF_DECL (x
))
9841 : SYMBOL_REF_LOCAL_P (x
));
9844 /* Return true if SYMBOL_REF X is thread local */
9846 aarch64_tls_symbol_p (rtx x
)
9848 if (! TARGET_HAVE_TLS
)
9851 if (GET_CODE (x
) != SYMBOL_REF
)
9854 return SYMBOL_REF_TLS_MODEL (x
) != 0;
9857 /* Classify a TLS symbol into one of the TLS kinds. */
9858 enum aarch64_symbol_type
9859 aarch64_classify_tls_symbol (rtx x
)
9861 enum tls_model tls_kind
= tls_symbolic_operand_type (x
);
9865 case TLS_MODEL_GLOBAL_DYNAMIC
:
9866 case TLS_MODEL_LOCAL_DYNAMIC
:
9867 return TARGET_TLS_DESC
? SYMBOL_SMALL_TLSDESC
: SYMBOL_SMALL_TLSGD
;
9869 case TLS_MODEL_INITIAL_EXEC
:
9870 switch (aarch64_cmodel
)
9872 case AARCH64_CMODEL_TINY
:
9873 case AARCH64_CMODEL_TINY_PIC
:
9874 return SYMBOL_TINY_TLSIE
;
9876 return SYMBOL_SMALL_TLSIE
;
9879 case TLS_MODEL_LOCAL_EXEC
:
9880 if (aarch64_tls_size
== 12)
9881 return SYMBOL_TLSLE12
;
9882 else if (aarch64_tls_size
== 24)
9883 return SYMBOL_TLSLE24
;
9884 else if (aarch64_tls_size
== 32)
9885 return SYMBOL_TLSLE32
;
9886 else if (aarch64_tls_size
== 48)
9887 return SYMBOL_TLSLE48
;
9891 case TLS_MODEL_EMULATED
:
9892 case TLS_MODEL_NONE
:
9893 return SYMBOL_FORCE_TO_MEM
;
9900 /* Return the method that should be used to access SYMBOL_REF or
9903 enum aarch64_symbol_type
9904 aarch64_classify_symbol (rtx x
, rtx offset
)
9906 if (GET_CODE (x
) == LABEL_REF
)
9908 switch (aarch64_cmodel
)
9910 case AARCH64_CMODEL_LARGE
:
9911 return SYMBOL_FORCE_TO_MEM
;
9913 case AARCH64_CMODEL_TINY_PIC
:
9914 case AARCH64_CMODEL_TINY
:
9915 return SYMBOL_TINY_ABSOLUTE
;
9917 case AARCH64_CMODEL_SMALL_SPIC
:
9918 case AARCH64_CMODEL_SMALL_PIC
:
9919 case AARCH64_CMODEL_SMALL
:
9920 return SYMBOL_SMALL_ABSOLUTE
;
9927 if (GET_CODE (x
) == SYMBOL_REF
)
9929 if (aarch64_tls_symbol_p (x
))
9930 return aarch64_classify_tls_symbol (x
);
9932 switch (aarch64_cmodel
)
9934 case AARCH64_CMODEL_TINY
:
9935 /* When we retrieve symbol + offset address, we have to make sure
9936 the offset does not cause overflow of the final address. But
9937 we have no way of knowing the address of symbol at compile time
9938 so we can't accurately say if the distance between the PC and
9939 symbol + offset is outside the addressible range of +/-1M in the
9940 TINY code model. So we rely on images not being greater than
9941 1M and cap the offset at 1M and anything beyond 1M will have to
9942 be loaded using an alternative mechanism. Furthermore if the
9943 symbol is a weak reference to something that isn't known to
9944 resolve to a symbol in this module, then force to memory. */
9945 if ((SYMBOL_REF_WEAK (x
)
9946 && !aarch64_symbol_binds_local_p (x
))
9947 || INTVAL (offset
) < -1048575 || INTVAL (offset
) > 1048575)
9948 return SYMBOL_FORCE_TO_MEM
;
9949 return SYMBOL_TINY_ABSOLUTE
;
9951 case AARCH64_CMODEL_SMALL
:
9952 /* Same reasoning as the tiny code model, but the offset cap here is
9954 if ((SYMBOL_REF_WEAK (x
)
9955 && !aarch64_symbol_binds_local_p (x
))
9956 || !IN_RANGE (INTVAL (offset
), HOST_WIDE_INT_C (-4294967263),
9957 HOST_WIDE_INT_C (4294967264)))
9958 return SYMBOL_FORCE_TO_MEM
;
9959 return SYMBOL_SMALL_ABSOLUTE
;
9961 case AARCH64_CMODEL_TINY_PIC
:
9962 if (!aarch64_symbol_binds_local_p (x
))
9963 return SYMBOL_TINY_GOT
;
9964 return SYMBOL_TINY_ABSOLUTE
;
9966 case AARCH64_CMODEL_SMALL_SPIC
:
9967 case AARCH64_CMODEL_SMALL_PIC
:
9968 if (!aarch64_symbol_binds_local_p (x
))
9969 return (aarch64_cmodel
== AARCH64_CMODEL_SMALL_SPIC
9970 ? SYMBOL_SMALL_GOT_28K
: SYMBOL_SMALL_GOT_4G
);
9971 return SYMBOL_SMALL_ABSOLUTE
;
9973 case AARCH64_CMODEL_LARGE
:
9974 /* This is alright even in PIC code as the constant
9975 pool reference is always PC relative and within
9976 the same translation unit. */
9977 if (CONSTANT_POOL_ADDRESS_P (x
))
9978 return SYMBOL_SMALL_ABSOLUTE
;
9980 return SYMBOL_FORCE_TO_MEM
;
9987 /* By default push everything into the constant pool. */
9988 return SYMBOL_FORCE_TO_MEM
;
9992 aarch64_constant_address_p (rtx x
)
9994 return (CONSTANT_P (x
) && memory_address_p (DImode
, x
));
9998 aarch64_legitimate_pic_operand_p (rtx x
)
10000 if (GET_CODE (x
) == SYMBOL_REF
10001 || (GET_CODE (x
) == CONST
10002 && GET_CODE (XEXP (x
, 0)) == PLUS
10003 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
10009 /* Return true if X holds either a quarter-precision or
10010 floating-point +0.0 constant. */
10012 aarch64_valid_floating_const (machine_mode mode
, rtx x
)
10014 if (!CONST_DOUBLE_P (x
))
10017 if (aarch64_float_const_zero_rtx_p (x
))
10020 /* We only handle moving 0.0 to a TFmode register. */
10021 if (!(mode
== SFmode
|| mode
== DFmode
))
10024 return aarch64_float_const_representable_p (x
);
10028 aarch64_legitimate_constant_p (machine_mode mode
, rtx x
)
10030 /* Do not allow vector struct mode constants. We could support
10031 0 and -1 easily, but they need support in aarch64-simd.md. */
10032 if (TARGET_SIMD
&& aarch64_vect_struct_mode_p (mode
))
10035 /* This could probably go away because
10036 we now decompose CONST_INTs according to expand_mov_immediate. */
10037 if ((GET_CODE (x
) == CONST_VECTOR
10038 && aarch64_simd_valid_immediate (x
, mode
, false, NULL
))
10039 || CONST_INT_P (x
) || aarch64_valid_floating_const (mode
, x
))
10040 return !targetm
.cannot_force_const_mem (mode
, x
);
10042 if (GET_CODE (x
) == HIGH
10043 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
10046 return aarch64_constant_address_p (x
);
10050 aarch64_load_tp (rtx target
)
10053 || GET_MODE (target
) != Pmode
10054 || !register_operand (target
, Pmode
))
10055 target
= gen_reg_rtx (Pmode
);
10057 /* Can return in any reg. */
10058 emit_insn (gen_aarch64_load_tp_hard (target
));
10062 /* On AAPCS systems, this is the "struct __va_list". */
10063 static GTY(()) tree va_list_type
;
10065 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
10066 Return the type to use as __builtin_va_list.
10068 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
10080 aarch64_build_builtin_va_list (void)
10083 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
10085 /* Create the type. */
10086 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
10087 /* Give it the required name. */
10088 va_list_name
= build_decl (BUILTINS_LOCATION
,
10090 get_identifier ("__va_list"),
10092 DECL_ARTIFICIAL (va_list_name
) = 1;
10093 TYPE_NAME (va_list_type
) = va_list_name
;
10094 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
10096 /* Create the fields. */
10097 f_stack
= build_decl (BUILTINS_LOCATION
,
10098 FIELD_DECL
, get_identifier ("__stack"),
10100 f_grtop
= build_decl (BUILTINS_LOCATION
,
10101 FIELD_DECL
, get_identifier ("__gr_top"),
10103 f_vrtop
= build_decl (BUILTINS_LOCATION
,
10104 FIELD_DECL
, get_identifier ("__vr_top"),
10106 f_groff
= build_decl (BUILTINS_LOCATION
,
10107 FIELD_DECL
, get_identifier ("__gr_offs"),
10108 integer_type_node
);
10109 f_vroff
= build_decl (BUILTINS_LOCATION
,
10110 FIELD_DECL
, get_identifier ("__vr_offs"),
10111 integer_type_node
);
10113 /* Tell tree-stdarg pass about our internal offset fields.
10114 NOTE: va_list_gpr/fpr_counter_field are only used for tree comparision
10115 purpose to identify whether the code is updating va_list internal
10116 offset fields through irregular way. */
10117 va_list_gpr_counter_field
= f_groff
;
10118 va_list_fpr_counter_field
= f_vroff
;
10120 DECL_ARTIFICIAL (f_stack
) = 1;
10121 DECL_ARTIFICIAL (f_grtop
) = 1;
10122 DECL_ARTIFICIAL (f_vrtop
) = 1;
10123 DECL_ARTIFICIAL (f_groff
) = 1;
10124 DECL_ARTIFICIAL (f_vroff
) = 1;
10126 DECL_FIELD_CONTEXT (f_stack
) = va_list_type
;
10127 DECL_FIELD_CONTEXT (f_grtop
) = va_list_type
;
10128 DECL_FIELD_CONTEXT (f_vrtop
) = va_list_type
;
10129 DECL_FIELD_CONTEXT (f_groff
) = va_list_type
;
10130 DECL_FIELD_CONTEXT (f_vroff
) = va_list_type
;
10132 TYPE_FIELDS (va_list_type
) = f_stack
;
10133 DECL_CHAIN (f_stack
) = f_grtop
;
10134 DECL_CHAIN (f_grtop
) = f_vrtop
;
10135 DECL_CHAIN (f_vrtop
) = f_groff
;
10136 DECL_CHAIN (f_groff
) = f_vroff
;
10138 /* Compute its layout. */
10139 layout_type (va_list_type
);
10141 return va_list_type
;
10144 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
10146 aarch64_expand_builtin_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
10148 const CUMULATIVE_ARGS
*cum
;
10149 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
10150 tree stack
, grtop
, vrtop
, groff
, vroff
;
10152 int gr_save_area_size
= cfun
->va_list_gpr_size
;
10153 int vr_save_area_size
= cfun
->va_list_fpr_size
;
10156 cum
= &crtl
->args
.info
;
10157 if (cfun
->va_list_gpr_size
)
10158 gr_save_area_size
= MIN ((NUM_ARG_REGS
- cum
->aapcs_ncrn
) * UNITS_PER_WORD
,
10159 cfun
->va_list_gpr_size
);
10160 if (cfun
->va_list_fpr_size
)
10161 vr_save_area_size
= MIN ((NUM_FP_ARG_REGS
- cum
->aapcs_nvrn
)
10162 * UNITS_PER_VREG
, cfun
->va_list_fpr_size
);
10166 gcc_assert (cum
->aapcs_nvrn
== 0);
10167 vr_save_area_size
= 0;
10170 f_stack
= TYPE_FIELDS (va_list_type_node
);
10171 f_grtop
= DECL_CHAIN (f_stack
);
10172 f_vrtop
= DECL_CHAIN (f_grtop
);
10173 f_groff
= DECL_CHAIN (f_vrtop
);
10174 f_vroff
= DECL_CHAIN (f_groff
);
10176 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), valist
, f_stack
,
10178 grtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
), valist
, f_grtop
,
10180 vrtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
), valist
, f_vrtop
,
10182 groff
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
), valist
, f_groff
,
10184 vroff
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
), valist
, f_vroff
,
10187 /* Emit code to initialize STACK, which points to the next varargs stack
10188 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
10189 by named arguments. STACK is 8-byte aligned. */
10190 t
= make_tree (TREE_TYPE (stack
), virtual_incoming_args_rtx
);
10191 if (cum
->aapcs_stack_size
> 0)
10192 t
= fold_build_pointer_plus_hwi (t
, cum
->aapcs_stack_size
* UNITS_PER_WORD
);
10193 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), stack
, t
);
10194 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
10196 /* Emit code to initialize GRTOP, the top of the GR save area.
10197 virtual_incoming_args_rtx should have been 16 byte aligned. */
10198 t
= make_tree (TREE_TYPE (grtop
), virtual_incoming_args_rtx
);
10199 t
= build2 (MODIFY_EXPR
, TREE_TYPE (grtop
), grtop
, t
);
10200 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
10202 /* Emit code to initialize VRTOP, the top of the VR save area.
10203 This address is gr_save_area_bytes below GRTOP, rounded
10204 down to the next 16-byte boundary. */
10205 t
= make_tree (TREE_TYPE (vrtop
), virtual_incoming_args_rtx
);
10206 vr_offset
= ROUND_UP (gr_save_area_size
,
10207 STACK_BOUNDARY
/ BITS_PER_UNIT
);
10210 t
= fold_build_pointer_plus_hwi (t
, -vr_offset
);
10211 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vrtop
), vrtop
, t
);
10212 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
10214 /* Emit code to initialize GROFF, the offset from GRTOP of the
10215 next GPR argument. */
10216 t
= build2 (MODIFY_EXPR
, TREE_TYPE (groff
), groff
,
10217 build_int_cst (TREE_TYPE (groff
), -gr_save_area_size
));
10218 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
10220 /* Likewise emit code to initialize VROFF, the offset from FTOP
10221 of the next VR argument. */
10222 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vroff
), vroff
,
10223 build_int_cst (TREE_TYPE (vroff
), -vr_save_area_size
));
10224 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
10227 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
10230 aarch64_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
10231 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
10235 bool is_ha
; /* is HFA or HVA. */
10236 bool dw_align
; /* double-word align. */
10237 machine_mode ag_mode
= VOIDmode
;
10241 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
10242 tree stack
, f_top
, f_off
, off
, arg
, roundup
, on_stack
;
10243 HOST_WIDE_INT size
, rsize
, adjust
, align
;
10244 tree t
, u
, cond1
, cond2
;
10246 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
10248 type
= build_pointer_type (type
);
10250 mode
= TYPE_MODE (type
);
10252 f_stack
= TYPE_FIELDS (va_list_type_node
);
10253 f_grtop
= DECL_CHAIN (f_stack
);
10254 f_vrtop
= DECL_CHAIN (f_grtop
);
10255 f_groff
= DECL_CHAIN (f_vrtop
);
10256 f_vroff
= DECL_CHAIN (f_groff
);
10258 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), unshare_expr (valist
),
10259 f_stack
, NULL_TREE
);
10260 size
= int_size_in_bytes (type
);
10261 struct aarch64_fn_arg_alignment aa
10262 = aarch64_function_arg_alignment (mode
, type
);
10263 align
= aa
.alignment
/ BITS_PER_UNIT
;
10267 if (aarch64_vfp_is_call_or_return_candidate (mode
,
10273 /* TYPE passed in fp/simd registers. */
10275 aarch64_err_no_fpadvsimd (mode
, "varargs");
10277 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
),
10278 unshare_expr (valist
), f_vrtop
, NULL_TREE
);
10279 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
),
10280 unshare_expr (valist
), f_vroff
, NULL_TREE
);
10282 rsize
= nregs
* UNITS_PER_VREG
;
10286 if (BYTES_BIG_ENDIAN
&& GET_MODE_SIZE (ag_mode
) < UNITS_PER_VREG
)
10287 adjust
= UNITS_PER_VREG
- GET_MODE_SIZE (ag_mode
);
10289 else if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
10290 && size
< UNITS_PER_VREG
)
10292 adjust
= UNITS_PER_VREG
- size
;
10297 /* TYPE passed in general registers. */
10298 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
),
10299 unshare_expr (valist
), f_grtop
, NULL_TREE
);
10300 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
),
10301 unshare_expr (valist
), f_groff
, NULL_TREE
);
10302 rsize
= ROUND_UP (size
, UNITS_PER_WORD
);
10303 nregs
= rsize
/ UNITS_PER_WORD
;
10308 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
10309 && size
< UNITS_PER_WORD
)
10311 adjust
= UNITS_PER_WORD
- size
;
10315 /* Get a local temporary for the field value. */
10316 off
= get_initialized_tmp_var (f_off
, pre_p
, NULL
);
10318 /* Emit code to branch if off >= 0. */
10319 t
= build2 (GE_EXPR
, boolean_type_node
, off
,
10320 build_int_cst (TREE_TYPE (off
), 0));
10321 cond1
= build3 (COND_EXPR
, ptr_type_node
, t
, NULL_TREE
, NULL_TREE
);
10325 /* Emit: offs = (offs + 15) & -16. */
10326 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
10327 build_int_cst (TREE_TYPE (off
), 15));
10328 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (off
), t
,
10329 build_int_cst (TREE_TYPE (off
), -16));
10330 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (off
), off
, t
);
10335 /* Update ap.__[g|v]r_offs */
10336 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
10337 build_int_cst (TREE_TYPE (off
), rsize
));
10338 t
= build2 (MODIFY_EXPR
, TREE_TYPE (f_off
), unshare_expr (f_off
), t
);
10342 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
10344 /* [cond2] if (ap.__[g|v]r_offs > 0) */
10345 u
= build2 (GT_EXPR
, boolean_type_node
, unshare_expr (f_off
),
10346 build_int_cst (TREE_TYPE (f_off
), 0));
10347 cond2
= build3 (COND_EXPR
, ptr_type_node
, u
, NULL_TREE
, NULL_TREE
);
10349 /* String up: make sure the assignment happens before the use. */
10350 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (cond2
), t
, cond2
);
10351 COND_EXPR_ELSE (cond1
) = t
;
10353 /* Prepare the trees handling the argument that is passed on the stack;
10354 the top level node will store in ON_STACK. */
10355 arg
= get_initialized_tmp_var (stack
, pre_p
, NULL
);
10358 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
10359 t
= fold_convert (intDI_type_node
, arg
);
10360 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
10361 build_int_cst (TREE_TYPE (t
), 15));
10362 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
10363 build_int_cst (TREE_TYPE (t
), -16));
10364 t
= fold_convert (TREE_TYPE (arg
), t
);
10365 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (arg
), arg
, t
);
10369 /* Advance ap.__stack */
10370 t
= fold_convert (intDI_type_node
, arg
);
10371 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
10372 build_int_cst (TREE_TYPE (t
), size
+ 7));
10373 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
10374 build_int_cst (TREE_TYPE (t
), -8));
10375 t
= fold_convert (TREE_TYPE (arg
), t
);
10376 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), unshare_expr (stack
), t
);
10377 /* String up roundup and advance. */
10379 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
10380 /* String up with arg */
10381 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), t
, arg
);
10382 /* Big-endianness related address adjustment. */
10383 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
10384 && size
< UNITS_PER_WORD
)
10386 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (arg
), arg
,
10387 size_int (UNITS_PER_WORD
- size
));
10388 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), on_stack
, t
);
10391 COND_EXPR_THEN (cond1
) = unshare_expr (on_stack
);
10392 COND_EXPR_THEN (cond2
) = unshare_expr (on_stack
);
10394 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
10397 t
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (off
), off
,
10398 build_int_cst (TREE_TYPE (off
), adjust
));
10400 t
= fold_convert (sizetype
, t
);
10401 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (f_top
), f_top
, t
);
10405 /* type ha; // treat as "struct {ftype field[n];}"
10406 ... [computing offs]
10407 for (i = 0; i <nregs; ++i, offs += 16)
10408 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
10411 tree tmp_ha
, field_t
, field_ptr_t
;
10413 /* Declare a local variable. */
10414 tmp_ha
= create_tmp_var_raw (type
, "ha");
10415 gimple_add_tmp_var (tmp_ha
);
10417 /* Establish the base type. */
10421 field_t
= float_type_node
;
10422 field_ptr_t
= float_ptr_type_node
;
10425 field_t
= double_type_node
;
10426 field_ptr_t
= double_ptr_type_node
;
10429 field_t
= long_double_type_node
;
10430 field_ptr_t
= long_double_ptr_type_node
;
10433 field_t
= aarch64_fp16_type_node
;
10434 field_ptr_t
= aarch64_fp16_ptr_type_node
;
10439 tree innertype
= make_signed_type (GET_MODE_PRECISION (SImode
));
10440 field_t
= build_vector_type_for_mode (innertype
, ag_mode
);
10441 field_ptr_t
= build_pointer_type (field_t
);
10448 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
10449 tmp_ha
= build1 (ADDR_EXPR
, field_ptr_t
, tmp_ha
);
10451 t
= fold_convert (field_ptr_t
, addr
);
10452 t
= build2 (MODIFY_EXPR
, field_t
,
10453 build1 (INDIRECT_REF
, field_t
, tmp_ha
),
10454 build1 (INDIRECT_REF
, field_t
, t
));
10456 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
10457 for (i
= 1; i
< nregs
; ++i
)
10459 addr
= fold_build_pointer_plus_hwi (addr
, UNITS_PER_VREG
);
10460 u
= fold_convert (field_ptr_t
, addr
);
10461 u
= build2 (MODIFY_EXPR
, field_t
,
10462 build2 (MEM_REF
, field_t
, tmp_ha
,
10463 build_int_cst (field_ptr_t
,
10465 int_size_in_bytes (field_t
)))),
10466 build1 (INDIRECT_REF
, field_t
, u
));
10467 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), t
, u
);
10470 u
= fold_convert (TREE_TYPE (f_top
), tmp_ha
);
10471 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (f_top
), t
, u
);
10474 COND_EXPR_ELSE (cond2
) = t
;
10475 addr
= fold_convert (build_pointer_type (type
), cond1
);
10476 addr
= build_va_arg_indirect_ref (addr
);
10479 addr
= build_va_arg_indirect_ref (addr
);
10484 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
10487 aarch64_setup_incoming_varargs (cumulative_args_t cum_v
, machine_mode mode
,
10488 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
10491 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
10492 CUMULATIVE_ARGS local_cum
;
10493 int gr_saved
= cfun
->va_list_gpr_size
;
10494 int vr_saved
= cfun
->va_list_fpr_size
;
10496 /* The caller has advanced CUM up to, but not beyond, the last named
10497 argument. Advance a local copy of CUM past the last "real" named
10498 argument, to find out how many registers are left over. */
10500 aarch64_function_arg_advance (pack_cumulative_args(&local_cum
), mode
, type
, true);
10502 /* Found out how many registers we need to save.
10503 Honor tree-stdvar analysis results. */
10504 if (cfun
->va_list_gpr_size
)
10505 gr_saved
= MIN (NUM_ARG_REGS
- local_cum
.aapcs_ncrn
,
10506 cfun
->va_list_gpr_size
/ UNITS_PER_WORD
);
10507 if (cfun
->va_list_fpr_size
)
10508 vr_saved
= MIN (NUM_FP_ARG_REGS
- local_cum
.aapcs_nvrn
,
10509 cfun
->va_list_fpr_size
/ UNITS_PER_VREG
);
10513 gcc_assert (local_cum
.aapcs_nvrn
== 0);
10523 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
10524 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
,
10525 - gr_saved
* UNITS_PER_WORD
);
10526 mem
= gen_frame_mem (BLKmode
, ptr
);
10527 set_mem_alias_set (mem
, get_varargs_alias_set ());
10529 move_block_from_reg (local_cum
.aapcs_ncrn
+ R0_REGNUM
,
10534 /* We can't use move_block_from_reg, because it will use
10535 the wrong mode, storing D regs only. */
10536 machine_mode mode
= TImode
;
10537 int off
, i
, vr_start
;
10539 /* Set OFF to the offset from virtual_incoming_args_rtx of
10540 the first vector register. The VR save area lies below
10541 the GR one, and is aligned to 16 bytes. */
10542 off
= -ROUND_UP (gr_saved
* UNITS_PER_WORD
,
10543 STACK_BOUNDARY
/ BITS_PER_UNIT
);
10544 off
-= vr_saved
* UNITS_PER_VREG
;
10546 vr_start
= V0_REGNUM
+ local_cum
.aapcs_nvrn
;
10547 for (i
= 0; i
< vr_saved
; ++i
)
10551 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
, off
);
10552 mem
= gen_frame_mem (mode
, ptr
);
10553 set_mem_alias_set (mem
, get_varargs_alias_set ());
10554 aarch64_emit_move (mem
, gen_rtx_REG (mode
, vr_start
+ i
));
10555 off
+= UNITS_PER_VREG
;
10560 /* We don't save the size into *PRETEND_SIZE because we want to avoid
10561 any complication of having crtl->args.pretend_args_size changed. */
10562 cfun
->machine
->frame
.saved_varargs_size
10563 = (ROUND_UP (gr_saved
* UNITS_PER_WORD
,
10564 STACK_BOUNDARY
/ BITS_PER_UNIT
)
10565 + vr_saved
* UNITS_PER_VREG
);
10569 aarch64_conditional_register_usage (void)
10574 for (i
= V0_REGNUM
; i
<= V31_REGNUM
; i
++)
10577 call_used_regs
[i
] = 1;
10582 /* Walk down the type tree of TYPE counting consecutive base elements.
10583 If *MODEP is VOIDmode, then set it to the first valid floating point
10584 type. If a non-floating point type is found, or if a floating point
10585 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
10586 otherwise return the count in the sub-tree. */
10588 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
10591 HOST_WIDE_INT size
;
10593 switch (TREE_CODE (type
))
10596 mode
= TYPE_MODE (type
);
10597 if (mode
!= DFmode
&& mode
!= SFmode
10598 && mode
!= TFmode
&& mode
!= HFmode
)
10601 if (*modep
== VOIDmode
)
10604 if (*modep
== mode
)
10610 mode
= TYPE_MODE (TREE_TYPE (type
));
10611 if (mode
!= DFmode
&& mode
!= SFmode
10612 && mode
!= TFmode
&& mode
!= HFmode
)
10615 if (*modep
== VOIDmode
)
10618 if (*modep
== mode
)
10624 /* Use V2SImode and V4SImode as representatives of all 64-bit
10625 and 128-bit vector types. */
10626 size
= int_size_in_bytes (type
);
10639 if (*modep
== VOIDmode
)
10642 /* Vector modes are considered to be opaque: two vectors are
10643 equivalent for the purposes of being homogeneous aggregates
10644 if they are the same size. */
10645 if (*modep
== mode
)
10653 tree index
= TYPE_DOMAIN (type
);
10655 /* Can't handle incomplete types nor sizes that are not
10657 if (!COMPLETE_TYPE_P (type
)
10658 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
10661 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
10664 || !TYPE_MAX_VALUE (index
)
10665 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
10666 || !TYPE_MIN_VALUE (index
)
10667 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
10671 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
10672 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
10674 /* There must be no padding. */
10675 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
10687 /* Can't handle incomplete types nor sizes that are not
10689 if (!COMPLETE_TYPE_P (type
)
10690 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
10693 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
10695 if (TREE_CODE (field
) != FIELD_DECL
)
10698 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
10701 count
+= sub_count
;
10704 /* There must be no padding. */
10705 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
10712 case QUAL_UNION_TYPE
:
10714 /* These aren't very interesting except in a degenerate case. */
10719 /* Can't handle incomplete types nor sizes that are not
10721 if (!COMPLETE_TYPE_P (type
)
10722 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
10725 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
10727 if (TREE_CODE (field
) != FIELD_DECL
)
10730 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
10733 count
= count
> sub_count
? count
: sub_count
;
10736 /* There must be no padding. */
10737 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
10750 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
10751 type as described in AAPCS64 \S 4.1.2.
10753 See the comment above aarch64_composite_type_p for the notes on MODE. */
10756 aarch64_short_vector_p (const_tree type
,
10759 HOST_WIDE_INT size
= -1;
10761 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
10762 size
= int_size_in_bytes (type
);
10763 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
10764 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
10765 size
= GET_MODE_SIZE (mode
);
10767 return (size
== 8 || size
== 16);
10770 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
10771 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
10772 array types. The C99 floating-point complex types are also considered
10773 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
10774 types, which are GCC extensions and out of the scope of AAPCS64, are
10775 treated as composite types here as well.
10777 Note that MODE itself is not sufficient in determining whether a type
10778 is such a composite type or not. This is because
10779 stor-layout.c:compute_record_mode may have already changed the MODE
10780 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
10781 structure with only one field may have its MODE set to the mode of the
10782 field. Also an integer mode whose size matches the size of the
10783 RECORD_TYPE type may be used to substitute the original mode
10784 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
10785 solely relied on. */
10788 aarch64_composite_type_p (const_tree type
,
10791 if (aarch64_short_vector_p (type
, mode
))
10794 if (type
&& (AGGREGATE_TYPE_P (type
) || TREE_CODE (type
) == COMPLEX_TYPE
))
10797 if (mode
== BLKmode
10798 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
10799 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
10805 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
10806 shall be passed or returned in simd/fp register(s) (providing these
10807 parameter passing registers are available).
10809 Upon successful return, *COUNT returns the number of needed registers,
10810 *BASE_MODE returns the mode of the individual register and when IS_HAF
10811 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
10812 floating-point aggregate or a homogeneous short-vector aggregate. */
10815 aarch64_vfp_is_call_or_return_candidate (machine_mode mode
,
10817 machine_mode
*base_mode
,
10821 machine_mode new_mode
= VOIDmode
;
10822 bool composite_p
= aarch64_composite_type_p (type
, mode
);
10824 if (is_ha
!= NULL
) *is_ha
= false;
10826 if ((!composite_p
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10827 || aarch64_short_vector_p (type
, mode
))
10832 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
10834 if (is_ha
!= NULL
) *is_ha
= true;
10836 new_mode
= GET_MODE_INNER (mode
);
10838 else if (type
&& composite_p
)
10840 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
10842 if (ag_count
> 0 && ag_count
<= HA_MAX_NUM_FLDS
)
10844 if (is_ha
!= NULL
) *is_ha
= true;
10853 *base_mode
= new_mode
;
10857 /* Implement TARGET_STRUCT_VALUE_RTX. */
10860 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED
,
10861 int incoming ATTRIBUTE_UNUSED
)
10863 return gen_rtx_REG (Pmode
, AARCH64_STRUCT_VALUE_REGNUM
);
10866 /* Implements target hook vector_mode_supported_p. */
10868 aarch64_vector_mode_supported_p (machine_mode mode
)
10871 && (mode
== V4SImode
|| mode
== V8HImode
10872 || mode
== V16QImode
|| mode
== V2DImode
10873 || mode
== V2SImode
|| mode
== V4HImode
10874 || mode
== V8QImode
|| mode
== V2SFmode
10875 || mode
== V4SFmode
|| mode
== V2DFmode
10876 || mode
== V4HFmode
|| mode
== V8HFmode
10877 || mode
== V1DFmode
))
10883 /* Return appropriate SIMD container
10884 for MODE within a vector of WIDTH bits. */
10885 static machine_mode
10886 aarch64_simd_container_mode (machine_mode mode
, unsigned width
)
10888 gcc_assert (width
== 64 || width
== 128);
10931 /* Return 128-bit container as the preferred SIMD mode for MODE. */
10932 static machine_mode
10933 aarch64_preferred_simd_mode (machine_mode mode
)
10935 return aarch64_simd_container_mode (mode
, 128);
/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over.
   NOTE(review): body reconstructed from upstream GCC (16- and 8-byte
   vectors) — confirm against the original file.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  return (16 | 8);
}
10946 /* Implement TARGET_MANGLE_TYPE. */
10948 static const char *
10949 aarch64_mangle_type (const_tree type
)
10951 /* The AArch64 ABI documents say that "__va_list" has to be
10952 managled as if it is in the "std" namespace. */
10953 if (lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
10954 return "St9__va_list";
10956 /* Half-precision float. */
10957 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
10960 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
10962 if (TYPE_NAME (type
) != NULL
)
10963 return aarch64_mangle_builtin_type (type
);
10965 /* Use the default mangling. */
10969 /* Find the first rtx_insn before insn that will generate an assembly
10973 aarch64_prev_real_insn (rtx_insn
*insn
)
10980 insn
= prev_real_insn (insn
);
10982 while (insn
&& recog_memoized (insn
) < 0);
10988 is_madd_op (enum attr_type t1
)
10991 /* A number of these may be AArch32 only. */
10992 enum attr_type mlatypes
[] = {
10993 TYPE_MLA
, TYPE_MLAS
, TYPE_SMLAD
, TYPE_SMLADX
, TYPE_SMLAL
, TYPE_SMLALD
,
10994 TYPE_SMLALS
, TYPE_SMLALXY
, TYPE_SMLAWX
, TYPE_SMLAWY
, TYPE_SMLAXY
,
10995 TYPE_SMMLA
, TYPE_UMLAL
, TYPE_UMLALS
,TYPE_SMLSD
, TYPE_SMLSDX
, TYPE_SMLSLD
10998 for (i
= 0; i
< sizeof (mlatypes
) / sizeof (enum attr_type
); i
++)
11000 if (t1
== mlatypes
[i
])
11007 /* Check if there is a register dependency between a load and the insn
11008 for which we hold recog_data. */
11011 dep_between_memop_and_curr (rtx memop
)
11016 gcc_assert (GET_CODE (memop
) == SET
);
11018 if (!REG_P (SET_DEST (memop
)))
11021 load_reg
= SET_DEST (memop
);
11022 for (opno
= 1; opno
< recog_data
.n_operands
; opno
++)
11024 rtx operand
= recog_data
.operand
[opno
];
11025 if (REG_P (operand
)
11026 && reg_overlap_mentioned_p (load_reg
, operand
))
11034 /* When working around the Cortex-A53 erratum 835769,
11035 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
11036 instruction and has a preceding memory instruction such that a NOP
11037 should be inserted between them. */
11040 aarch64_madd_needs_nop (rtx_insn
* insn
)
11042 enum attr_type attr_type
;
11046 if (!TARGET_FIX_ERR_A53_835769
)
11049 if (!INSN_P (insn
) || recog_memoized (insn
) < 0)
11052 attr_type
= get_attr_type (insn
);
11053 if (!is_madd_op (attr_type
))
11056 prev
= aarch64_prev_real_insn (insn
);
11057 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
11058 Restore recog state to INSN to avoid state corruption. */
11059 extract_constrain_insn_cached (insn
);
11061 if (!prev
|| !contains_mem_rtx_p (PATTERN (prev
)))
11064 body
= single_set (prev
);
11066 /* If the previous insn is a memory op and there is no dependency between
11067 it and the DImode madd, emit a NOP between them. If body is NULL then we
11068 have a complex memory operation, probably a load/store pair.
11069 Be conservative for now and emit a NOP. */
11070 if (GET_MODE (recog_data
.operand
[0]) == DImode
11071 && (!body
|| !dep_between_memop_and_curr (body
)))
11079 /* Implement FINAL_PRESCAN_INSN. */
11082 aarch64_final_prescan_insn (rtx_insn
*insn
)
11084 if (aarch64_madd_needs_nop (insn
))
11085 fprintf (asm_out_file
, "\tnop // between mem op and mult-accumulate\n");
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
11103 /* Return true iff x is a uniform vector of floating-point
11104 constants, and the constant can be represented in
11105 quarter-precision form. Note, as aarch64_float_const_representable
11106 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
11108 aarch64_vect_float_const_representable_p (rtx x
)
11111 return (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_FLOAT
11112 && const_vec_duplicate_p (x
, &elt
)
11113 && aarch64_float_const_representable_p (elt
));
11116 /* Return true for valid and false for invalid. */
11118 aarch64_simd_valid_immediate (rtx op
, machine_mode mode
, bool inverse
,
11119 struct simd_immediate_info
*info
)
11121 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
11123 for (i = 0; i < idx; i += (STRIDE)) \
11128 immtype = (CLASS); \
11129 elsize = (ELSIZE); \
11130 eshift = (SHIFT); \
11135 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
11136 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
11137 unsigned char bytes
[16];
11138 int immtype
= -1, matches
;
11139 unsigned int invmask
= inverse
? 0xff : 0;
11142 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11144 if (! (aarch64_simd_imm_zero_p (op
, mode
)
11145 || aarch64_vect_float_const_representable_p (op
)))
11150 info
->value
= CONST_VECTOR_ELT (op
, 0);
11151 info
->element_width
= GET_MODE_BITSIZE (GET_MODE (info
->value
));
11159 /* Splat vector constant out into a byte vector. */
11160 for (i
= 0; i
< n_elts
; i
++)
11162 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
11163 it must be laid out in the vector register in reverse order. */
11164 rtx el
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? (n_elts
- 1 - i
) : i
);
11165 unsigned HOST_WIDE_INT elpart
;
11167 gcc_assert (CONST_INT_P (el
));
11168 elpart
= INTVAL (el
);
11170 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
11172 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11173 elpart
>>= BITS_PER_UNIT
;
11178 /* Sanity check. */
11179 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11183 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11184 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 0, 0);
11186 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11187 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
11189 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11190 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
11192 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11193 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3], 24, 0);
11195 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0, 0, 0);
11197 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1], 8, 0);
11199 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11200 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 0, 1);
11202 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11203 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
11205 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11206 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
11208 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11209 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3], 24, 1);
11211 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff, 0, 1);
11213 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1], 8, 1);
11215 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11216 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
11218 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11219 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
11221 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11222 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
11224 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11225 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
11227 CHECK (1, 8, 16, bytes
[i
] == bytes
[0], 0, 0);
11229 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11230 && bytes
[i
] == bytes
[(i
+ 8) % idx
], 0, 0);
11239 info
->element_width
= elsize
;
11240 info
->mvn
= emvn
!= 0;
11241 info
->shift
= eshift
;
11243 unsigned HOST_WIDE_INT imm
= 0;
11245 if (immtype
>= 12 && immtype
<= 15)
11248 /* Un-invert bytes of recognized vector, if necessary. */
11250 for (i
= 0; i
< idx
; i
++)
11251 bytes
[i
] ^= invmask
;
11255 /* FIXME: Broken on 32-bit H_W_I hosts. */
11256 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
11258 for (i
= 0; i
< 8; i
++)
11259 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
11260 << (i
* BITS_PER_UNIT
);
11263 info
->value
= GEN_INT (imm
);
11267 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
11268 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
11270 /* Construct 'abcdefgh' because the assembler cannot handle
11271 generic constants. */
11274 imm
= (imm
>> info
->shift
) & 0xff;
11275 info
->value
= GEN_INT (imm
);
11283 /* Check of immediate shift constants are within range. */
11285 aarch64_simd_shift_imm_p (rtx x
, machine_mode mode
, bool left
)
11287 int bit_width
= GET_MODE_UNIT_SIZE (mode
) * BITS_PER_UNIT
;
11289 return aarch64_const_vec_all_same_in_range_p (x
, 0, bit_width
- 1);
11291 return aarch64_const_vec_all_same_in_range_p (x
, 1, bit_width
);
11294 /* Return true if X is a uniform vector where all elements
11295 are either the floating-point constant 0.0 or the
11296 integer constant 0. */
11298 aarch64_simd_imm_zero_p (rtx x
, machine_mode mode
)
11300 return x
== CONST0_RTX (mode
);
11304 /* Return the bitmask CONST_INT to select the bits required by a zero extract
11305 operation of width WIDTH at bit position POS. */
11308 aarch64_mask_from_zextract_ops (rtx width
, rtx pos
)
11310 gcc_assert (CONST_INT_P (width
));
11311 gcc_assert (CONST_INT_P (pos
));
11313 unsigned HOST_WIDE_INT mask
11314 = ((unsigned HOST_WIDE_INT
) 1 << UINTVAL (width
)) - 1;
11315 return GEN_INT (mask
<< UINTVAL (pos
));
11319 aarch64_simd_imm_scalar_p (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
)
11321 HOST_WIDE_INT imm
= INTVAL (x
);
11324 for (i
= 0; i
< 8; i
++)
11326 unsigned int byte
= imm
& 0xff;
11327 if (byte
!= 0xff && byte
!= 0)
11336 aarch64_mov_operand_p (rtx x
, machine_mode mode
)
11338 if (GET_CODE (x
) == HIGH
11339 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
11342 if (CONST_INT_P (x
))
11345 if (GET_CODE (x
) == SYMBOL_REF
&& mode
== DImode
&& CONSTANT_ADDRESS_P (x
))
11348 return aarch64_classify_symbolic_expression (x
)
11349 == SYMBOL_TINY_ABSOLUTE
;
11352 /* Return a const_int vector of VAL. */
11354 aarch64_simd_gen_const_vector_dup (machine_mode mode
, HOST_WIDE_INT val
)
11356 int nunits
= GET_MODE_NUNITS (mode
);
11357 rtvec v
= rtvec_alloc (nunits
);
11360 rtx cache
= GEN_INT (val
);
11362 for (i
=0; i
< nunits
; i
++)
11363 RTVEC_ELT (v
, i
) = cache
;
11365 return gen_rtx_CONST_VECTOR (mode
, v
);
11368 /* Check OP is a legal scalar immediate for the MOVI instruction. */
11371 aarch64_simd_scalar_immediate_valid_for_move (rtx op
, machine_mode mode
)
11373 machine_mode vmode
;
11375 gcc_assert (!VECTOR_MODE_P (mode
));
11376 vmode
= aarch64_preferred_simd_mode (mode
);
11377 rtx op_v
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (op
));
11378 return aarch64_simd_valid_immediate (op_v
, vmode
, false, NULL
);
11381 /* Construct and return a PARALLEL RTX vector with elements numbering the
11382 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
11383 the vector - from the perspective of the architecture. This does not
11384 line up with GCC's perspective on lane numbers, so we end up with
11385 different masks depending on our target endian-ness. The diagram
11386 below may help. We must draw the distinction when building masks
11387 which select one half of the vector. An instruction selecting
11388 architectural low-lanes for a big-endian target, must be described using
11389 a mask selecting GCC high-lanes.
11391 Big-Endian Little-Endian
11393 GCC 0 1 2 3 3 2 1 0
11394 | x | x | x | x | | x | x | x | x |
11395 Architecture 3 2 1 0 3 2 1 0
11397 Low Mask: { 2, 3 } { 0, 1 }
11398 High Mask: { 0, 1 } { 2, 3 }
11402 aarch64_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
11404 int nunits
= GET_MODE_NUNITS (mode
);
11405 rtvec v
= rtvec_alloc (nunits
/ 2);
11406 int high_base
= nunits
/ 2;
11412 if (BYTES_BIG_ENDIAN
)
11413 base
= high
? low_base
: high_base
;
11415 base
= high
? high_base
: low_base
;
11417 for (i
= 0; i
< nunits
/ 2; i
++)
11418 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
11420 t1
= gen_rtx_PARALLEL (mode
, v
);
11424 /* Check OP for validity as a PARALLEL RTX vector with elements
11425 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
11426 from the perspective of the architecture. See the diagram above
11427 aarch64_simd_vect_par_cnst_half for more details. */
11430 aarch64_simd_check_vect_par_cnst_half (rtx op
, machine_mode mode
,
11433 rtx ideal
= aarch64_simd_vect_par_cnst_half (mode
, high
);
11434 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
11435 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
11438 if (!VECTOR_MODE_P (mode
))
11441 if (count_op
!= count_ideal
)
11444 for (i
= 0; i
< count_ideal
; i
++)
11446 rtx elt_op
= XVECEXP (op
, 0, i
);
11447 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
11449 if (!CONST_INT_P (elt_op
)
11450 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
11456 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
11457 HIGH (exclusive). */
11459 aarch64_simd_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
11462 HOST_WIDE_INT lane
;
11463 gcc_assert (CONST_INT_P (operand
));
11464 lane
= INTVAL (operand
);
11466 if (lane
< low
|| lane
>= high
)
11469 error ("%Klane %wd out of range %wd - %wd", exp
, lane
, low
, high
- 1);
11471 error ("lane %wd out of range %wd - %wd", lane
, low
, high
- 1);
11475 /* Return TRUE if OP is a valid vector addressing mode. */
11477 aarch64_simd_mem_operand_p (rtx op
)
11479 return MEM_P (op
) && (GET_CODE (XEXP (op
, 0)) == POST_INC
11480 || REG_P (XEXP (op
, 0)));
11483 /* Emit a register copy from operand to operand, taking care not to
11484 early-clobber source registers in the process.
11486 COUNT is the number of components into which the copy needs to be
11489 aarch64_simd_emit_reg_reg_move (rtx
*operands
, enum machine_mode mode
,
11490 unsigned int count
)
11493 int rdest
= REGNO (operands
[0]);
11494 int rsrc
= REGNO (operands
[1]);
11496 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
11498 for (i
= 0; i
< count
; i
++)
11499 emit_move_insn (gen_rtx_REG (mode
, rdest
+ i
),
11500 gen_rtx_REG (mode
, rsrc
+ i
));
11502 for (i
= 0; i
< count
; i
++)
11503 emit_move_insn (gen_rtx_REG (mode
, rdest
+ count
- i
- 1),
11504 gen_rtx_REG (mode
, rsrc
+ count
- i
- 1));
11507 /* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
11508 one of VSTRUCT modes: OI, CI, or XI. */
11510 aarch64_simd_attr_length_rglist (enum machine_mode mode
)
11512 return (GET_MODE_SIZE (mode
) / UNITS_PER_VREG
) * 4;
11515 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
11516 alignment of a vector to 128 bits. */
11517 static HOST_WIDE_INT
11518 aarch64_simd_vector_alignment (const_tree type
)
11520 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
11521 return MIN (align
, 128);
11524 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
11526 aarch64_simd_vector_alignment_reachable (const_tree type
, bool is_packed
)
11531 /* We guarantee alignment for vectors up to 128-bits. */
11532 if (tree_int_cst_compare (TYPE_SIZE (type
),
11533 bitsize_int (BIGGEST_ALIGNMENT
)) > 0)
11536 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
11540 /* Return true if the vector misalignment factor is supported by the
11543 aarch64_builtin_support_vector_misalignment (machine_mode mode
,
11544 const_tree type
, int misalignment
,
11547 if (TARGET_SIMD
&& STRICT_ALIGNMENT
)
11549 /* Return if movmisalign pattern is not supported for this mode. */
11550 if (optab_handler (movmisalign_optab
, mode
) == CODE_FOR_nothing
)
11553 if (misalignment
== -1)
11555 /* Misalignment factor is unknown at compile time but we know
11556 it's word aligned. */
11557 if (aarch64_simd_vector_alignment_reachable (type
, is_packed
))
11559 int element_size
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
11561 if (element_size
!= 64)
11567 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
11571 /* If VALS is a vector constant that can be loaded into a register
11572 using DUP, generate instructions to do so and return an RTX to
11573 assign to the register. Otherwise return NULL_RTX. */
11575 aarch64_simd_dup_constant (rtx vals
)
11577 machine_mode mode
= GET_MODE (vals
);
11578 machine_mode inner_mode
= GET_MODE_INNER (mode
);
11581 if (!const_vec_duplicate_p (vals
, &x
))
11584 /* We can load this constant by using DUP and a constant in a
11585 single ARM register. This will be cheaper than a vector
11587 x
= copy_to_mode_reg (inner_mode
, x
);
11588 return gen_rtx_VEC_DUPLICATE (mode
, x
);
11592 /* Generate code to load VALS, which is a PARALLEL containing only
11593 constants (for vec_init) or CONST_VECTOR, efficiently into a
11594 register. Returns an RTX to copy into the register, or NULL_RTX
11595 for a PARALLEL that can not be converted into a CONST_VECTOR. */
11597 aarch64_simd_make_constant (rtx vals
)
11599 machine_mode mode
= GET_MODE (vals
);
11601 rtx const_vec
= NULL_RTX
;
11602 int n_elts
= GET_MODE_NUNITS (mode
);
11606 if (GET_CODE (vals
) == CONST_VECTOR
)
11608 else if (GET_CODE (vals
) == PARALLEL
)
11610 /* A CONST_VECTOR must contain only CONST_INTs and
11611 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11612 Only store valid constants in a CONST_VECTOR. */
11613 for (i
= 0; i
< n_elts
; ++i
)
11615 rtx x
= XVECEXP (vals
, 0, i
);
11616 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
11619 if (n_const
== n_elts
)
11620 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
11623 gcc_unreachable ();
11625 if (const_vec
!= NULL_RTX
11626 && aarch64_simd_valid_immediate (const_vec
, mode
, false, NULL
))
11627 /* Load using MOVI/MVNI. */
11629 else if ((const_dup
= aarch64_simd_dup_constant (vals
)) != NULL_RTX
)
11630 /* Loaded using DUP. */
11632 else if (const_vec
!= NULL_RTX
)
11633 /* Load from constant pool. We can not take advantage of single-cycle
11634 LD1 because we need a PC-relative addressing mode. */
11637 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11638 We can not construct an initializer. */
11642 /* Expand a vector initialisation sequence, such that TARGET is
11643 initialised to contain VALS. */
11646 aarch64_expand_vector_init (rtx target
, rtx vals
)
11648 machine_mode mode
= GET_MODE (target
);
11649 machine_mode inner_mode
= GET_MODE_INNER (mode
);
11650 /* The number of vector elements. */
11651 int n_elts
= GET_MODE_NUNITS (mode
);
11652 /* The number of vector elements which are not constant. */
11654 rtx any_const
= NULL_RTX
;
11655 /* The first element of vals. */
11656 rtx v0
= XVECEXP (vals
, 0, 0);
11657 bool all_same
= true;
11659 /* Count the number of variable elements to initialise. */
11660 for (int i
= 0; i
< n_elts
; ++i
)
11662 rtx x
= XVECEXP (vals
, 0, i
);
11663 if (!(CONST_INT_P (x
) || CONST_DOUBLE_P (x
)))
11668 all_same
&= rtx_equal_p (x
, v0
);
11671 /* No variable elements, hand off to aarch64_simd_make_constant which knows
11672 how best to handle this. */
11675 rtx constant
= aarch64_simd_make_constant (vals
);
11676 if (constant
!= NULL_RTX
)
11678 emit_move_insn (target
, constant
);
11683 /* Splat a single non-constant element if we can. */
11686 rtx x
= copy_to_mode_reg (inner_mode
, v0
);
11687 aarch64_emit_move (target
, gen_rtx_VEC_DUPLICATE (mode
, x
));
11691 /* Initialise a vector which is part-variable. We want to first try
11692 to build those lanes which are constant in the most efficient way we
11694 if (n_var
!= n_elts
)
11696 rtx copy
= copy_rtx (vals
);
11698 /* Load constant part of vector. We really don't care what goes into the
11699 parts we will overwrite, but we're more likely to be able to load the
11700 constant efficiently if it has fewer, larger, repeating parts
11701 (see aarch64_simd_valid_immediate). */
11702 for (int i
= 0; i
< n_elts
; i
++)
11704 rtx x
= XVECEXP (vals
, 0, i
);
11705 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
11707 rtx subst
= any_const
;
11708 for (int bit
= n_elts
/ 2; bit
> 0; bit
/= 2)
11710 /* Look in the copied vector, as more elements are const. */
11711 rtx test
= XVECEXP (copy
, 0, i
^ bit
);
11712 if (CONST_INT_P (test
) || CONST_DOUBLE_P (test
))
11718 XVECEXP (copy
, 0, i
) = subst
;
11720 aarch64_expand_vector_init (target
, copy
);
11723 /* Insert the variable lanes directly. */
11725 enum insn_code icode
= optab_handler (vec_set_optab
, mode
);
11726 gcc_assert (icode
!= CODE_FOR_nothing
);
11728 for (int i
= 0; i
< n_elts
; i
++)
11730 rtx x
= XVECEXP (vals
, 0, i
);
11731 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
11733 x
= copy_to_mode_reg (inner_mode
, x
);
11734 emit_insn (GEN_FCN (icode
) (target
, x
, GEN_INT (i
)));
11738 static unsigned HOST_WIDE_INT
11739 aarch64_shift_truncation_mask (machine_mode mode
)
11742 (!SHIFT_COUNT_TRUNCATED
11743 || aarch64_vector_mode_supported_p (mode
)
11744 || aarch64_vect_struct_mode_p (mode
)) ? 0 : (GET_MODE_BITSIZE (mode
) - 1);
11747 /* Select a format to encode pointers in exception handling data. */
11749 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED
, int global
)
11752 switch (aarch64_cmodel
)
11754 case AARCH64_CMODEL_TINY
:
11755 case AARCH64_CMODEL_TINY_PIC
:
11756 case AARCH64_CMODEL_SMALL
:
11757 case AARCH64_CMODEL_SMALL_PIC
:
11758 case AARCH64_CMODEL_SMALL_SPIC
:
11759 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
11761 type
= DW_EH_PE_sdata4
;
11764 /* No assumptions here. 8-byte relocs required. */
11765 type
= DW_EH_PE_sdata8
;
11768 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
11771 /* The last .arch and .tune assembly strings that we printed. */
11772 static std::string aarch64_last_printed_arch_string
;
11773 static std::string aarch64_last_printed_tune_string
;
11775 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
11776 by the function fndecl. */
11779 aarch64_declare_function_name (FILE *stream
, const char* name
,
11782 tree target_parts
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
11784 struct cl_target_option
*targ_options
;
11786 targ_options
= TREE_TARGET_OPTION (target_parts
);
11788 targ_options
= TREE_TARGET_OPTION (target_option_current_node
);
11789 gcc_assert (targ_options
);
11791 const struct processor
*this_arch
11792 = aarch64_get_arch (targ_options
->x_explicit_arch
);
11794 unsigned long isa_flags
= targ_options
->x_aarch64_isa_flags
;
11795 std::string extension
11796 = aarch64_get_extension_string_for_isa_flags (isa_flags
,
11798 /* Only update the assembler .arch string if it is distinct from the last
11799 such string we printed. */
11800 std::string to_print
= this_arch
->name
+ extension
;
11801 if (to_print
!= aarch64_last_printed_arch_string
)
11803 asm_fprintf (asm_out_file
, "\t.arch %s\n", to_print
.c_str ());
11804 aarch64_last_printed_arch_string
= to_print
;
11807 /* Print the cpu name we're tuning for in the comments, might be
11808 useful to readers of the generated asm. Do it only when it changes
11809 from function to function and verbose assembly is requested. */
11810 const struct processor
*this_tune
11811 = aarch64_get_tune_cpu (targ_options
->x_explicit_tune_core
);
11813 if (flag_debug_asm
&& aarch64_last_printed_tune_string
!= this_tune
->name
)
11815 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune %s\n",
11817 aarch64_last_printed_tune_string
= this_tune
->name
;
11820 /* Don't forget the type directive for ELF. */
11821 ASM_OUTPUT_TYPE_DIRECTIVE (stream
, name
, "function");
11822 ASM_OUTPUT_LABEL (stream
, name
);
11825 /* Implements TARGET_ASM_FILE_START. Output the assembly header. */
11828 aarch64_start_file (void)
11830 struct cl_target_option
*default_options
11831 = TREE_TARGET_OPTION (target_option_default_node
);
11833 const struct processor
*default_arch
11834 = aarch64_get_arch (default_options
->x_explicit_arch
);
11835 unsigned long default_isa_flags
= default_options
->x_aarch64_isa_flags
;
11836 std::string extension
11837 = aarch64_get_extension_string_for_isa_flags (default_isa_flags
,
11838 default_arch
->flags
);
11840 aarch64_last_printed_arch_string
= default_arch
->name
+ extension
;
11841 aarch64_last_printed_tune_string
= "";
11842 asm_fprintf (asm_out_file
, "\t.arch %s\n",
11843 aarch64_last_printed_arch_string
.c_str ());
11845 default_file_start ();
11848 /* Emit load exclusive. */
11851 aarch64_emit_load_exclusive (machine_mode mode
, rtx rval
,
11852 rtx mem
, rtx model_rtx
)
11854 rtx (*gen
) (rtx
, rtx
, rtx
);
11858 case QImode
: gen
= gen_aarch64_load_exclusiveqi
; break;
11859 case HImode
: gen
= gen_aarch64_load_exclusivehi
; break;
11860 case SImode
: gen
= gen_aarch64_load_exclusivesi
; break;
11861 case DImode
: gen
= gen_aarch64_load_exclusivedi
; break;
11863 gcc_unreachable ();
11866 emit_insn (gen (rval
, mem
, model_rtx
));
11869 /* Emit store exclusive. */
11872 aarch64_emit_store_exclusive (machine_mode mode
, rtx bval
,
11873 rtx rval
, rtx mem
, rtx model_rtx
)
11875 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
11879 case QImode
: gen
= gen_aarch64_store_exclusiveqi
; break;
11880 case HImode
: gen
= gen_aarch64_store_exclusivehi
; break;
11881 case SImode
: gen
= gen_aarch64_store_exclusivesi
; break;
11882 case DImode
: gen
= gen_aarch64_store_exclusivedi
; break;
11884 gcc_unreachable ();
11887 emit_insn (gen (bval
, rval
, mem
, model_rtx
));
11890 /* Mark the previous jump instruction as unlikely. */
11893 aarch64_emit_unlikely_jump (rtx insn
)
11895 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
11897 rtx_insn
*jump
= emit_jump_insn (insn
);
11898 add_int_reg_note (jump
, REG_BR_PROB
, very_unlikely
);
11901 /* Expand a compare and swap pattern. */
11904 aarch64_expand_compare_and_swap (rtx operands
[])
11906 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
11907 machine_mode mode
, cmp_mode
;
11908 typedef rtx (*gen_cas_fn
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
11911 const gen_cas_fn split_cas
[] =
11913 gen_aarch64_compare_and_swapqi
,
11914 gen_aarch64_compare_and_swaphi
,
11915 gen_aarch64_compare_and_swapsi
,
11916 gen_aarch64_compare_and_swapdi
11918 const gen_cas_fn atomic_cas
[] =
11920 gen_aarch64_compare_and_swapqi_lse
,
11921 gen_aarch64_compare_and_swaphi_lse
,
11922 gen_aarch64_compare_and_swapsi_lse
,
11923 gen_aarch64_compare_and_swapdi_lse
11926 bval
= operands
[0];
11927 rval
= operands
[1];
11929 oldval
= operands
[3];
11930 newval
= operands
[4];
11931 is_weak
= operands
[5];
11932 mod_s
= operands
[6];
11933 mod_f
= operands
[7];
11934 mode
= GET_MODE (mem
);
11937 /* Normally the succ memory model must be stronger than fail, but in the
11938 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
11939 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
11941 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
11942 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
11943 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
11949 /* For short modes, we're going to perform the comparison in SImode,
11950 so do the zero-extension now. */
11952 rval
= gen_reg_rtx (SImode
);
11953 oldval
= convert_modes (SImode
, mode
, oldval
, true);
11954 /* Fall through. */
11958 /* Force the value into a register if needed. */
11959 if (!aarch64_plus_operand (oldval
, mode
))
11960 oldval
= force_reg (cmp_mode
, oldval
);
11964 gcc_unreachable ();
11969 case QImode
: idx
= 0; break;
11970 case HImode
: idx
= 1; break;
11971 case SImode
: idx
= 2; break;
11972 case DImode
: idx
= 3; break;
11974 gcc_unreachable ();
11977 gen
= atomic_cas
[idx
];
11979 gen
= split_cas
[idx
];
11981 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
11983 if (mode
== QImode
|| mode
== HImode
)
11984 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
11986 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
11987 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
11988 emit_insn (gen_rtx_SET (bval
, x
));
11991 /* Test whether the target supports using a atomic load-operate instruction.
11992 CODE is the operation and AFTER is TRUE if the data in memory after the
11993 operation should be returned and FALSE if the data before the operation
11994 should be returned. Returns FALSE if the operation isn't supported by the
11998 aarch64_atomic_ldop_supported_p (enum rtx_code code
)
12017 /* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
12018 sequence implementing an atomic operation. */
12021 aarch64_emit_post_barrier (enum memmodel model
)
12023 const enum memmodel base_model
= memmodel_base (model
);
12025 if (is_mm_sync (model
)
12026 && (base_model
== MEMMODEL_ACQUIRE
12027 || base_model
== MEMMODEL_ACQ_REL
12028 || base_model
== MEMMODEL_SEQ_CST
))
12030 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST
)));
12034 /* Emit an atomic compare-and-swap operation. RVAL is the destination register
12035 for the data in memory. EXPECTED is the value expected to be in memory.
12036 DESIRED is the value to store to memory. MEM is the memory location. MODEL
12037 is the memory ordering to use. */
12040 aarch64_gen_atomic_cas (rtx rval
, rtx mem
,
12041 rtx expected
, rtx desired
,
12044 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
12047 mode
= GET_MODE (mem
);
12051 case QImode
: gen
= gen_aarch64_atomic_casqi
; break;
12052 case HImode
: gen
= gen_aarch64_atomic_cashi
; break;
12053 case SImode
: gen
= gen_aarch64_atomic_cassi
; break;
12054 case DImode
: gen
= gen_aarch64_atomic_casdi
; break;
12056 gcc_unreachable ();
12059 /* Move the expected value into the CAS destination register. */
12060 emit_insn (gen_rtx_SET (rval
, expected
));
12062 /* Emit the CAS. */
12063 emit_insn (gen (rval
, mem
, desired
, model
));
12065 /* Compare the expected value with the value loaded by the CAS, to establish
12066 whether the swap was made. */
12067 aarch64_gen_compare_reg (EQ
, rval
, expected
);
12070 /* Split a compare and swap pattern. */
12073 aarch64_split_compare_and_swap (rtx operands
[])
12075 rtx rval
, mem
, oldval
, newval
, scratch
;
12078 rtx_code_label
*label1
, *label2
;
12080 enum memmodel model
;
12083 rval
= operands
[0];
12085 oldval
= operands
[2];
12086 newval
= operands
[3];
12087 is_weak
= (operands
[4] != const0_rtx
);
12088 model_rtx
= operands
[5];
12089 scratch
= operands
[7];
12090 mode
= GET_MODE (mem
);
12091 model
= memmodel_from_int (INTVAL (model_rtx
));
12096 label1
= gen_label_rtx ();
12097 emit_label (label1
);
12099 label2
= gen_label_rtx ();
12101 /* The initial load can be relaxed for a __sync operation since a final
12102 barrier will be emitted to stop code hoisting. */
12103 if (is_mm_sync (model
))
12104 aarch64_emit_load_exclusive (mode
, rval
, mem
,
12105 GEN_INT (MEMMODEL_RELAXED
));
12107 aarch64_emit_load_exclusive (mode
, rval
, mem
, model_rtx
);
12109 cond
= aarch64_gen_compare_reg (NE
, rval
, oldval
);
12110 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
12111 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
12112 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
12113 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
12115 aarch64_emit_store_exclusive (mode
, scratch
, mem
, newval
, model_rtx
);
12119 x
= gen_rtx_NE (VOIDmode
, scratch
, const0_rtx
);
12120 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
12121 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
12122 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
12126 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
12127 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
12128 emit_insn (gen_rtx_SET (cond
, x
));
12131 emit_label (label2
);
12133 /* Emit any final barrier needed for a __sync operation. */
12134 if (is_mm_sync (model
))
12135 aarch64_emit_post_barrier (model
);
12138 /* Emit a BIC instruction. */
12141 aarch64_emit_bic (machine_mode mode
, rtx dst
, rtx s1
, rtx s2
, int shift
)
12143 rtx shift_rtx
= GEN_INT (shift
);
12144 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
12148 case SImode
: gen
= gen_and_one_cmpl_lshrsi3
; break;
12149 case DImode
: gen
= gen_and_one_cmpl_lshrdi3
; break;
12151 gcc_unreachable ();
12154 emit_insn (gen (dst
, s2
, shift_rtx
, s1
));
12157 /* Emit an atomic swap. */
12160 aarch64_emit_atomic_swap (machine_mode mode
, rtx dst
, rtx value
,
12161 rtx mem
, rtx model
)
12163 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
12167 case QImode
: gen
= gen_aarch64_atomic_swpqi
; break;
12168 case HImode
: gen
= gen_aarch64_atomic_swphi
; break;
12169 case SImode
: gen
= gen_aarch64_atomic_swpsi
; break;
12170 case DImode
: gen
= gen_aarch64_atomic_swpdi
; break;
12172 gcc_unreachable ();
12175 emit_insn (gen (dst
, mem
, value
, model
));
/* Operations supported by aarch64_emit_atomic_load_op.  */
enum aarch64_atomic_load_op_code
{
  AARCH64_LDOP_PLUS,  /* A + B  */
  AARCH64_LDOP_XOR,   /* A ^ B  */
  AARCH64_LDOP_OR,    /* A | B  */
  AARCH64_LDOP_BIC    /* A & ~B  */
};
12188 /* Emit an atomic load-operate. */
12191 aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code
,
12192 machine_mode mode
, rtx dst
, rtx src
,
12193 rtx mem
, rtx model
)
12195 typedef rtx (*aarch64_atomic_load_op_fn
) (rtx
, rtx
, rtx
, rtx
);
12196 const aarch64_atomic_load_op_fn plus
[] =
12198 gen_aarch64_atomic_loadaddqi
,
12199 gen_aarch64_atomic_loadaddhi
,
12200 gen_aarch64_atomic_loadaddsi
,
12201 gen_aarch64_atomic_loadadddi
12203 const aarch64_atomic_load_op_fn eor
[] =
12205 gen_aarch64_atomic_loadeorqi
,
12206 gen_aarch64_atomic_loadeorhi
,
12207 gen_aarch64_atomic_loadeorsi
,
12208 gen_aarch64_atomic_loadeordi
12210 const aarch64_atomic_load_op_fn ior
[] =
12212 gen_aarch64_atomic_loadsetqi
,
12213 gen_aarch64_atomic_loadsethi
,
12214 gen_aarch64_atomic_loadsetsi
,
12215 gen_aarch64_atomic_loadsetdi
12217 const aarch64_atomic_load_op_fn bic
[] =
12219 gen_aarch64_atomic_loadclrqi
,
12220 gen_aarch64_atomic_loadclrhi
,
12221 gen_aarch64_atomic_loadclrsi
,
12222 gen_aarch64_atomic_loadclrdi
12224 aarch64_atomic_load_op_fn gen
;
12229 case QImode
: idx
= 0; break;
12230 case HImode
: idx
= 1; break;
12231 case SImode
: idx
= 2; break;
12232 case DImode
: idx
= 3; break;
12234 gcc_unreachable ();
12239 case AARCH64_LDOP_PLUS
: gen
= plus
[idx
]; break;
12240 case AARCH64_LDOP_XOR
: gen
= eor
[idx
]; break;
12241 case AARCH64_LDOP_OR
: gen
= ior
[idx
]; break;
12242 case AARCH64_LDOP_BIC
: gen
= bic
[idx
]; break;
12244 gcc_unreachable ();
12247 emit_insn (gen (dst
, mem
, src
, model
));
12250 /* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
12251 location to store the data read from memory. OUT_RESULT is the location to
12252 store the result of the operation. MEM is the memory location to read and
12253 modify. MODEL_RTX is the memory ordering to use. VALUE is the second
12254 operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
12258 aarch64_gen_atomic_ldop (enum rtx_code code
, rtx out_data
, rtx out_result
,
12259 rtx mem
, rtx value
, rtx model_rtx
)
12261 machine_mode mode
= GET_MODE (mem
);
12262 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
12263 const bool short_mode
= (mode
< SImode
);
12264 aarch64_atomic_load_op_code ldop_code
;
12269 out_data
= gen_lowpart (mode
, out_data
);
12272 out_result
= gen_lowpart (mode
, out_result
);
12274 /* Make sure the value is in a register, putting it into a destination
12275 register if it needs to be manipulated. */
12276 if (!register_operand (value
, mode
)
12277 || code
== AND
|| code
== MINUS
)
12279 src
= out_result
? out_result
: out_data
;
12280 emit_move_insn (src
, gen_lowpart (mode
, value
));
12284 gcc_assert (register_operand (src
, mode
));
12286 /* Preprocess the data for the operation as necessary. If the operation is
12287 a SET then emit a swap instruction and finish. */
12291 aarch64_emit_atomic_swap (mode
, out_data
, src
, mem
, model_rtx
);
12295 /* Negate the value and treat it as a PLUS. */
12299 /* Resize the value if necessary. */
12301 src
= gen_lowpart (wmode
, src
);
12303 neg_src
= gen_rtx_NEG (wmode
, src
);
12304 emit_insn (gen_rtx_SET (src
, neg_src
));
12307 src
= gen_lowpart (mode
, src
);
12309 /* Fall-through. */
12311 ldop_code
= AARCH64_LDOP_PLUS
;
12315 ldop_code
= AARCH64_LDOP_OR
;
12319 ldop_code
= AARCH64_LDOP_XOR
;
12326 /* Resize the value if necessary. */
12328 src
= gen_lowpart (wmode
, src
);
12330 not_src
= gen_rtx_NOT (wmode
, src
);
12331 emit_insn (gen_rtx_SET (src
, not_src
));
12334 src
= gen_lowpart (mode
, src
);
12336 ldop_code
= AARCH64_LDOP_BIC
;
12340 /* The operation can't be done with atomic instructions. */
12341 gcc_unreachable ();
12344 aarch64_emit_atomic_load_op (ldop_code
, mode
, out_data
, src
, mem
, model_rtx
);
12346 /* If necessary, calculate the data in memory after the update by redoing the
12347 operation from values in registers. */
12353 src
= gen_lowpart (wmode
, src
);
12354 out_data
= gen_lowpart (wmode
, out_data
);
12355 out_result
= gen_lowpart (wmode
, out_result
);
12364 x
= gen_rtx_PLUS (wmode
, out_data
, src
);
12367 x
= gen_rtx_IOR (wmode
, out_data
, src
);
12370 x
= gen_rtx_XOR (wmode
, out_data
, src
);
12373 aarch64_emit_bic (wmode
, out_result
, out_data
, src
, 0);
12376 gcc_unreachable ();
12379 emit_set_insn (out_result
, x
);
12384 /* Split an atomic operation. */
12387 aarch64_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
12388 rtx value
, rtx model_rtx
, rtx cond
)
12390 machine_mode mode
= GET_MODE (mem
);
12391 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
12392 const enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
12393 const bool is_sync
= is_mm_sync (model
);
12394 rtx_code_label
*label
;
12397 /* Split the atomic operation into a sequence. */
12398 label
= gen_label_rtx ();
12399 emit_label (label
);
12402 new_out
= gen_lowpart (wmode
, new_out
);
12404 old_out
= gen_lowpart (wmode
, old_out
);
12407 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
12409 /* The initial load can be relaxed for a __sync operation since a final
12410 barrier will be emitted to stop code hoisting. */
12412 aarch64_emit_load_exclusive (mode
, old_out
, mem
,
12413 GEN_INT (MEMMODEL_RELAXED
));
12415 aarch64_emit_load_exclusive (mode
, old_out
, mem
, model_rtx
);
12424 x
= gen_rtx_AND (wmode
, old_out
, value
);
12425 emit_insn (gen_rtx_SET (new_out
, x
));
12426 x
= gen_rtx_NOT (wmode
, new_out
);
12427 emit_insn (gen_rtx_SET (new_out
, x
));
12431 if (CONST_INT_P (value
))
12433 value
= GEN_INT (-INTVAL (value
));
12436 /* Fall through. */
12439 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
12440 emit_insn (gen_rtx_SET (new_out
, x
));
12444 aarch64_emit_store_exclusive (mode
, cond
, mem
,
12445 gen_lowpart (mode
, new_out
), model_rtx
);
12447 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
12448 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
12449 gen_rtx_LABEL_REF (Pmode
, label
), pc_rtx
);
12450 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
12452 /* Emit any final barrier needed for a __sync operation. */
12454 aarch64_emit_post_barrier (model
);
12458 aarch64_init_libfuncs (void)
12460 /* Half-precision float operations. The compiler handles all operations
12461 with NULL libfuncs by converting to SFmode. */
12464 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
, "__gnu_f2h_ieee");
12465 set_conv_libfunc (sext_optab
, SFmode
, HFmode
, "__gnu_h2f_ieee");
12468 set_optab_libfunc (add_optab
, HFmode
, NULL
);
12469 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
12470 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
12471 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
12472 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
12475 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
12476 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
12477 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
12478 set_optab_libfunc (le_optab
, HFmode
, NULL
);
12479 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
12480 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
12481 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
12484 /* Target hook for c_mode_for_suffix. */
12485 static machine_mode
12486 aarch64_c_mode_for_suffix (char suffix
)
12494 /* We can only represent floating point constants which will fit in
12495 "quarter-precision" values. These values are characterised by
12496 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
12499 (-1)^s * (n/16) * 2^r
12502 's' is the sign bit.
12503 'n' is an integer in the range 16 <= n <= 31.
12504 'r' is an integer in the range -3 <= r <= 4. */
12506 /* Return true iff X can be represented by a quarter-precision
12507 floating point immediate operand X. Note, we cannot represent 0.0. */
12509 aarch64_float_const_representable_p (rtx x
)
12511 /* This represents our current view of how many bits
12512 make up the mantissa. */
12513 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12515 unsigned HOST_WIDE_INT mantissa
, mask
;
12516 REAL_VALUE_TYPE r
, m
;
12519 if (!CONST_DOUBLE_P (x
))
12522 /* We don't support HFmode constants yet. */
12523 if (GET_MODE (x
) == VOIDmode
|| GET_MODE (x
) == HFmode
)
12526 r
= *CONST_DOUBLE_REAL_VALUE (x
);
12528 /* We cannot represent infinities, NaNs or +/-zero. We won't
12529 know if we have +zero until we analyse the mantissa, but we
12530 can reject the other invalid values. */
12531 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
)
12532 || REAL_VALUE_MINUS_ZERO (r
))
12535 /* Extract exponent. */
12536 r
= real_value_abs (&r
);
12537 exponent
= REAL_EXP (&r
);
12539 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12540 highest (sign) bit, with a fixed binary point at bit point_pos.
12541 m1 holds the low part of the mantissa, m2 the high part.
12542 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
12543 bits for the mantissa, this can fail (low bits will be lost). */
12544 real_ldexp (&m
, &r
, point_pos
- exponent
);
12545 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12547 /* If the low part of the mantissa has bits set we cannot represent
12549 if (w
.ulow () != 0)
12551 /* We have rejected the lower HOST_WIDE_INT, so update our
12552 understanding of how many bits lie in the mantissa and
12553 look only at the high HOST_WIDE_INT. */
12554 mantissa
= w
.elt (1);
12555 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12557 /* We can only represent values with a mantissa of the form 1.xxxx. */
12558 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
12559 if ((mantissa
& mask
) != 0)
12562 /* Having filtered unrepresentable values, we may now remove all
12563 but the highest 5 bits. */
12564 mantissa
>>= point_pos
- 5;
12566 /* We cannot represent the value 0.0, so reject it. This is handled
12571 /* Then, as bit 4 is always set, we can mask it off, leaving
12572 the mantissa in the range [0, 15]. */
12573 mantissa
&= ~(1 << 4);
12574 gcc_assert (mantissa
<= 15);
12576 /* GCC internally does not use IEEE754-like encoding (where normalized
12577 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
12578 Our mantissa values are shifted 4 places to the left relative to
12579 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
12580 by 5 places to correct for GCC's representation. */
12581 exponent
= 5 - exponent
;
12583 return (exponent
>= 0 && exponent
<= 7);
12587 aarch64_output_simd_mov_immediate (rtx const_vector
,
12592 static char templ
[40];
12593 const char *mnemonic
;
12594 const char *shift_op
;
12595 unsigned int lane_count
= 0;
12598 struct simd_immediate_info info
= { NULL_RTX
, 0, 0, false, false };
12600 /* This will return true to show const_vector is legal for use as either
12601 a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
12602 also update INFO to show how the immediate should be generated. */
12603 is_valid
= aarch64_simd_valid_immediate (const_vector
, mode
, false, &info
);
12604 gcc_assert (is_valid
);
12606 element_char
= sizetochar (info
.element_width
);
12607 lane_count
= width
/ info
.element_width
;
12609 mode
= GET_MODE_INNER (mode
);
12610 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
12612 gcc_assert (info
.shift
== 0 && ! info
.mvn
);
12613 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
12614 move immediate path. */
12615 if (aarch64_float_const_zero_rtx_p (info
.value
))
12616 info
.value
= GEN_INT (0);
12619 const unsigned int buf_size
= 20;
12620 char float_buf
[buf_size
] = {'\0'};
12621 real_to_decimal_for_mode (float_buf
,
12622 CONST_DOUBLE_REAL_VALUE (info
.value
),
12623 buf_size
, buf_size
, 1, mode
);
12625 if (lane_count
== 1)
12626 snprintf (templ
, sizeof (templ
), "fmov\t%%d0, %s", float_buf
);
12628 snprintf (templ
, sizeof (templ
), "fmov\t%%0.%d%c, %s",
12629 lane_count
, element_char
, float_buf
);
12634 mnemonic
= info
.mvn
? "mvni" : "movi";
12635 shift_op
= info
.msl
? "msl" : "lsl";
12637 gcc_assert (CONST_INT_P (info
.value
));
12638 if (lane_count
== 1)
12639 snprintf (templ
, sizeof (templ
), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX
,
12640 mnemonic
, UINTVAL (info
.value
));
12641 else if (info
.shift
)
12642 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
12643 ", %s %d", mnemonic
, lane_count
, element_char
,
12644 UINTVAL (info
.value
), shift_op
, info
.shift
);
12646 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
,
12647 mnemonic
, lane_count
, element_char
, UINTVAL (info
.value
));
12652 aarch64_output_scalar_simd_mov_immediate (rtx immediate
,
12655 machine_mode vmode
;
12657 gcc_assert (!VECTOR_MODE_P (mode
));
12658 vmode
= aarch64_simd_container_mode (mode
, 64);
12659 rtx v_op
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (immediate
));
12660 return aarch64_output_simd_mov_immediate (v_op
, vmode
, 64);
12663 /* Split operands into moves from op[1] + op[2] into op[0]. */
12666 aarch64_split_combinev16qi (rtx operands
[3])
12668 unsigned int dest
= REGNO (operands
[0]);
12669 unsigned int src1
= REGNO (operands
[1]);
12670 unsigned int src2
= REGNO (operands
[2]);
12671 machine_mode halfmode
= GET_MODE (operands
[1]);
12672 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
12673 rtx destlo
, desthi
;
12675 gcc_assert (halfmode
== V16QImode
);
12677 if (src1
== dest
&& src2
== dest
+ halfregs
)
12679 /* No-op move. Can't split to nothing; emit something. */
12680 emit_note (NOTE_INSN_DELETED
);
12684 /* Preserve register attributes for variable tracking. */
12685 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
12686 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
12687 GET_MODE_SIZE (halfmode
));
12689 /* Special case of reversed high/low parts. */
12690 if (reg_overlap_mentioned_p (operands
[2], destlo
)
12691 && reg_overlap_mentioned_p (operands
[1], desthi
))
12693 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
12694 emit_insn (gen_xorv16qi3 (operands
[2], operands
[1], operands
[2]));
12695 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
12697 else if (!reg_overlap_mentioned_p (operands
[2], destlo
))
12699 /* Try to avoid unnecessary moves if part of the result
12700 is in the right place already. */
12702 emit_move_insn (destlo
, operands
[1]);
12703 if (src2
!= dest
+ halfregs
)
12704 emit_move_insn (desthi
, operands
[2]);
12708 if (src2
!= dest
+ halfregs
)
12709 emit_move_insn (desthi
, operands
[2]);
12711 emit_move_insn (destlo
, operands
[1]);
12715 /* vec_perm support. */
12717 #define MAX_VECT_LEN 16
12719 struct expand_vec_perm_d
12721 rtx target
, op0
, op1
;
12722 unsigned char perm
[MAX_VECT_LEN
];
12723 machine_mode vmode
;
12724 unsigned char nelt
;
12729 /* Generate a variable permutation. */
12732 aarch64_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
12734 machine_mode vmode
= GET_MODE (target
);
12735 bool one_vector_p
= rtx_equal_p (op0
, op1
);
12737 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
12738 gcc_checking_assert (GET_MODE (op0
) == vmode
);
12739 gcc_checking_assert (GET_MODE (op1
) == vmode
);
12740 gcc_checking_assert (GET_MODE (sel
) == vmode
);
12741 gcc_checking_assert (TARGET_SIMD
);
12745 if (vmode
== V8QImode
)
12747 /* Expand the argument to a V16QI mode by duplicating it. */
12748 rtx pair
= gen_reg_rtx (V16QImode
);
12749 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op0
));
12750 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
12754 emit_insn (gen_aarch64_tbl1v16qi (target
, op0
, sel
));
12761 if (vmode
== V8QImode
)
12763 pair
= gen_reg_rtx (V16QImode
);
12764 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op1
));
12765 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
12769 pair
= gen_reg_rtx (OImode
);
12770 emit_insn (gen_aarch64_combinev16qi (pair
, op0
, op1
));
12771 emit_insn (gen_aarch64_tbl2v16qi (target
, pair
, sel
));
12777 aarch64_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
12779 machine_mode vmode
= GET_MODE (target
);
12780 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
12781 bool one_vector_p
= rtx_equal_p (op0
, op1
);
12784 /* The TBL instruction does not use a modulo index, so we must take care
12785 of that ourselves. */
12786 mask
= aarch64_simd_gen_const_vector_dup (vmode
,
12787 one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
12788 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
12790 /* For big-endian, we also need to reverse the index within the vector
12791 (but not which vector). */
12792 if (BYTES_BIG_ENDIAN
)
12794 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
12796 mask
= aarch64_simd_gen_const_vector_dup (vmode
, nelt
- 1);
12797 sel
= expand_simple_binop (vmode
, XOR
, sel
, mask
,
12798 NULL
, 0, OPTAB_LIB_WIDEN
);
12800 aarch64_expand_vec_perm_1 (target
, op0
, op1
, sel
);
12803 /* Recognize patterns suitable for the TRN instructions. */
12805 aarch64_evpc_trn (struct expand_vec_perm_d
*d
)
12807 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
12808 rtx out
, in0
, in1
, x
;
12809 rtx (*gen
) (rtx
, rtx
, rtx
);
12810 machine_mode vmode
= d
->vmode
;
12812 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
12815 /* Note that these are little-endian tests.
12816 We correct for big-endian later. */
12817 if (d
->perm
[0] == 0)
12819 else if (d
->perm
[0] == 1)
12823 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
12825 for (i
= 0; i
< nelt
; i
+= 2)
12827 if (d
->perm
[i
] != i
+ odd
)
12829 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
12839 if (BYTES_BIG_ENDIAN
)
12841 x
= in0
, in0
= in1
, in1
= x
;
12850 case V16QImode
: gen
= gen_aarch64_trn2v16qi
; break;
12851 case V8QImode
: gen
= gen_aarch64_trn2v8qi
; break;
12852 case V8HImode
: gen
= gen_aarch64_trn2v8hi
; break;
12853 case V4HImode
: gen
= gen_aarch64_trn2v4hi
; break;
12854 case V4SImode
: gen
= gen_aarch64_trn2v4si
; break;
12855 case V2SImode
: gen
= gen_aarch64_trn2v2si
; break;
12856 case V2DImode
: gen
= gen_aarch64_trn2v2di
; break;
12857 case V4HFmode
: gen
= gen_aarch64_trn2v4hf
; break;
12858 case V8HFmode
: gen
= gen_aarch64_trn2v8hf
; break;
12859 case V4SFmode
: gen
= gen_aarch64_trn2v4sf
; break;
12860 case V2SFmode
: gen
= gen_aarch64_trn2v2sf
; break;
12861 case V2DFmode
: gen
= gen_aarch64_trn2v2df
; break;
12870 case V16QImode
: gen
= gen_aarch64_trn1v16qi
; break;
12871 case V8QImode
: gen
= gen_aarch64_trn1v8qi
; break;
12872 case V8HImode
: gen
= gen_aarch64_trn1v8hi
; break;
12873 case V4HImode
: gen
= gen_aarch64_trn1v4hi
; break;
12874 case V4SImode
: gen
= gen_aarch64_trn1v4si
; break;
12875 case V2SImode
: gen
= gen_aarch64_trn1v2si
; break;
12876 case V2DImode
: gen
= gen_aarch64_trn1v2di
; break;
12877 case V4HFmode
: gen
= gen_aarch64_trn1v4hf
; break;
12878 case V8HFmode
: gen
= gen_aarch64_trn1v8hf
; break;
12879 case V4SFmode
: gen
= gen_aarch64_trn1v4sf
; break;
12880 case V2SFmode
: gen
= gen_aarch64_trn1v2sf
; break;
12881 case V2DFmode
: gen
= gen_aarch64_trn1v2df
; break;
12887 emit_insn (gen (out
, in0
, in1
));
12891 /* Recognize patterns suitable for the UZP instructions. */
12893 aarch64_evpc_uzp (struct expand_vec_perm_d
*d
)
12895 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
12896 rtx out
, in0
, in1
, x
;
12897 rtx (*gen
) (rtx
, rtx
, rtx
);
12898 machine_mode vmode
= d
->vmode
;
12900 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
12903 /* Note that these are little-endian tests.
12904 We correct for big-endian later. */
12905 if (d
->perm
[0] == 0)
12907 else if (d
->perm
[0] == 1)
12911 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
12913 for (i
= 0; i
< nelt
; i
++)
12915 unsigned elt
= (i
* 2 + odd
) & mask
;
12916 if (d
->perm
[i
] != elt
)
12926 if (BYTES_BIG_ENDIAN
)
12928 x
= in0
, in0
= in1
, in1
= x
;
12937 case V16QImode
: gen
= gen_aarch64_uzp2v16qi
; break;
12938 case V8QImode
: gen
= gen_aarch64_uzp2v8qi
; break;
12939 case V8HImode
: gen
= gen_aarch64_uzp2v8hi
; break;
12940 case V4HImode
: gen
= gen_aarch64_uzp2v4hi
; break;
12941 case V4SImode
: gen
= gen_aarch64_uzp2v4si
; break;
12942 case V2SImode
: gen
= gen_aarch64_uzp2v2si
; break;
12943 case V2DImode
: gen
= gen_aarch64_uzp2v2di
; break;
12944 case V4HFmode
: gen
= gen_aarch64_uzp2v4hf
; break;
12945 case V8HFmode
: gen
= gen_aarch64_uzp2v8hf
; break;
12946 case V4SFmode
: gen
= gen_aarch64_uzp2v4sf
; break;
12947 case V2SFmode
: gen
= gen_aarch64_uzp2v2sf
; break;
12948 case V2DFmode
: gen
= gen_aarch64_uzp2v2df
; break;
12957 case V16QImode
: gen
= gen_aarch64_uzp1v16qi
; break;
12958 case V8QImode
: gen
= gen_aarch64_uzp1v8qi
; break;
12959 case V8HImode
: gen
= gen_aarch64_uzp1v8hi
; break;
12960 case V4HImode
: gen
= gen_aarch64_uzp1v4hi
; break;
12961 case V4SImode
: gen
= gen_aarch64_uzp1v4si
; break;
12962 case V2SImode
: gen
= gen_aarch64_uzp1v2si
; break;
12963 case V2DImode
: gen
= gen_aarch64_uzp1v2di
; break;
12964 case V4HFmode
: gen
= gen_aarch64_uzp1v4hf
; break;
12965 case V8HFmode
: gen
= gen_aarch64_uzp1v8hf
; break;
12966 case V4SFmode
: gen
= gen_aarch64_uzp1v4sf
; break;
12967 case V2SFmode
: gen
= gen_aarch64_uzp1v2sf
; break;
12968 case V2DFmode
: gen
= gen_aarch64_uzp1v2df
; break;
12974 emit_insn (gen (out
, in0
, in1
));
12978 /* Recognize patterns suitable for the ZIP instructions. */
12980 aarch64_evpc_zip (struct expand_vec_perm_d
*d
)
12982 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
12983 rtx out
, in0
, in1
, x
;
12984 rtx (*gen
) (rtx
, rtx
, rtx
);
12985 machine_mode vmode
= d
->vmode
;
12987 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
12990 /* Note that these are little-endian tests.
12991 We correct for big-endian later. */
12993 if (d
->perm
[0] == high
)
12996 else if (d
->perm
[0] == 0)
13000 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
13002 for (i
= 0; i
< nelt
/ 2; i
++)
13004 unsigned elt
= (i
+ high
) & mask
;
13005 if (d
->perm
[i
* 2] != elt
)
13007 elt
= (elt
+ nelt
) & mask
;
13008 if (d
->perm
[i
* 2 + 1] != elt
)
13018 if (BYTES_BIG_ENDIAN
)
13020 x
= in0
, in0
= in1
, in1
= x
;
13029 case V16QImode
: gen
= gen_aarch64_zip2v16qi
; break;
13030 case V8QImode
: gen
= gen_aarch64_zip2v8qi
; break;
13031 case V8HImode
: gen
= gen_aarch64_zip2v8hi
; break;
13032 case V4HImode
: gen
= gen_aarch64_zip2v4hi
; break;
13033 case V4SImode
: gen
= gen_aarch64_zip2v4si
; break;
13034 case V2SImode
: gen
= gen_aarch64_zip2v2si
; break;
13035 case V2DImode
: gen
= gen_aarch64_zip2v2di
; break;
13036 case V4HFmode
: gen
= gen_aarch64_zip2v4hf
; break;
13037 case V8HFmode
: gen
= gen_aarch64_zip2v8hf
; break;
13038 case V4SFmode
: gen
= gen_aarch64_zip2v4sf
; break;
13039 case V2SFmode
: gen
= gen_aarch64_zip2v2sf
; break;
13040 case V2DFmode
: gen
= gen_aarch64_zip2v2df
; break;
13049 case V16QImode
: gen
= gen_aarch64_zip1v16qi
; break;
13050 case V8QImode
: gen
= gen_aarch64_zip1v8qi
; break;
13051 case V8HImode
: gen
= gen_aarch64_zip1v8hi
; break;
13052 case V4HImode
: gen
= gen_aarch64_zip1v4hi
; break;
13053 case V4SImode
: gen
= gen_aarch64_zip1v4si
; break;
13054 case V2SImode
: gen
= gen_aarch64_zip1v2si
; break;
13055 case V2DImode
: gen
= gen_aarch64_zip1v2di
; break;
13056 case V4HFmode
: gen
= gen_aarch64_zip1v4hf
; break;
13057 case V8HFmode
: gen
= gen_aarch64_zip1v8hf
; break;
13058 case V4SFmode
: gen
= gen_aarch64_zip1v4sf
; break;
13059 case V2SFmode
: gen
= gen_aarch64_zip1v2sf
; break;
13060 case V2DFmode
: gen
= gen_aarch64_zip1v2df
; break;
13066 emit_insn (gen (out
, in0
, in1
));
13070 /* Recognize patterns for the EXT insn. */
13073 aarch64_evpc_ext (struct expand_vec_perm_d
*d
)
13075 unsigned int i
, nelt
= d
->nelt
;
13076 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
13079 unsigned int location
= d
->perm
[0]; /* Always < nelt. */
13081 /* Check if the extracted indices are increasing by one. */
13082 for (i
= 1; i
< nelt
; i
++)
13084 unsigned int required
= location
+ i
;
13085 if (d
->one_vector_p
)
13087 /* We'll pass the same vector in twice, so allow indices to wrap. */
13088 required
&= (nelt
- 1);
13090 if (d
->perm
[i
] != required
)
13096 case V16QImode
: gen
= gen_aarch64_extv16qi
; break;
13097 case V8QImode
: gen
= gen_aarch64_extv8qi
; break;
13098 case V4HImode
: gen
= gen_aarch64_extv4hi
; break;
13099 case V8HImode
: gen
= gen_aarch64_extv8hi
; break;
13100 case V2SImode
: gen
= gen_aarch64_extv2si
; break;
13101 case V4SImode
: gen
= gen_aarch64_extv4si
; break;
13102 case V4HFmode
: gen
= gen_aarch64_extv4hf
; break;
13103 case V8HFmode
: gen
= gen_aarch64_extv8hf
; break;
13104 case V2SFmode
: gen
= gen_aarch64_extv2sf
; break;
13105 case V4SFmode
: gen
= gen_aarch64_extv4sf
; break;
13106 case V2DImode
: gen
= gen_aarch64_extv2di
; break;
13107 case V2DFmode
: gen
= gen_aarch64_extv2df
; break;
13116 /* The case where (location == 0) is a no-op for both big- and little-endian,
13117 and is removed by the mid-end at optimization levels -O1 and higher. */
13119 if (BYTES_BIG_ENDIAN
&& (location
!= 0))
13121 /* After setup, we want the high elements of the first vector (stored
13122 at the LSB end of the register), and the low elements of the second
13123 vector (stored at the MSB end of the register). So swap. */
13124 std::swap (d
->op0
, d
->op1
);
13125 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
13126 location
= nelt
- location
;
13129 offset
= GEN_INT (location
);
13130 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
13134 /* Recognize patterns for the REV insns. */
13137 aarch64_evpc_rev (struct expand_vec_perm_d
*d
)
13139 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
13140 rtx (*gen
) (rtx
, rtx
);
13142 if (!d
->one_vector_p
)
13151 case V16QImode
: gen
= gen_aarch64_rev64v16qi
; break;
13152 case V8QImode
: gen
= gen_aarch64_rev64v8qi
; break;
13160 case V16QImode
: gen
= gen_aarch64_rev32v16qi
; break;
13161 case V8QImode
: gen
= gen_aarch64_rev32v8qi
; break;
13162 case V8HImode
: gen
= gen_aarch64_rev64v8hi
; break;
13163 case V4HImode
: gen
= gen_aarch64_rev64v4hi
; break;
13171 case V16QImode
: gen
= gen_aarch64_rev16v16qi
; break;
13172 case V8QImode
: gen
= gen_aarch64_rev16v8qi
; break;
13173 case V8HImode
: gen
= gen_aarch64_rev32v8hi
; break;
13174 case V4HImode
: gen
= gen_aarch64_rev32v4hi
; break;
13175 case V4SImode
: gen
= gen_aarch64_rev64v4si
; break;
13176 case V2SImode
: gen
= gen_aarch64_rev64v2si
; break;
13177 case V4SFmode
: gen
= gen_aarch64_rev64v4sf
; break;
13178 case V2SFmode
: gen
= gen_aarch64_rev64v2sf
; break;
13179 case V8HFmode
: gen
= gen_aarch64_rev64v8hf
; break;
13180 case V4HFmode
: gen
= gen_aarch64_rev64v4hf
; break;
13189 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
13190 for (j
= 0; j
<= diff
; j
+= 1)
13192 /* This is guaranteed to be true as the value of diff
13193 is 7, 3, 1 and we should have enough elements in the
13194 queue to generate this. Getting a vector mask with a
13195 value of diff other than these values implies that
13196 something is wrong by the time we get here. */
13197 gcc_assert (i
+ j
< nelt
);
13198 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
13206 emit_insn (gen (d
->target
, d
->op0
));
13211 aarch64_evpc_dup (struct expand_vec_perm_d
*d
)
13213 rtx (*gen
) (rtx
, rtx
, rtx
);
13214 rtx out
= d
->target
;
13216 machine_mode vmode
= d
->vmode
;
13217 unsigned int i
, elt
, nelt
= d
->nelt
;
13221 for (i
= 1; i
< nelt
; i
++)
13223 if (elt
!= d
->perm
[i
])
13227 /* The generic preparation in aarch64_expand_vec_perm_const_1
13228 swaps the operand order and the permute indices if it finds
13229 d->perm[0] to be in the second operand. Thus, we can always
13230 use d->op0 and need not do any extra arithmetic to get the
13231 correct lane number. */
13233 lane
= GEN_INT (elt
); /* The pattern corrects for big-endian. */
13237 case V16QImode
: gen
= gen_aarch64_dup_lanev16qi
; break;
13238 case V8QImode
: gen
= gen_aarch64_dup_lanev8qi
; break;
13239 case V8HImode
: gen
= gen_aarch64_dup_lanev8hi
; break;
13240 case V4HImode
: gen
= gen_aarch64_dup_lanev4hi
; break;
13241 case V4SImode
: gen
= gen_aarch64_dup_lanev4si
; break;
13242 case V2SImode
: gen
= gen_aarch64_dup_lanev2si
; break;
13243 case V2DImode
: gen
= gen_aarch64_dup_lanev2di
; break;
13244 case V8HFmode
: gen
= gen_aarch64_dup_lanev8hf
; break;
13245 case V4HFmode
: gen
= gen_aarch64_dup_lanev4hf
; break;
13246 case V4SFmode
: gen
= gen_aarch64_dup_lanev4sf
; break;
13247 case V2SFmode
: gen
= gen_aarch64_dup_lanev2sf
; break;
13248 case V2DFmode
: gen
= gen_aarch64_dup_lanev2df
; break;
13253 emit_insn (gen (out
, in0
, lane
));
13258 aarch64_evpc_tbl (struct expand_vec_perm_d
*d
)
13260 rtx rperm
[MAX_VECT_LEN
], sel
;
13261 machine_mode vmode
= d
->vmode
;
13262 unsigned int i
, nelt
= d
->nelt
;
13267 /* Generic code will try constant permutation twice. Once with the
13268 original mode and again with the elements lowered to QImode.
13269 So wait and don't do the selector expansion ourselves. */
13270 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
13273 for (i
= 0; i
< nelt
; ++i
)
13275 int nunits
= GET_MODE_NUNITS (vmode
);
13277 /* If big-endian and two vectors we end up with a weird mixed-endian
13278 mode on NEON. Reverse the index within each word but not the word
13280 rperm
[i
] = GEN_INT (BYTES_BIG_ENDIAN
? d
->perm
[i
] ^ (nunits
- 1)
13283 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
13284 sel
= force_reg (vmode
, sel
);
13286 aarch64_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
13291 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
13293 /* The pattern matching functions above are written to look for a small
13294 number to begin the sequence (0, 1, N/2). If we begin with an index
13295 from the second operand, we can swap the operands. */
13296 if (d
->perm
[0] >= d
->nelt
)
13298 unsigned i
, nelt
= d
->nelt
;
13300 gcc_assert (nelt
== (nelt
& -nelt
));
13301 for (i
= 0; i
< nelt
; ++i
)
13302 d
->perm
[i
] ^= nelt
; /* Keep the same index, but in the other vector. */
13304 std::swap (d
->op0
, d
->op1
);
13309 if (aarch64_evpc_rev (d
))
13311 else if (aarch64_evpc_ext (d
))
13313 else if (aarch64_evpc_dup (d
))
13315 else if (aarch64_evpc_zip (d
))
13317 else if (aarch64_evpc_uzp (d
))
13319 else if (aarch64_evpc_trn (d
))
13321 return aarch64_evpc_tbl (d
);
13326 /* Expand a vec_perm_const pattern. */
13329 aarch64_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
13331 struct expand_vec_perm_d d
;
13332 int i
, nelt
, which
;
13338 d
.vmode
= GET_MODE (target
);
13339 gcc_assert (VECTOR_MODE_P (d
.vmode
));
13340 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
13341 d
.testing_p
= false;
13343 for (i
= which
= 0; i
< nelt
; ++i
)
13345 rtx e
= XVECEXP (sel
, 0, i
);
13346 int ei
= INTVAL (e
) & (2 * nelt
- 1);
13347 which
|= (ei
< nelt
? 1 : 2);
13354 gcc_unreachable ();
13357 d
.one_vector_p
= false;
13358 if (!rtx_equal_p (op0
, op1
))
13361 /* The elements of PERM do not suggest that only the first operand
13362 is used, but both operands are identical. Allow easier matching
13363 of the permutation by folding the permutation into the single
13365 /* Fall Through. */
13367 for (i
= 0; i
< nelt
; ++i
)
13368 d
.perm
[i
] &= nelt
- 1;
13370 d
.one_vector_p
= true;
13375 d
.one_vector_p
= true;
13379 return aarch64_expand_vec_perm_const_1 (&d
);
13383 aarch64_vectorize_vec_perm_const_ok (machine_mode vmode
,
13384 const unsigned char *sel
)
13386 struct expand_vec_perm_d d
;
13387 unsigned int i
, nelt
, which
;
13391 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
13392 d
.testing_p
= true;
13393 memcpy (d
.perm
, sel
, nelt
);
13395 /* Calculate whether all elements are in one vector. */
13396 for (i
= which
= 0; i
< nelt
; ++i
)
13398 unsigned char e
= d
.perm
[i
];
13399 gcc_assert (e
< 2 * nelt
);
13400 which
|= (e
< nelt
? 1 : 2);
13403 /* If all elements are from the second vector, reindex as if from the
13406 for (i
= 0; i
< nelt
; ++i
)
13409 /* Check whether the mask can be applied to a single vector. */
13410 d
.one_vector_p
= (which
!= 3);
13412 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
13413 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
13414 if (!d
.one_vector_p
)
13415 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
13418 ret
= aarch64_expand_vec_perm_const_1 (&d
);
13425 aarch64_reverse_mask (enum machine_mode mode
)
13427 /* We have to reverse each vector because we dont have
13428 a permuted load that can reverse-load according to ABI rules. */
13430 rtvec v
= rtvec_alloc (16);
13432 int nunits
= GET_MODE_NUNITS (mode
);
13433 int usize
= GET_MODE_UNIT_SIZE (mode
);
13435 gcc_assert (BYTES_BIG_ENDIAN
);
13436 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode
));
13438 for (i
= 0; i
< nunits
; i
++)
13439 for (j
= 0; j
< usize
; j
++)
13440 RTVEC_ELT (v
, i
* usize
+ j
) = GEN_INT ((i
+ 1) * usize
- 1 - j
);
13441 mask
= gen_rtx_CONST_VECTOR (V16QImode
, v
);
13442 return force_reg (V16QImode
, mask
);
13445 /* Implement MODES_TIEABLE_P. In principle we should always return true.
13446 However due to issues with register allocation it is preferable to avoid
13447 tieing integer scalar and FP scalar modes. Executing integer operations
13448 in general registers is better than treating them as scalar vector
13449 operations. This reduces latency and avoids redundant int<->FP moves.
13450 So tie modes if they are either the same class, or vector modes with
13451 other vector modes, vector structs or any scalar mode.
13455 aarch64_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
13457 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
13460 /* We specifically want to allow elements of "structure" modes to
13461 be tieable to the structure. This more general condition allows
13462 other rarer situations too. */
13463 if (aarch64_vector_mode_p (mode1
) && aarch64_vector_mode_p (mode2
))
13466 /* Also allow any scalar modes with vectors. */
13467 if (aarch64_vector_mode_supported_p (mode1
)
13468 || aarch64_vector_mode_supported_p (mode2
))
13474 /* Return a new RTX holding the result of moving POINTER forward by
13478 aarch64_move_pointer (rtx pointer
, int amount
)
13480 rtx next
= plus_constant (Pmode
, XEXP (pointer
, 0), amount
);
13482 return adjust_automodify_address (pointer
, GET_MODE (pointer
),
13486 /* Return a new RTX holding the result of moving POINTER forward by the
13487 size of the mode it points to. */
13490 aarch64_progress_pointer (rtx pointer
)
13492 HOST_WIDE_INT amount
= GET_MODE_SIZE (GET_MODE (pointer
));
13494 return aarch64_move_pointer (pointer
, amount
);
13497 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
13501 aarch64_copy_one_block_and_progress_pointers (rtx
*src
, rtx
*dst
,
13504 rtx reg
= gen_reg_rtx (mode
);
13506 /* "Cast" the pointers to the correct mode. */
13507 *src
= adjust_address (*src
, mode
, 0);
13508 *dst
= adjust_address (*dst
, mode
, 0);
13509 /* Emit the memcpy. */
13510 emit_move_insn (reg
, *src
);
13511 emit_move_insn (*dst
, reg
);
13512 /* Move the pointers forward. */
13513 *src
= aarch64_progress_pointer (*src
);
13514 *dst
= aarch64_progress_pointer (*dst
);
13517 /* Expand movmem, as if from a __builtin_memcpy. Return true if
13518 we succeed, otherwise return false. */
13521 aarch64_expand_movmem (rtx
*operands
)
13524 rtx dst
= operands
[0];
13525 rtx src
= operands
[1];
13527 bool speed_p
= !optimize_function_for_size_p (cfun
);
13529 /* When optimizing for size, give a better estimate of the length of a
13530 memcpy call, but use the default otherwise. */
13531 unsigned int max_instructions
= (speed_p
? 15 : AARCH64_CALL_RATIO
) / 2;
13533 /* We can't do anything smart if the amount to copy is not constant. */
13534 if (!CONST_INT_P (operands
[2]))
13537 n
= UINTVAL (operands
[2]);
13539 /* Try to keep the number of instructions low. For cases below 16 bytes we
13540 need to make at most two moves. For cases above 16 bytes it will be one
13541 move for each 16 byte chunk, then at most two additional moves. */
13542 if (((n
/ 16) + (n
% 16 ? 2 : 0)) > max_instructions
)
13545 base
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
13546 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
13548 base
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
13549 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
13551 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
13557 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, HImode
);
13562 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, QImode
);
13567 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
13568 4-byte chunk, partially overlapping with the previously copied chunk. */
13571 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
13577 src
= aarch64_move_pointer (src
, move
);
13578 dst
= aarch64_move_pointer (dst
, move
);
13579 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
13584 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
13585 them, then (if applicable) an 8-byte chunk. */
13590 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, TImode
);
13595 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, DImode
);
13600 /* Finish the final bytes of the copy. We can always do this in one
13601 instruction. We either copy the exact amount we need, or partially
13602 overlap with the previous chunk we copied and copy 8-bytes. */
13606 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, QImode
);
13608 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, HImode
);
13610 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
13615 src
= aarch64_move_pointer (src
, -1);
13616 dst
= aarch64_move_pointer (dst
, -1);
13617 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
13623 src
= aarch64_move_pointer (src
, move
);
13624 dst
= aarch64_move_pointer (dst
, move
);
13625 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, DImode
);
13632 /* Split a DImode store of a CONST_INT SRC to MEM DST as two
13633 SImode stores. Handle the case when the constant has identical
13634 bottom and top halves. This is beneficial when the two stores can be
13635 merged into an STP and we avoid synthesising potentially expensive
13636 immediates twice. Return true if such a split is possible. */
13639 aarch64_split_dimode_const_store (rtx dst
, rtx src
)
13641 rtx lo
= gen_lowpart (SImode
, src
);
13642 rtx hi
= gen_highpart_mode (SImode
, DImode
, src
);
13644 bool size_p
= optimize_function_for_size_p (cfun
);
13646 if (!rtx_equal_p (lo
, hi
))
13649 unsigned int orig_cost
13650 = aarch64_internal_mov_immediate (NULL_RTX
, src
, false, DImode
);
13651 unsigned int lo_cost
13652 = aarch64_internal_mov_immediate (NULL_RTX
, lo
, false, SImode
);
13654 /* We want to transform:
13656 MOVK x1, 0x140, lsl 16
13657 MOVK x1, 0xc0da, lsl 32
13658 MOVK x1, 0x140, lsl 48
13662 MOVK w1, 0x140, lsl 16
13664 So we want to perform this only when we save two instructions
13665 or more. When optimizing for size, however, accept any code size
13667 if (size_p
&& orig_cost
<= lo_cost
)
13671 && (orig_cost
<= lo_cost
+ 1))
13674 rtx mem_lo
= adjust_address (dst
, SImode
, 0);
13675 if (!aarch64_mem_pair_operand (mem_lo
, SImode
))
13678 rtx tmp_reg
= gen_reg_rtx (SImode
);
13679 aarch64_expand_mov_immediate (tmp_reg
, lo
);
13680 rtx mem_hi
= aarch64_move_pointer (mem_lo
, GET_MODE_SIZE (SImode
));
13681 /* Don't emit an explicit store pair as this may not be always profitable.
13682 Let the sched-fusion logic decide whether to merge them. */
13683 emit_move_insn (mem_lo
, tmp_reg
);
13684 emit_move_insn (mem_hi
, tmp_reg
);
13689 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
13691 static unsigned HOST_WIDE_INT
13692 aarch64_asan_shadow_offset (void)
13694 return (HOST_WIDE_INT_1
<< 36);
13698 aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size
,
13699 unsigned int align
,
13700 enum by_pieces_operation op
,
13703 /* STORE_BY_PIECES can be used when copying a constant string, but
13704 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
13705 For now we always fail this and let the move_by_pieces code copy
13706 the string from read-only memory. */
13707 if (op
== STORE_BY_PIECES
)
13710 return default_use_by_pieces_infrastructure_p (size
, align
, op
, speed_p
);
13714 aarch64_gen_ccmp_first (rtx_insn
**prep_seq
, rtx_insn
**gen_seq
,
13715 int code
, tree treeop0
, tree treeop1
)
13717 machine_mode op_mode
, cmp_mode
, cc_mode
= CCmode
;
13719 int unsignedp
= TYPE_UNSIGNED (TREE_TYPE (treeop0
));
13721 struct expand_operand ops
[4];
13724 expand_operands (treeop0
, treeop1
, NULL_RTX
, &op0
, &op1
, EXPAND_NORMAL
);
13726 op_mode
= GET_MODE (op0
);
13727 if (op_mode
== VOIDmode
)
13728 op_mode
= GET_MODE (op1
);
13736 icode
= CODE_FOR_cmpsi
;
13741 icode
= CODE_FOR_cmpdi
;
13746 cc_mode
= aarch64_select_cc_mode ((rtx_code
) code
, op0
, op1
);
13747 icode
= cc_mode
== CCFPEmode
? CODE_FOR_fcmpesf
: CODE_FOR_fcmpsf
;
13752 cc_mode
= aarch64_select_cc_mode ((rtx_code
) code
, op0
, op1
);
13753 icode
= cc_mode
== CCFPEmode
? CODE_FOR_fcmpedf
: CODE_FOR_fcmpdf
;
13761 op0
= prepare_operand (icode
, op0
, 0, op_mode
, cmp_mode
, unsignedp
);
13762 op1
= prepare_operand (icode
, op1
, 1, op_mode
, cmp_mode
, unsignedp
);
13768 *prep_seq
= get_insns ();
13771 create_fixed_operand (&ops
[0], op0
);
13772 create_fixed_operand (&ops
[1], op1
);
13775 if (!maybe_expand_insn (icode
, 2, ops
))
13780 *gen_seq
= get_insns ();
13783 return gen_rtx_fmt_ee ((rtx_code
) code
, cc_mode
,
13784 gen_rtx_REG (cc_mode
, CC_REGNUM
), const0_rtx
);
13788 aarch64_gen_ccmp_next (rtx_insn
**prep_seq
, rtx_insn
**gen_seq
, rtx prev
,
13789 int cmp_code
, tree treeop0
, tree treeop1
, int bit_code
)
13791 rtx op0
, op1
, target
;
13792 machine_mode op_mode
, cmp_mode
, cc_mode
= CCmode
;
13793 int unsignedp
= TYPE_UNSIGNED (TREE_TYPE (treeop0
));
13795 struct expand_operand ops
[6];
13798 push_to_sequence (*prep_seq
);
13799 expand_operands (treeop0
, treeop1
, NULL_RTX
, &op0
, &op1
, EXPAND_NORMAL
);
13801 op_mode
= GET_MODE (op0
);
13802 if (op_mode
== VOIDmode
)
13803 op_mode
= GET_MODE (op1
);
13811 icode
= CODE_FOR_ccmpsi
;
13816 icode
= CODE_FOR_ccmpdi
;
13821 cc_mode
= aarch64_select_cc_mode ((rtx_code
) cmp_code
, op0
, op1
);
13822 icode
= cc_mode
== CCFPEmode
? CODE_FOR_fccmpesf
: CODE_FOR_fccmpsf
;
13827 cc_mode
= aarch64_select_cc_mode ((rtx_code
) cmp_code
, op0
, op1
);
13828 icode
= cc_mode
== CCFPEmode
? CODE_FOR_fccmpedf
: CODE_FOR_fccmpdf
;
13836 op0
= prepare_operand (icode
, op0
, 2, op_mode
, cmp_mode
, unsignedp
);
13837 op1
= prepare_operand (icode
, op1
, 3, op_mode
, cmp_mode
, unsignedp
);
13843 *prep_seq
= get_insns ();
13846 target
= gen_rtx_REG (cc_mode
, CC_REGNUM
);
13847 aarch64_cond
= aarch64_get_condition_code_1 (cc_mode
, (rtx_code
) cmp_code
);
13849 if (bit_code
!= AND
)
13851 prev
= gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev
),
13852 GET_MODE (XEXP (prev
, 0))),
13853 VOIDmode
, XEXP (prev
, 0), const0_rtx
);
13854 aarch64_cond
= AARCH64_INVERSE_CONDITION_CODE (aarch64_cond
);
13857 create_fixed_operand (&ops
[0], XEXP (prev
, 0));
13858 create_fixed_operand (&ops
[1], target
);
13859 create_fixed_operand (&ops
[2], op0
);
13860 create_fixed_operand (&ops
[3], op1
);
13861 create_fixed_operand (&ops
[4], prev
);
13862 create_fixed_operand (&ops
[5], GEN_INT (aarch64_cond
));
13864 push_to_sequence (*gen_seq
);
13865 if (!maybe_expand_insn (icode
, 6, ops
))
13871 *gen_seq
= get_insns ();
13874 return gen_rtx_fmt_ee ((rtx_code
) cmp_code
, VOIDmode
, target
, const0_rtx
);
13877 #undef TARGET_GEN_CCMP_FIRST
13878 #define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
13880 #undef TARGET_GEN_CCMP_NEXT
13881 #define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
13883 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
13884 instruction fusion of some sort. */
13887 aarch64_macro_fusion_p (void)
13889 return aarch64_tune_params
.fusible_ops
!= AARCH64_FUSE_NOTHING
;
13893 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
13894 should be kept together during scheduling. */
13897 aarch_macro_fusion_pair_p (rtx_insn
*prev
, rtx_insn
*curr
)
13900 rtx prev_set
= single_set (prev
);
13901 rtx curr_set
= single_set (curr
);
13902 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
13903 bool simple_sets_p
= prev_set
&& curr_set
&& !any_condjump_p (curr
);
13905 if (!aarch64_macro_fusion_p ())
13908 if (simple_sets_p
&& aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK
))
13910 /* We are trying to match:
13911 prev (mov) == (set (reg r0) (const_int imm16))
13912 curr (movk) == (set (zero_extract (reg r0)
13915 (const_int imm16_1)) */
13917 set_dest
= SET_DEST (curr_set
);
13919 if (GET_CODE (set_dest
) == ZERO_EXTRACT
13920 && CONST_INT_P (SET_SRC (curr_set
))
13921 && CONST_INT_P (SET_SRC (prev_set
))
13922 && CONST_INT_P (XEXP (set_dest
, 2))
13923 && INTVAL (XEXP (set_dest
, 2)) == 16
13924 && REG_P (XEXP (set_dest
, 0))
13925 && REG_P (SET_DEST (prev_set
))
13926 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
13932 if (simple_sets_p
&& aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD
))
13935 /* We're trying to match:
13936 prev (adrp) == (set (reg r1)
13937 (high (symbol_ref ("SYM"))))
13938 curr (add) == (set (reg r0)
13940 (symbol_ref ("SYM"))))
13941 Note that r0 need not necessarily be the same as r1, especially
13942 during pre-regalloc scheduling. */
13944 if (satisfies_constraint_Ush (SET_SRC (prev_set
))
13945 && REG_P (SET_DEST (prev_set
)) && REG_P (SET_DEST (curr_set
)))
13947 if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
13948 && REG_P (XEXP (SET_SRC (curr_set
), 0))
13949 && REGNO (XEXP (SET_SRC (curr_set
), 0))
13950 == REGNO (SET_DEST (prev_set
))
13951 && rtx_equal_p (XEXP (SET_SRC (prev_set
), 0),
13952 XEXP (SET_SRC (curr_set
), 1)))
13957 if (simple_sets_p
&& aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK
))
13960 /* We're trying to match:
13961 prev (movk) == (set (zero_extract (reg r0)
13964 (const_int imm16_1))
13965 curr (movk) == (set (zero_extract (reg r0)
13968 (const_int imm16_2)) */
13970 if (GET_CODE (SET_DEST (prev_set
)) == ZERO_EXTRACT
13971 && GET_CODE (SET_DEST (curr_set
)) == ZERO_EXTRACT
13972 && REG_P (XEXP (SET_DEST (prev_set
), 0))
13973 && REG_P (XEXP (SET_DEST (curr_set
), 0))
13974 && REGNO (XEXP (SET_DEST (prev_set
), 0))
13975 == REGNO (XEXP (SET_DEST (curr_set
), 0))
13976 && CONST_INT_P (XEXP (SET_DEST (prev_set
), 2))
13977 && CONST_INT_P (XEXP (SET_DEST (curr_set
), 2))
13978 && INTVAL (XEXP (SET_DEST (prev_set
), 2)) == 32
13979 && INTVAL (XEXP (SET_DEST (curr_set
), 2)) == 48
13980 && CONST_INT_P (SET_SRC (prev_set
))
13981 && CONST_INT_P (SET_SRC (curr_set
)))
13985 if (simple_sets_p
&& aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR
))
13987 /* We're trying to match:
13988 prev (adrp) == (set (reg r0)
13989 (high (symbol_ref ("SYM"))))
13990 curr (ldr) == (set (reg r1)
13991 (mem (lo_sum (reg r0)
13992 (symbol_ref ("SYM")))))
13994 curr (ldr) == (set (reg r1)
13997 (symbol_ref ("SYM")))))) */
13998 if (satisfies_constraint_Ush (SET_SRC (prev_set
))
13999 && REG_P (SET_DEST (prev_set
)) && REG_P (SET_DEST (curr_set
)))
14001 rtx curr_src
= SET_SRC (curr_set
);
14003 if (GET_CODE (curr_src
) == ZERO_EXTEND
)
14004 curr_src
= XEXP (curr_src
, 0);
14006 if (MEM_P (curr_src
) && GET_CODE (XEXP (curr_src
, 0)) == LO_SUM
14007 && REG_P (XEXP (XEXP (curr_src
, 0), 0))
14008 && REGNO (XEXP (XEXP (curr_src
, 0), 0))
14009 == REGNO (SET_DEST (prev_set
))
14010 && rtx_equal_p (XEXP (XEXP (curr_src
, 0), 1),
14011 XEXP (SET_SRC (prev_set
), 0)))
14016 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC
)
14017 && aarch_crypto_can_dual_issue (prev
, curr
))
14020 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH
)
14021 && any_condjump_p (curr
))
14023 enum attr_type prev_type
= get_attr_type (prev
);
14025 /* FIXME: this misses some which is considered simple arthematic
14026 instructions for ThunderX. Simple shifts are missed here. */
14027 if (prev_type
== TYPE_ALUS_SREG
14028 || prev_type
== TYPE_ALUS_IMM
14029 || prev_type
== TYPE_LOGICS_REG
14030 || prev_type
== TYPE_LOGICS_IMM
)
14037 /* Return true iff the instruction fusion described by OP is enabled. */
14040 aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op
)
14042 return (aarch64_tune_params
.fusible_ops
& op
) != 0;
14045 /* If MEM is in the form of [base+offset], extract the two parts
14046 of address and set to BASE and OFFSET, otherwise return false
14047 after clearing BASE and OFFSET. */
14050 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
14054 gcc_assert (MEM_P (mem
));
14056 addr
= XEXP (mem
, 0);
14061 *offset
= const0_rtx
;
14065 if (GET_CODE (addr
) == PLUS
14066 && REG_P (XEXP (addr
, 0)) && CONST_INT_P (XEXP (addr
, 1)))
14068 *base
= XEXP (addr
, 0);
14069 *offset
= XEXP (addr
, 1);
14074 *offset
= NULL_RTX
;
/* Types for scheduling fusion.  */
enum sched_fusion_type
{
  SCHED_FUSION_NONE = 0,
  SCHED_FUSION_LD_SIGN_EXTEND,
  SCHED_FUSION_LD_ZERO_EXTEND,
  SCHED_FUSION_LD,
  SCHED_FUSION_ST,
  SCHED_FUSION_NUM
};
14090 /* If INSN is a load or store of address in the form of [base+offset],
14091 extract the two parts and set to BASE and OFFSET. Return scheduling
14092 fusion type this INSN is. */
14094 static enum sched_fusion_type
14095 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
)
14098 enum sched_fusion_type fusion
= SCHED_FUSION_LD
;
14100 gcc_assert (INSN_P (insn
));
14101 x
= PATTERN (insn
);
14102 if (GET_CODE (x
) != SET
)
14103 return SCHED_FUSION_NONE
;
14106 dest
= SET_DEST (x
);
14108 machine_mode dest_mode
= GET_MODE (dest
);
14110 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode
))
14111 return SCHED_FUSION_NONE
;
14113 if (GET_CODE (src
) == SIGN_EXTEND
)
14115 fusion
= SCHED_FUSION_LD_SIGN_EXTEND
;
14116 src
= XEXP (src
, 0);
14117 if (GET_CODE (src
) != MEM
|| GET_MODE (src
) != SImode
)
14118 return SCHED_FUSION_NONE
;
14120 else if (GET_CODE (src
) == ZERO_EXTEND
)
14122 fusion
= SCHED_FUSION_LD_ZERO_EXTEND
;
14123 src
= XEXP (src
, 0);
14124 if (GET_CODE (src
) != MEM
|| GET_MODE (src
) != SImode
)
14125 return SCHED_FUSION_NONE
;
14128 if (GET_CODE (src
) == MEM
&& REG_P (dest
))
14129 extract_base_offset_in_addr (src
, base
, offset
);
14130 else if (GET_CODE (dest
) == MEM
&& (REG_P (src
) || src
== const0_rtx
))
14132 fusion
= SCHED_FUSION_ST
;
14133 extract_base_offset_in_addr (dest
, base
, offset
);
14136 return SCHED_FUSION_NONE
;
14138 if (*base
== NULL_RTX
|| *offset
== NULL_RTX
)
14139 fusion
= SCHED_FUSION_NONE
;
14144 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
14146 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
14147 and PRI are only calculated for these instructions. For other instruction,
14148 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
14149 type instruction fusion can be added by returning different priorities.
14151 It's important that irrelevant instructions get the largest FUSION_PRI. */
14154 aarch64_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
14155 int *fusion_pri
, int *pri
)
14159 enum sched_fusion_type fusion
;
14161 gcc_assert (INSN_P (insn
));
14164 fusion
= fusion_load_store (insn
, &base
, &offset
);
14165 if (fusion
== SCHED_FUSION_NONE
)
14172 /* Set FUSION_PRI according to fusion type and base register. */
14173 *fusion_pri
= tmp
- fusion
* FIRST_PSEUDO_REGISTER
- REGNO (base
);
14175 /* Calculate PRI. */
14178 /* INSN with smaller offset goes first. */
14179 off_val
= (int)(INTVAL (offset
));
14181 tmp
-= (off_val
& 0xfffff);
14183 tmp
+= ((- off_val
) & 0xfffff);
14189 /* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
14190 Adjust priority of sha1h instructions so they are scheduled before
14191 other SHA1 instructions. */
14194 aarch64_sched_adjust_priority (rtx_insn
*insn
, int priority
)
14196 rtx x
= PATTERN (insn
);
14198 if (GET_CODE (x
) == SET
)
14202 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SHA1H
)
14203 return priority
+ 10;
14209 /* Given OPERANDS of consecutive load/store, check if we can merge
14210 them into ldp/stp. LOAD is true if they are load instructions.
14211 MODE is the mode of memory operands. */
14214 aarch64_operands_ok_for_ldpstp (rtx
*operands
, bool load
,
14215 enum machine_mode mode
)
14217 HOST_WIDE_INT offval_1
, offval_2
, msize
;
14218 enum reg_class rclass_1
, rclass_2
;
14219 rtx mem_1
, mem_2
, reg_1
, reg_2
, base_1
, base_2
, offset_1
, offset_2
;
14223 mem_1
= operands
[1];
14224 mem_2
= operands
[3];
14225 reg_1
= operands
[0];
14226 reg_2
= operands
[2];
14227 gcc_assert (REG_P (reg_1
) && REG_P (reg_2
));
14228 if (REGNO (reg_1
) == REGNO (reg_2
))
14233 mem_1
= operands
[0];
14234 mem_2
= operands
[2];
14235 reg_1
= operands
[1];
14236 reg_2
= operands
[3];
14239 /* The mems cannot be volatile. */
14240 if (MEM_VOLATILE_P (mem_1
) || MEM_VOLATILE_P (mem_2
))
14243 /* If we have SImode and slow unaligned ldp,
14244 check the alignment to be at least 8 byte. */
14246 && (aarch64_tune_params
.extra_tuning_flags
14247 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
)
14249 && MEM_ALIGN (mem_1
) < 8 * BITS_PER_UNIT
)
14252 /* Check if the addresses are in the form of [base+offset]. */
14253 extract_base_offset_in_addr (mem_1
, &base_1
, &offset_1
);
14254 if (base_1
== NULL_RTX
|| offset_1
== NULL_RTX
)
14256 extract_base_offset_in_addr (mem_2
, &base_2
, &offset_2
);
14257 if (base_2
== NULL_RTX
|| offset_2
== NULL_RTX
)
14260 /* Check if the bases are same. */
14261 if (!rtx_equal_p (base_1
, base_2
))
14264 offval_1
= INTVAL (offset_1
);
14265 offval_2
= INTVAL (offset_2
);
14266 msize
= GET_MODE_SIZE (mode
);
14267 /* Check if the offsets are consecutive. */
14268 if (offval_1
!= (offval_2
+ msize
) && offval_2
!= (offval_1
+ msize
))
14271 /* Check if the addresses are clobbered by load. */
14274 if (reg_mentioned_p (reg_1
, mem_1
))
14277 /* In increasing order, the last load can clobber the address. */
14278 if (offval_1
> offval_2
&& reg_mentioned_p (reg_2
, mem_2
))
14282 if (REG_P (reg_1
) && FP_REGNUM_P (REGNO (reg_1
)))
14283 rclass_1
= FP_REGS
;
14285 rclass_1
= GENERAL_REGS
;
14287 if (REG_P (reg_2
) && FP_REGNUM_P (REGNO (reg_2
)))
14288 rclass_2
= FP_REGS
;
14290 rclass_2
= GENERAL_REGS
;
14292 /* Check if the registers are of same class. */
14293 if (rclass_1
!= rclass_2
)
14299 /* Given OPERANDS of consecutive load/store, check if we can merge
14300 them into ldp/stp by adjusting the offset. LOAD is true if they
14301 are load instructions. MODE is the mode of memory operands.
14303 Given below consecutive stores:
14305 str w1, [xb, 0x100]
14306 str w1, [xb, 0x104]
14307 str w1, [xb, 0x108]
14308 str w1, [xb, 0x10c]
14310 Though the offsets are out of the range supported by stp, we can
14311 still pair them after adjusting the offset, like:
14313 add scratch, xb, 0x100
14314 stp w1, w1, [scratch]
14315 stp w1, w1, [scratch, 0x8]
14317 The peephole patterns detecting this opportunity should guarantee
14318 the scratch register is avaliable. */
14321 aarch64_operands_adjust_ok_for_ldpstp (rtx
*operands
, bool load
,
14322 enum machine_mode mode
)
14324 enum reg_class rclass_1
, rclass_2
, rclass_3
, rclass_4
;
14325 HOST_WIDE_INT offval_1
, offval_2
, offval_3
, offval_4
, msize
;
14326 rtx mem_1
, mem_2
, mem_3
, mem_4
, reg_1
, reg_2
, reg_3
, reg_4
;
14327 rtx base_1
, base_2
, base_3
, base_4
, offset_1
, offset_2
, offset_3
, offset_4
;
14331 reg_1
= operands
[0];
14332 mem_1
= operands
[1];
14333 reg_2
= operands
[2];
14334 mem_2
= operands
[3];
14335 reg_3
= operands
[4];
14336 mem_3
= operands
[5];
14337 reg_4
= operands
[6];
14338 mem_4
= operands
[7];
14339 gcc_assert (REG_P (reg_1
) && REG_P (reg_2
)
14340 && REG_P (reg_3
) && REG_P (reg_4
));
14341 if (REGNO (reg_1
) == REGNO (reg_2
) || REGNO (reg_3
) == REGNO (reg_4
))
14346 mem_1
= operands
[0];
14347 reg_1
= operands
[1];
14348 mem_2
= operands
[2];
14349 reg_2
= operands
[3];
14350 mem_3
= operands
[4];
14351 reg_3
= operands
[5];
14352 mem_4
= operands
[6];
14353 reg_4
= operands
[7];
14355 /* Skip if memory operand is by itslef valid for ldp/stp. */
14356 if (!MEM_P (mem_1
) || aarch64_mem_pair_operand (mem_1
, mode
))
14359 /* The mems cannot be volatile. */
14360 if (MEM_VOLATILE_P (mem_1
) || MEM_VOLATILE_P (mem_2
)
14361 || MEM_VOLATILE_P (mem_3
) ||MEM_VOLATILE_P (mem_4
))
14364 /* Check if the addresses are in the form of [base+offset]. */
14365 extract_base_offset_in_addr (mem_1
, &base_1
, &offset_1
);
14366 if (base_1
== NULL_RTX
|| offset_1
== NULL_RTX
)
14368 extract_base_offset_in_addr (mem_2
, &base_2
, &offset_2
);
14369 if (base_2
== NULL_RTX
|| offset_2
== NULL_RTX
)
14371 extract_base_offset_in_addr (mem_3
, &base_3
, &offset_3
);
14372 if (base_3
== NULL_RTX
|| offset_3
== NULL_RTX
)
14374 extract_base_offset_in_addr (mem_4
, &base_4
, &offset_4
);
14375 if (base_4
== NULL_RTX
|| offset_4
== NULL_RTX
)
14378 /* Check if the bases are same. */
14379 if (!rtx_equal_p (base_1
, base_2
)
14380 || !rtx_equal_p (base_2
, base_3
)
14381 || !rtx_equal_p (base_3
, base_4
))
14384 offval_1
= INTVAL (offset_1
);
14385 offval_2
= INTVAL (offset_2
);
14386 offval_3
= INTVAL (offset_3
);
14387 offval_4
= INTVAL (offset_4
);
14388 msize
= GET_MODE_SIZE (mode
);
14389 /* Check if the offsets are consecutive. */
14390 if ((offval_1
!= (offval_2
+ msize
)
14391 || offval_1
!= (offval_3
+ msize
* 2)
14392 || offval_1
!= (offval_4
+ msize
* 3))
14393 && (offval_4
!= (offval_3
+ msize
)
14394 || offval_4
!= (offval_2
+ msize
* 2)
14395 || offval_4
!= (offval_1
+ msize
* 3)))
14398 /* Check if the addresses are clobbered by load. */
14401 if (reg_mentioned_p (reg_1
, mem_1
)
14402 || reg_mentioned_p (reg_2
, mem_2
)
14403 || reg_mentioned_p (reg_3
, mem_3
))
14406 /* In increasing order, the last load can clobber the address. */
14407 if (offval_1
> offval_2
&& reg_mentioned_p (reg_4
, mem_4
))
14411 /* If we have SImode and slow unaligned ldp,
14412 check the alignment to be at least 8 byte. */
14414 && (aarch64_tune_params
.extra_tuning_flags
14415 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
)
14417 && MEM_ALIGN (mem_1
) < 8 * BITS_PER_UNIT
)
14420 if (REG_P (reg_1
) && FP_REGNUM_P (REGNO (reg_1
)))
14421 rclass_1
= FP_REGS
;
14423 rclass_1
= GENERAL_REGS
;
14425 if (REG_P (reg_2
) && FP_REGNUM_P (REGNO (reg_2
)))
14426 rclass_2
= FP_REGS
;
14428 rclass_2
= GENERAL_REGS
;
14430 if (REG_P (reg_3
) && FP_REGNUM_P (REGNO (reg_3
)))
14431 rclass_3
= FP_REGS
;
14433 rclass_3
= GENERAL_REGS
;
14435 if (REG_P (reg_4
) && FP_REGNUM_P (REGNO (reg_4
)))
14436 rclass_4
= FP_REGS
;
14438 rclass_4
= GENERAL_REGS
;
14440 /* Check if the registers are of same class. */
14441 if (rclass_1
!= rclass_2
|| rclass_2
!= rclass_3
|| rclass_3
!= rclass_4
)
14447 /* Given OPERANDS of consecutive load/store, this function pairs them
14448 into ldp/stp after adjusting the offset. It depends on the fact
14449 that addresses of load/store instructions are in increasing order.
14450 MODE is the mode of memory operands. CODE is the rtl operator
14451 which should be applied to all memory operands, it's SIGN_EXTEND,
14452 ZERO_EXTEND or UNKNOWN. */
14455 aarch64_gen_adjusted_ldpstp (rtx
*operands
, bool load
,
14456 enum machine_mode mode
, RTX_CODE code
)
14458 rtx base
, offset
, t1
, t2
;
14459 rtx mem_1
, mem_2
, mem_3
, mem_4
;
14460 HOST_WIDE_INT off_val
, abs_off
, adj_off
, new_off
, stp_off_limit
, msize
;
14464 mem_1
= operands
[1];
14465 mem_2
= operands
[3];
14466 mem_3
= operands
[5];
14467 mem_4
= operands
[7];
14471 mem_1
= operands
[0];
14472 mem_2
= operands
[2];
14473 mem_3
= operands
[4];
14474 mem_4
= operands
[6];
14475 gcc_assert (code
== UNKNOWN
);
14478 extract_base_offset_in_addr (mem_1
, &base
, &offset
);
14479 gcc_assert (base
!= NULL_RTX
&& offset
!= NULL_RTX
);
14481 /* Adjust offset thus it can fit in ldp/stp instruction. */
14482 msize
= GET_MODE_SIZE (mode
);
14483 stp_off_limit
= msize
* 0x40;
14484 off_val
= INTVAL (offset
);
14485 abs_off
= (off_val
< 0) ? -off_val
: off_val
;
14486 new_off
= abs_off
% stp_off_limit
;
14487 adj_off
= abs_off
- new_off
;
14489 /* Further adjust to make sure all offsets are OK. */
14490 if ((new_off
+ msize
* 2) >= stp_off_limit
)
14492 adj_off
+= stp_off_limit
;
14493 new_off
-= stp_off_limit
;
14496 /* Make sure the adjustment can be done with ADD/SUB instructions. */
14497 if (adj_off
>= 0x1000)
14502 adj_off
= -adj_off
;
14503 new_off
= -new_off
;
14506 /* Create new memory references. */
14507 mem_1
= change_address (mem_1
, VOIDmode
,
14508 plus_constant (DImode
, operands
[8], new_off
));
14510 /* Check if the adjusted address is OK for ldp/stp. */
14511 if (!aarch64_mem_pair_operand (mem_1
, mode
))
14514 msize
= GET_MODE_SIZE (mode
);
14515 mem_2
= change_address (mem_2
, VOIDmode
,
14516 plus_constant (DImode
,
14519 mem_3
= change_address (mem_3
, VOIDmode
,
14520 plus_constant (DImode
,
14522 new_off
+ msize
* 2));
14523 mem_4
= change_address (mem_4
, VOIDmode
,
14524 plus_constant (DImode
,
14526 new_off
+ msize
* 3));
14528 if (code
== ZERO_EXTEND
)
14530 mem_1
= gen_rtx_ZERO_EXTEND (DImode
, mem_1
);
14531 mem_2
= gen_rtx_ZERO_EXTEND (DImode
, mem_2
);
14532 mem_3
= gen_rtx_ZERO_EXTEND (DImode
, mem_3
);
14533 mem_4
= gen_rtx_ZERO_EXTEND (DImode
, mem_4
);
14535 else if (code
== SIGN_EXTEND
)
14537 mem_1
= gen_rtx_SIGN_EXTEND (DImode
, mem_1
);
14538 mem_2
= gen_rtx_SIGN_EXTEND (DImode
, mem_2
);
14539 mem_3
= gen_rtx_SIGN_EXTEND (DImode
, mem_3
);
14540 mem_4
= gen_rtx_SIGN_EXTEND (DImode
, mem_4
);
14545 operands
[1] = mem_1
;
14546 operands
[3] = mem_2
;
14547 operands
[5] = mem_3
;
14548 operands
[7] = mem_4
;
14552 operands
[0] = mem_1
;
14553 operands
[2] = mem_2
;
14554 operands
[4] = mem_3
;
14555 operands
[6] = mem_4
;
14558 /* Emit adjusting instruction. */
14559 emit_insn (gen_rtx_SET (operands
[8], plus_constant (DImode
, base
, adj_off
)));
14560 /* Emit ldp/stp instructions. */
14561 t1
= gen_rtx_SET (operands
[0], operands
[1]);
14562 t2
= gen_rtx_SET (operands
[2], operands
[3]);
14563 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, t1
, t2
)));
14564 t1
= gen_rtx_SET (operands
[4], operands
[5]);
14565 t2
= gen_rtx_SET (operands
[6], operands
[7]);
14566 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, t1
, t2
)));
14570 /* Return 1 if pseudo register should be created and used to hold
14571 GOT address for PIC code. */
14574 aarch64_use_pseudo_pic_reg (void)
14576 return aarch64_cmodel
== AARCH64_CMODEL_SMALL_SPIC
;
14579 /* Implement TARGET_UNSPEC_MAY_TRAP_P. */
14582 aarch64_unspec_may_trap_p (const_rtx x
, unsigned flags
)
14584 switch (XINT (x
, 1))
14586 case UNSPEC_GOTSMALLPIC
:
14587 case UNSPEC_GOTSMALLPIC28K
:
14588 case UNSPEC_GOTTINYPIC
:
14594 return default_unspec_may_trap_p (x
, flags
);
14598 /* If X is a positive CONST_DOUBLE with a value that is a power of 2
14599 return the log2 of that value. Otherwise return -1. */
14602 aarch64_fpconst_pow_of_2 (rtx x
)
14604 const REAL_VALUE_TYPE
*r
;
14606 if (!CONST_DOUBLE_P (x
))
14609 r
= CONST_DOUBLE_REAL_VALUE (x
);
14611 if (REAL_VALUE_NEGATIVE (*r
)
14612 || REAL_VALUE_ISNAN (*r
)
14613 || REAL_VALUE_ISINF (*r
)
14614 || !real_isinteger (r
, DFmode
))
14617 return exact_log2 (real_to_integer (r
));
14620 /* If X is a vector of equal CONST_DOUBLE values and that value is
14621 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
14624 aarch64_vec_fpconst_pow_of_2 (rtx x
)
14626 if (GET_CODE (x
) != CONST_VECTOR
)
14629 if (GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_FLOAT
)
14632 int firstval
= aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x
, 0));
14636 for (int i
= 1; i
< CONST_VECTOR_NUNITS (x
); i
++)
14637 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x
, i
)) != firstval
)
14643 /* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
14646 __fp16 always promotes through this hook.
14647 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
14648 through the generic excess precision logic rather than here. */
14651 aarch64_promoted_type (const_tree t
)
14653 if (SCALAR_FLOAT_TYPE_P (t
)
14654 && TYPE_MAIN_VARIANT (t
) == aarch64_fp16_type_node
)
14655 return float_type_node
;
14660 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
14663 aarch64_optab_supported_p (int op
, machine_mode mode1
, machine_mode
,
14664 optimization_type opt_type
)
14669 return opt_type
== OPTIMIZE_FOR_SPEED
&& use_rsqrt_p (mode1
);
14676 /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
14677 if MODE is HFmode, and punt to the generic implementation otherwise. */
14680 aarch64_libgcc_floating_mode_supported_p (machine_mode mode
)
14682 return (mode
== HFmode
14684 : default_libgcc_floating_mode_supported_p (mode
));
14687 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
14688 if MODE is HFmode, and punt to the generic implementation otherwise. */
14691 aarch64_scalar_mode_supported_p (machine_mode mode
)
14693 return (mode
== HFmode
14695 : default_scalar_mode_supported_p (mode
));
14698 /* Set the value of FLT_EVAL_METHOD.
14699 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
14701 0: evaluate all operations and constants, whose semantic type has at
14702 most the range and precision of type float, to the range and
14703 precision of float; evaluate all other operations and constants to
14704 the range and precision of the semantic type;
14706 N, where _FloatN is a supported interchange floating type
14707 evaluate all operations and constants, whose semantic type has at
14708 most the range and precision of _FloatN type, to the range and
14709 precision of the _FloatN type; evaluate all other operations and
14710 constants to the range and precision of the semantic type;
14712 If we have the ARMv8.2-A extensions then we support _Float16 in native
14713 precision, so we should set this to 16. Otherwise, we support the type,
14714 but want to evaluate expressions in float precision, so set this to
14717 static enum flt_eval_method
14718 aarch64_excess_precision (enum excess_precision_type type
)
14722 case EXCESS_PRECISION_TYPE_FAST
:
14723 case EXCESS_PRECISION_TYPE_STANDARD
:
14724 /* We can calculate either in 16-bit range and precision or
14725 32-bit range and precision. Make that decision based on whether
14726 we have native support for the ARMv8.2-A 16-bit floating-point
14727 instructions or not. */
14728 return (TARGET_FP_F16INST
14729 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
14730 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
);
14731 case EXCESS_PRECISION_TYPE_IMPLICIT
:
14732 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
;
14734 gcc_unreachable ();
14736 return FLT_EVAL_METHOD_UNPREDICTABLE
;
14739 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
14740 scheduled for speculative execution. Reject the long-running division
14741 and square-root instructions. */
14744 aarch64_sched_can_speculate_insn (rtx_insn
*insn
)
14746 switch (get_attr_type (insn
))
14754 case TYPE_NEON_FP_SQRT_S
:
14755 case TYPE_NEON_FP_SQRT_D
:
14756 case TYPE_NEON_FP_SQRT_S_Q
:
14757 case TYPE_NEON_FP_SQRT_D_Q
:
14758 case TYPE_NEON_FP_DIV_S
:
14759 case TYPE_NEON_FP_DIV_D
:
14760 case TYPE_NEON_FP_DIV_S_Q
:
14761 case TYPE_NEON_FP_DIV_D_Q
:
/* Target-specific selftests.  */

#if CHECKING_P

namespace selftest {

/* Selftest for the RTL loader.
   Verify that the RTL loader copes with a dump from
   print_rtx_function.  This is essentially just a test that class
   function_reader can handle a real dump, but it also verifies
   that lookup_reg_by_dump_name correctly handles hard regs.
   The presence of hard reg names in the dump means that the test is
   target-specific, hence it is in this file.  */

static void
aarch64_test_loading_full_dump ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));

  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  rtx_insn *insn_1 = get_insn_by_uid (1);
  ASSERT_EQ (NOTE, GET_CODE (insn_1));

  rtx_insn *insn_15 = get_insn_by_uid (15);
  ASSERT_EQ (INSN, GET_CODE (insn_15));
  ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));

  /* Verify crtl->return_rtx.  */
  ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
  ASSERT_EQ (0, REGNO (crtl->return_rtx));
  ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
}

/* Run all target-specific selftests.  */

static void
aarch64_run_selftests (void)
{
  aarch64_test_loading_full_dump ();
}

} // namespace selftest

#endif /* #if CHECKING_P */
/* Target hook overrides.  Each hook is first #undef'd (target-def.h may
   have provided a default) and then redefined to the AArch64
   implementation; targetm below is built from these macros via
   TARGET_INITIALIZER.  */

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P aarch64_can_inline_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CASE_VALUES_THRESHOLD
#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  aarch64_ira_change_pseudo_allocno_class

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
  aarch64_legitimize_address_displacement

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  aarch64_libgcc_floating_mode_supported_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE aarch64_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE aarch64_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT aarch64_option_print

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE aarch64_promoted_type

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  aarch64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
  aarch64_get_separate_components

#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
  aarch64_components_for_bb

#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
  aarch64_disqualify_components

#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  aarch64_emit_prologue_components

#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  aarch64_emit_epilogue_components

#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
  aarch64_set_handled_components

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  aarch64_builtin_support_vector_misalignment

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  aarch64_use_by_pieces_infrastructure_p

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND aarch64_print_operand

#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p

#undef TARGET_OMIT_STRUCT_RETURN_REG
#define TARGET_OMIT_STRUCT_RETURN_REG true

/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4

/* Only hook up the selftests when they are compiled in.  */
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
#endif /* #if CHECKING_P */
15201 struct gcc_target targetm
= TARGET_INITIALIZER
;
15203 #include "gt-aarch64.h"