1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
34 #include "stringpool.h"
40 #include "diagnostic-core.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
47 #include "insn-attr.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
61 #include "target-globals.h"
63 #include "tm-constrs.h"
65 #include "optabs-libfuncs.h"
69 /* This file should be included last. */
70 #include "target-def.h"
72 /* Forward definitions of types. */
/* Shorthand typedefs for the minipool (constant-table) data structures;
   the struct definitions live elsewhere in this file.  */
73 typedef struct minipool_node Mnode
;
74 typedef struct minipool_fixup Mfix
;
/* Hook a language front end may set to emit language-specific object
   attributes.  NOTE(review): it is set and invoked outside this chunk;
   confirm it may be NULL before calling.  */
76 void (*arm_lang_output_object_attributes_hook
)(void);
83 /* Forward function declarations. */
84 static bool arm_const_not_ok_for_debug_p (rtx
);
85 static int arm_needs_doubleword_align (machine_mode
, const_tree
);
86 static int arm_compute_static_chain_stack_bytes (void);
87 static arm_stack_offsets
*arm_get_frame_offsets (void);
88 static void arm_add_gc_roots (void);
89 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
90 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
91 static unsigned bit_count (unsigned long);
92 static unsigned bitmap_popcount (const sbitmap
);
93 static int arm_address_register_rtx_p (rtx
, int);
94 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
95 static bool is_called_in_ARM_mode (tree
);
96 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
97 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
98 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
99 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
100 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
101 inline static int thumb1_index_register_rtx_p (rtx
, int);
102 static int thumb_far_jump_used_p (void);
103 static bool thumb_force_lr_save (void);
104 static unsigned arm_size_return_regs (void);
105 static bool arm_assemble_integer (rtx
, unsigned int, int);
106 static void arm_print_operand (FILE *, rtx
, int);
107 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
108 static bool arm_print_operand_punct_valid_p (unsigned char code
);
109 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
110 static arm_cc
get_arm_condition_code (rtx
);
111 static const char *output_multi_immediate (rtx
*, const char *, const char *,
113 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
114 static struct machine_function
*arm_init_machine_status (void);
115 static void thumb_exit (FILE *, int);
116 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
117 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
118 static Mnode
*add_minipool_forward_ref (Mfix
*);
119 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
120 static Mnode
*add_minipool_backward_ref (Mfix
*);
121 static void assign_minipool_offsets (Mfix
*);
122 static void arm_print_value (FILE *, rtx
);
123 static void dump_minipool (rtx_insn
*);
124 static int arm_barrier_cost (rtx_insn
*);
125 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
126 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
127 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
129 static void arm_reorg (void);
130 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
131 static unsigned long arm_compute_save_reg0_reg12_mask (void);
132 static unsigned long arm_compute_save_reg_mask (void);
133 static unsigned long arm_isr_value (tree
);
134 static unsigned long arm_compute_func_type (void);
135 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
136 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
137 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
138 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
139 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
141 static tree
arm_handle_cmse_nonsecure_entry (tree
*, tree
, tree
, int, bool *);
142 static tree
arm_handle_cmse_nonsecure_call (tree
*, tree
, tree
, int, bool *);
143 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
144 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
145 static int arm_comp_type_attributes (const_tree
, const_tree
);
146 static void arm_set_default_type_attributes (tree
);
147 static int arm_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
148 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
149 static int optimal_immediate_sequence (enum rtx_code code
,
150 unsigned HOST_WIDE_INT val
,
151 struct four_ints
*return_sequence
);
152 static int optimal_immediate_sequence_1 (enum rtx_code code
,
153 unsigned HOST_WIDE_INT val
,
154 struct four_ints
*return_sequence
,
156 static int arm_get_strip_length (int);
157 static bool arm_function_ok_for_sibcall (tree
, tree
);
158 static machine_mode
arm_promote_function_mode (const_tree
,
161 static bool arm_return_in_memory (const_tree
, const_tree
);
162 static rtx
arm_function_value (const_tree
, const_tree
, bool);
163 static rtx
arm_libcall_value_1 (machine_mode
);
164 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
165 static bool arm_function_value_regno_p (const unsigned int);
166 static void arm_internal_label (FILE *, const char *, unsigned long);
167 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
169 static bool arm_have_conditional_execution (void);
170 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
171 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
172 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
173 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
174 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
175 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
176 static void emit_constant_insn (rtx cond
, rtx pattern
);
177 static rtx_insn
*emit_set_insn (rtx
, rtx
);
178 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
179 static int arm_arg_partial_bytes (cumulative_args_t
, machine_mode
,
181 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
183 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
185 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
186 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
188 static rtx
aapcs_libcall_value (machine_mode
);
189 static int aapcs_select_return_coproc (const_tree
, const_tree
);
191 #ifdef OBJECT_FORMAT_ELF
192 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
193 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
196 static void arm_encode_section_info (tree
, rtx
, int);
199 static void arm_file_end (void);
200 static void arm_file_start (void);
201 static void arm_insert_attributes (tree
, tree
*);
203 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
205 static bool arm_pass_by_reference (cumulative_args_t
,
206 machine_mode
, const_tree
, bool);
207 static bool arm_promote_prototypes (const_tree
);
208 static bool arm_default_short_enums (void);
209 static bool arm_align_anon_bitfield (void);
210 static bool arm_return_in_msb (const_tree
);
211 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
212 static bool arm_return_in_memory (const_tree
, const_tree
);
214 static void arm_unwind_emit (FILE *, rtx_insn
*);
215 static bool arm_output_ttype (rtx
);
216 static void arm_asm_emit_except_personality (rtx
);
218 static void arm_asm_init_sections (void);
219 static rtx
arm_dwarf_register_span (rtx
);
221 static tree
arm_cxx_guard_type (void);
222 static bool arm_cxx_guard_mask_bit (void);
223 static tree
arm_get_cookie_size (tree
);
224 static bool arm_cookie_has_size (void);
225 static bool arm_cxx_cdtor_returns_this (void);
226 static bool arm_cxx_key_method_may_be_inline (void);
227 static void arm_cxx_determine_class_data_visibility (tree
);
228 static bool arm_cxx_class_data_always_comdat (void);
229 static bool arm_cxx_use_aeabi_atexit (void);
230 static void arm_init_libfuncs (void);
231 static tree
arm_build_builtin_va_list (void);
232 static void arm_expand_builtin_va_start (tree
, rtx
);
233 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
234 static void arm_option_override (void);
235 static void arm_option_restore (struct gcc_options
*,
236 struct cl_target_option
*);
237 static void arm_override_options_after_change (void);
238 static void arm_option_print (FILE *, int, struct cl_target_option
*);
239 static void arm_set_current_function (tree
);
240 static bool arm_can_inline_p (tree
, tree
);
241 static void arm_relayout_function (tree
);
242 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
243 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
244 static bool arm_sched_can_speculate_insn (rtx_insn
*);
245 static bool arm_macro_fusion_p (void);
246 static bool arm_cannot_copy_insn_p (rtx_insn
*);
247 static int arm_issue_rate (void);
248 static int arm_first_cycle_multipass_dfa_lookahead (void);
249 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
250 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
251 static bool arm_output_addr_const_extra (FILE *, rtx
);
252 static bool arm_allocate_stack_slots_for_args (void);
253 static bool arm_warn_func_return (tree
);
254 static tree
arm_promoted_type (const_tree t
);
255 static bool arm_scalar_mode_supported_p (machine_mode
);
256 static bool arm_frame_pointer_required (void);
257 static bool arm_can_eliminate (const int, const int);
258 static void arm_asm_trampoline_template (FILE *);
259 static void arm_trampoline_init (rtx
, tree
, rtx
);
260 static rtx
arm_trampoline_adjust_address (rtx
);
261 static rtx_insn
*arm_pic_static_addr (rtx orig
, rtx reg
);
262 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
263 static bool xscale_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
264 static bool fa726te_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
265 static bool arm_array_mode_supported_p (machine_mode
,
266 unsigned HOST_WIDE_INT
);
267 static machine_mode
arm_preferred_simd_mode (machine_mode
);
268 static bool arm_class_likely_spilled_p (reg_class_t
);
269 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
270 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
271 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
275 static void arm_conditional_register_usage (void);
276 static enum flt_eval_method
arm_excess_precision (enum excess_precision_type
);
277 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
278 static unsigned int arm_autovectorize_vector_sizes (void);
279 static int arm_default_branch_cost (bool, bool);
280 static int arm_cortex_a5_branch_cost (bool, bool);
281 static int arm_cortex_m_branch_cost (bool, bool);
282 static int arm_cortex_m7_branch_cost (bool, bool);
284 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
285 const unsigned char *sel
);
287 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
289 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
291 int misalign ATTRIBUTE_UNUSED
);
292 static unsigned arm_add_stmt_cost (void *data
, int count
,
293 enum vect_cost_for_stmt kind
,
294 struct _stmt_vec_info
*stmt_info
,
296 enum vect_cost_model_location where
);
298 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
299 bool op0_preserve_value
);
300 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
302 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
303 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
305 static section
*arm_function_section (tree
, enum node_frequency
, bool, bool);
306 static bool arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
);
307 static unsigned int arm_elf_section_type_flags (tree decl
, const char *name
,
309 static void arm_expand_divmod_libfunc (rtx
, machine_mode
, rtx
, rtx
, rtx
*, rtx
*);
310 static machine_mode
arm_floatn_mode (int, bool);
312 /* Table of machine attributes. */
313 static const struct attribute_spec arm_attribute_table
[] =
315 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
316 affects_type_identity } */
317 /* Function calls made to this symbol must be done indirectly, because
318 it may lie outside of the 26 bit addressing range of a normal function call. */
320 { "long_call", 0, 0, false, true, true, NULL
, false },
321 /* Whereas these functions are always known to reside within the 26 bit addressing range. */
323 { "short_call", 0, 0, false, true, true, NULL
, false },
324 /* Specify the procedure call conventions for a function. */
325 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
327 /* Interrupt Service Routines have special prologue and epilogue requirements. */
328 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
330 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
332 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
335 /* ARM/PE has three new attributes:
337 dllexport - for exporting a function/variable that will live in a dll
338 dllimport - for importing a function/variable from a dll
340 Microsoft allows multiple declspecs in one __declspec, separating
341 them with spaces. We do NOT support this. Instead, use __declspec multiple times. */
344 { "dllimport", 0, 0, true, false, false, NULL
, false },
345 { "dllexport", 0, 0, true, false, false, NULL
, false },
346 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
348 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
349 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
350 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
351 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
354 /* ARMv8-M Security Extensions support. */
355 { "cmse_nonsecure_entry", 0, 0, true, false, false,
356 arm_handle_cmse_nonsecure_entry
, false },
357 { "cmse_nonsecure_call", 0, 0, true, false, false,
358 arm_handle_cmse_nonsecure_call
, true },
359 { NULL
, 0, 0, false, false, false, NULL
, false }
362 /* Initialize the GCC target structure. */
363 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
364 #undef TARGET_MERGE_DECL_ATTRIBUTES
365 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
368 #undef TARGET_LEGITIMIZE_ADDRESS
369 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
371 #undef TARGET_ATTRIBUTE_TABLE
372 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
374 #undef TARGET_INSERT_ATTRIBUTES
375 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
377 #undef TARGET_ASM_FILE_START
378 #define TARGET_ASM_FILE_START arm_file_start
379 #undef TARGET_ASM_FILE_END
380 #define TARGET_ASM_FILE_END arm_file_end
382 #undef TARGET_ASM_ALIGNED_SI_OP
383 #define TARGET_ASM_ALIGNED_SI_OP NULL
384 #undef TARGET_ASM_INTEGER
385 #define TARGET_ASM_INTEGER arm_assemble_integer
387 #undef TARGET_PRINT_OPERAND
388 #define TARGET_PRINT_OPERAND arm_print_operand
389 #undef TARGET_PRINT_OPERAND_ADDRESS
390 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
391 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
392 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
394 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
395 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
397 #undef TARGET_ASM_FUNCTION_PROLOGUE
398 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
400 #undef TARGET_ASM_FUNCTION_EPILOGUE
401 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
403 #undef TARGET_CAN_INLINE_P
404 #define TARGET_CAN_INLINE_P arm_can_inline_p
406 #undef TARGET_RELAYOUT_FUNCTION
407 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
409 #undef TARGET_OPTION_OVERRIDE
410 #define TARGET_OPTION_OVERRIDE arm_option_override
412 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
413 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
415 #undef TARGET_OPTION_RESTORE
416 #define TARGET_OPTION_RESTORE arm_option_restore
418 #undef TARGET_OPTION_PRINT
419 #define TARGET_OPTION_PRINT arm_option_print
421 #undef TARGET_COMP_TYPE_ATTRIBUTES
422 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
424 #undef TARGET_SCHED_CAN_SPECULATE_INSN
425 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
427 #undef TARGET_SCHED_MACRO_FUSION_P
428 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
430 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
431 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
433 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
434 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
436 #undef TARGET_SCHED_ADJUST_COST
437 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
439 #undef TARGET_SET_CURRENT_FUNCTION
440 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
442 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
443 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
445 #undef TARGET_SCHED_REORDER
446 #define TARGET_SCHED_REORDER arm_sched_reorder
448 #undef TARGET_REGISTER_MOVE_COST
449 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
451 #undef TARGET_MEMORY_MOVE_COST
452 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
454 #undef TARGET_ENCODE_SECTION_INFO
456 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
458 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
461 #undef TARGET_STRIP_NAME_ENCODING
462 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
464 #undef TARGET_ASM_INTERNAL_LABEL
465 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
467 #undef TARGET_FLOATN_MODE
468 #define TARGET_FLOATN_MODE arm_floatn_mode
470 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
471 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
473 #undef TARGET_FUNCTION_VALUE
474 #define TARGET_FUNCTION_VALUE arm_function_value
476 #undef TARGET_LIBCALL_VALUE
477 #define TARGET_LIBCALL_VALUE arm_libcall_value
479 #undef TARGET_FUNCTION_VALUE_REGNO_P
480 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
482 #undef TARGET_ASM_OUTPUT_MI_THUNK
483 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
484 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
485 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
487 #undef TARGET_RTX_COSTS
488 #define TARGET_RTX_COSTS arm_rtx_costs
489 #undef TARGET_ADDRESS_COST
490 #define TARGET_ADDRESS_COST arm_address_cost
492 #undef TARGET_SHIFT_TRUNCATION_MASK
493 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
494 #undef TARGET_VECTOR_MODE_SUPPORTED_P
495 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
496 #undef TARGET_ARRAY_MODE_SUPPORTED_P
497 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
498 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
499 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
500 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
501 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
502 arm_autovectorize_vector_sizes
504 #undef TARGET_MACHINE_DEPENDENT_REORG
505 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
507 #undef TARGET_INIT_BUILTINS
508 #define TARGET_INIT_BUILTINS arm_init_builtins
509 #undef TARGET_EXPAND_BUILTIN
510 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
511 #undef TARGET_BUILTIN_DECL
512 #define TARGET_BUILTIN_DECL arm_builtin_decl
514 #undef TARGET_INIT_LIBFUNCS
515 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
517 #undef TARGET_PROMOTE_FUNCTION_MODE
518 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
519 #undef TARGET_PROMOTE_PROTOTYPES
520 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
521 #undef TARGET_PASS_BY_REFERENCE
522 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
523 #undef TARGET_ARG_PARTIAL_BYTES
524 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
525 #undef TARGET_FUNCTION_ARG
526 #define TARGET_FUNCTION_ARG arm_function_arg
527 #undef TARGET_FUNCTION_ARG_ADVANCE
528 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
529 #undef TARGET_FUNCTION_ARG_BOUNDARY
530 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
532 #undef TARGET_SETUP_INCOMING_VARARGS
533 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
535 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
536 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
538 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
539 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
540 #undef TARGET_TRAMPOLINE_INIT
541 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
542 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
543 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
545 #undef TARGET_WARN_FUNC_RETURN
546 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
548 #undef TARGET_DEFAULT_SHORT_ENUMS
549 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
551 #undef TARGET_ALIGN_ANON_BITFIELD
552 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
554 #undef TARGET_NARROW_VOLATILE_BITFIELD
555 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
557 #undef TARGET_CXX_GUARD_TYPE
558 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
560 #undef TARGET_CXX_GUARD_MASK_BIT
561 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
563 #undef TARGET_CXX_GET_COOKIE_SIZE
564 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
566 #undef TARGET_CXX_COOKIE_HAS_SIZE
567 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
569 #undef TARGET_CXX_CDTOR_RETURNS_THIS
570 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
572 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
573 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
575 #undef TARGET_CXX_USE_AEABI_ATEXIT
576 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
578 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
579 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
580 arm_cxx_determine_class_data_visibility
582 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
583 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
585 #undef TARGET_RETURN_IN_MSB
586 #define TARGET_RETURN_IN_MSB arm_return_in_msb
588 #undef TARGET_RETURN_IN_MEMORY
589 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
591 #undef TARGET_MUST_PASS_IN_STACK
592 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
595 #undef TARGET_ASM_UNWIND_EMIT
596 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
598 /* EABI unwinding tables use a different format for the typeinfo tables. */
599 #undef TARGET_ASM_TTYPE
600 #define TARGET_ASM_TTYPE arm_output_ttype
602 #undef TARGET_ARM_EABI_UNWINDER
603 #define TARGET_ARM_EABI_UNWINDER true
605 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
606 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
608 #endif /* ARM_UNWIND_INFO */
610 #undef TARGET_ASM_INIT_SECTIONS
611 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
613 #undef TARGET_DWARF_REGISTER_SPAN
614 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
616 #undef TARGET_CANNOT_COPY_INSN_P
617 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
620 #undef TARGET_HAVE_TLS
621 #define TARGET_HAVE_TLS true
624 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
625 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
627 #undef TARGET_LEGITIMATE_CONSTANT_P
628 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
630 #undef TARGET_CANNOT_FORCE_CONST_MEM
631 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
633 #undef TARGET_MAX_ANCHOR_OFFSET
634 #define TARGET_MAX_ANCHOR_OFFSET 4095
636 /* The minimum is set such that the total size of the block
637 for a particular anchor is -4088 + 1 + 4095 bytes, which is
638 divisible by eight, ensuring natural spacing of anchors. */
639 #undef TARGET_MIN_ANCHOR_OFFSET
640 #define TARGET_MIN_ANCHOR_OFFSET -4088
642 #undef TARGET_SCHED_ISSUE_RATE
643 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
645 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
646 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
647 arm_first_cycle_multipass_dfa_lookahead
649 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
650 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
651 arm_first_cycle_multipass_dfa_lookahead_guard
653 #undef TARGET_MANGLE_TYPE
654 #define TARGET_MANGLE_TYPE arm_mangle_type
656 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
657 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
659 #undef TARGET_BUILD_BUILTIN_VA_LIST
660 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
661 #undef TARGET_EXPAND_BUILTIN_VA_START
662 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
663 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
664 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
667 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
668 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
671 #undef TARGET_LEGITIMATE_ADDRESS_P
672 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
674 #undef TARGET_PREFERRED_RELOAD_CLASS
675 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
677 #undef TARGET_PROMOTED_TYPE
678 #define TARGET_PROMOTED_TYPE arm_promoted_type
680 #undef TARGET_SCALAR_MODE_SUPPORTED_P
681 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
683 #undef TARGET_FRAME_POINTER_REQUIRED
684 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
686 #undef TARGET_CAN_ELIMINATE
687 #define TARGET_CAN_ELIMINATE arm_can_eliminate
689 #undef TARGET_CONDITIONAL_REGISTER_USAGE
690 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
692 #undef TARGET_CLASS_LIKELY_SPILLED_P
693 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
695 #undef TARGET_VECTORIZE_BUILTINS
696 #define TARGET_VECTORIZE_BUILTINS
698 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
699 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
700 arm_builtin_vectorized_function
702 #undef TARGET_VECTOR_ALIGNMENT
703 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
705 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
706 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
707 arm_vector_alignment_reachable
709 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
710 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
711 arm_builtin_support_vector_misalignment
713 #undef TARGET_PREFERRED_RENAME_CLASS
714 #define TARGET_PREFERRED_RENAME_CLASS \
715 arm_preferred_rename_class
717 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
718 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
719 arm_vectorize_vec_perm_const_ok
721 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
722 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
723 arm_builtin_vectorization_cost
724 #undef TARGET_VECTORIZE_ADD_STMT_COST
725 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
727 #undef TARGET_CANONICALIZE_COMPARISON
728 #define TARGET_CANONICALIZE_COMPARISON \
729 arm_canonicalize_comparison
731 #undef TARGET_ASAN_SHADOW_OFFSET
732 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
734 #undef MAX_INSN_PER_IT_BLOCK
735 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
737 #undef TARGET_CAN_USE_DOLOOP_P
738 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
740 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
741 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
743 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
744 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
746 #undef TARGET_SCHED_FUSION_PRIORITY
747 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
749 #undef TARGET_ASM_FUNCTION_SECTION
750 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
752 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
753 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
755 #undef TARGET_SECTION_TYPE_FLAGS
756 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
758 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
759 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
761 #undef TARGET_C_EXCESS_PRECISION
762 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
764 /* Although the architecture reserves bits 0 and 1, only the former is
765 used for ARM/Thumb ISA selection in v7 and earlier versions. */
766 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
767 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
/* The target hook vector for this backend, populated from the TARGET_*
   macro definitions above via TARGET_INITIALIZER.  */
769 struct gcc_target targetm
= TARGET_INITIALIZER
;
771 /* Obstack for minipool constant handling. */
772 static struct obstack minipool_obstack
;
/* Start of the growing object on minipool_obstack.
   NOTE(review): inferred from the name; confirm against the minipool
   code, which is outside this chunk.  */
773 static char * minipool_startobj
;
775 /* The maximum number of insns skipped which
776 will be conditionalised if possible. */
777 static int max_insns_skipped
= 5;
/* Stream to which assembly output is written; defined elsewhere.  */
779 extern FILE * asm_out_file
;
781 /* True if we are currently building a constant table. */
782 int making_const_table
;
784 /* The processor for which instructions should be scheduled. */
785 enum processor_type arm_tune
= TARGET_CPU_arm_none
;
787 /* The current tuning set. */
788 const struct tune_params
*current_tune
;
790 /* Which floating point hardware to schedule for. */
/* NOTE(review): the declaration the comment above documents is not
   visible in this chunk of the file.  */
793 /* Used for Thumb call_via trampolines. */
794 rtx thumb_call_via_label
[14];
795 static int thumb_call_reg_needed
;
797 /* The bits in this mask specify which instruction scheduling options should
   be used. */
799 unsigned int tune_flags
= 0;
801 /* The highest ARM architecture version supported by the
   target. */
803 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
805 /* Active target architecture and tuning. */
807 struct arm_build_target arm_active_target
;
809 /* The following are used in the arm.md file as equivalents to bits
810 in the above two flag variables. */
812 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
815 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
818 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
821 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
824 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
827 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
830 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
833 /* Nonzero if this chip supports the ARM 6K extensions. */
836 /* Nonzero if this chip supports the ARM 6KZ extensions. */
839 /* Nonzero if instructions present in ARMv6-M can be used. */
842 /* Nonzero if this chip supports the ARM 7 extensions. */
/* NOTE(review): the flag variables documented by the comments above
   are not visible in this chunk of the file.  */
845 /* Nonzero if this chip supports the Large Physical Address Extension. */
846 int arm_arch_lpae
= 0;
848 /* Nonzero if instructions not present in the 'M' profile can be used. */
849 int arm_arch_notm
= 0;
851 /* Nonzero if instructions present in ARMv7E-M can be used. */
854 /* Nonzero if instructions present in ARMv8 can be used. */
857 /* Nonzero if this chip supports the ARMv8.1 extensions. */
860 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
863 /* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2. */
865 int arm_fp16_inst
= 0;
867 /* Nonzero if this chip can benefit from load scheduling. */
868 int arm_ld_sched
= 0;
870 /* Nonzero if this chip is a StrongARM. */
871 int arm_tune_strongarm
= 0;
873 /* Nonzero if this chip supports Intel Wireless MMX technology. */
874 int arm_arch_iwmmxt
= 0;
876 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
877 int arm_arch_iwmmxt2
= 0;
879 /* Nonzero if this chip is an XScale. */
880 int arm_arch_xscale
= 0;
882 /* Nonzero if tuning for XScale. */
883 int arm_tune_xscale
= 0;
885 /* Nonzero if we want to tune for stores that access the write-buffer.
886 This typically means an ARM6 or ARM7 with MMU or MPU. */
887 int arm_tune_wbuf
= 0;
889 /* Nonzero if tuning for Cortex-A9. */
890 int arm_tune_cortex_a9
= 0;
892 /* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
894 XXX This is a bit of a hack, it's intended to help work around
895 problems in GLD which doesn't understand that armv5t code is
896 interworking clean. */
897 int arm_cpp_interwork
= 0;
899 /* Nonzero if chip supports Thumb 1. */
902 /* Nonzero if chip supports Thumb 2. */
905 /* Nonzero if chip supports integer division instruction. */
906 int arm_arch_arm_hwdiv
;
907 int arm_arch_thumb_hwdiv
;
909 /* Nonzero if chip disallows volatile memory access in IT block. */
910 int arm_arch_no_volatile_ce
;
912 /* Nonzero if we should use Neon to handle 64-bits operations rather
913 than core registers. */
914 int prefer_neon_for_64bits
= 0;
916 /* Nonzero if we shouldn't use literal pools. */
917 bool arm_disable_literal_pool
= false;
919 /* The register number to be used for the PIC offset register. */
920 unsigned arm_pic_register
= INVALID_REGNUM
;
922 enum arm_pcs arm_pcs_default
;
924 /* For an explanation of these variables, see final_prescan_insn below. */
926 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
927 enum arm_cond_code arm_current_cc
;
930 int arm_target_label
;
931 /* The number of conditionally executed insns, including the current insn. */
932 int arm_condexec_count
= 0;
933 /* A bitmask specifying the patterns for the IT block.
934 Zero means do not output an IT block before this insn. */
935 int arm_condexec_mask
= 0;
936 /* The number of bits used in arm_condexec_mask. */
937 int arm_condexec_masklen
= 0;
939 /* Nonzero if chip supports the ARMv8 CRC instructions. */
940 int arm_arch_crc
= 0;
942 /* Nonzero if chip supports the ARMv8-M security extensions. */
943 int arm_arch_cmse
= 0;
945 /* Nonzero if the core has a very small, high-latency, multiply unit. */
946 int arm_m_profile_small_mul
= 0;
948 /* The condition codes of the ARM, and the inverse function. */
949 static const char * const arm_condition_codes
[] =
951 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
952 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
955 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
956 int arm_regs_in_sequence
[] =
958 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
961 #define ARM_LSL_NAME "lsl"
962 #define streq(string1, string2) (strcmp (string1, string2) == 0)
964 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
965 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
966 | (1 << PIC_OFFSET_TABLE_REGNUM)))
968 /* Initialization code. */
/* NOTE(review): the following lines are the members of a CPU-description
   record (name, scheduling core, tuning flags, base architecture, ISA
   feature bits, and tuning parameters).  The enclosing struct header and
   its closing brace were lost in extraction -- restore from upstream
   before this region can compile.  */
972 const char *const name
;
973 enum processor_type core
;
974 unsigned int tune_flags
;
976 enum base_architecture base_arch
;
977 enum isa_feature isa_bits
[isa_num_bits
];
978 const struct tune_params
*const tune
;
982 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
983 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
990 /* arm generic vectorizer costs. */
992 struct cpu_vec_costs arm_default_vec_cost
= {
993 1, /* scalar_stmt_cost. */
994 1, /* scalar load_cost. */
995 1, /* scalar_store_cost. */
996 1, /* vec_stmt_cost. */
997 1, /* vec_to_scalar_cost. */
998 1, /* scalar_to_vec_cost. */
999 1, /* vec_align_load_cost. */
1000 1, /* vec_unalign_load_cost. */
1001 1, /* vec_unalign_store_cost. */
1002 1, /* vec_store_cost. */
1003 3, /* cond_taken_branch_cost. */
1004 1, /* cond_not_taken_branch_cost. */
1007 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1008 #include "aarch-cost-tables.h"
/* Per-instruction extra-cost table used when tuning for Cortex-A9
   (struct cpu_cost_table: ALU, mult, load/store, FP, vector sections).
   NOTE(review): extraction dropped this initializer's braces, section
   grouping and several leading fields; restore from upstream before
   this region can compile.  */
1012 const struct cpu_cost_table cortexa9_extra_costs
=
1019 COSTS_N_INSNS (1), /* shift_reg. */
1020 COSTS_N_INSNS (1), /* arith_shift. */
1021 COSTS_N_INSNS (2), /* arith_shift_reg. */
1023 COSTS_N_INSNS (1), /* log_shift_reg. */
1024 COSTS_N_INSNS (1), /* extend. */
1025 COSTS_N_INSNS (2), /* extend_arith. */
1026 COSTS_N_INSNS (1), /* bfi. */
1027 COSTS_N_INSNS (1), /* bfx. */
1031 true /* non_exec_costs_exec. */
1036 COSTS_N_INSNS (3), /* simple. */
1037 COSTS_N_INSNS (3), /* flag_setting. */
1038 COSTS_N_INSNS (2), /* extend. */
1039 COSTS_N_INSNS (3), /* add. */
1040 COSTS_N_INSNS (2), /* extend_add. */
1041 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1045 0, /* simple (N/A). */
1046 0, /* flag_setting (N/A). */
1047 COSTS_N_INSNS (4), /* extend. */
1049 COSTS_N_INSNS (4), /* extend_add. */
1055 COSTS_N_INSNS (2), /* load. */
1056 COSTS_N_INSNS (2), /* load_sign_extend. */
1057 COSTS_N_INSNS (2), /* ldrd. */
1058 COSTS_N_INSNS (2), /* ldm_1st. */
1059 1, /* ldm_regs_per_insn_1st. */
1060 2, /* ldm_regs_per_insn_subsequent. */
1061 COSTS_N_INSNS (5), /* loadf. */
1062 COSTS_N_INSNS (5), /* loadd. */
1063 COSTS_N_INSNS (1), /* load_unaligned. */
1064 COSTS_N_INSNS (2), /* store. */
1065 COSTS_N_INSNS (2), /* strd. */
1066 COSTS_N_INSNS (2), /* stm_1st. */
1067 1, /* stm_regs_per_insn_1st. */
1068 2, /* stm_regs_per_insn_subsequent. */
1069 COSTS_N_INSNS (1), /* storef. */
1070 COSTS_N_INSNS (1), /* stored. */
1071 COSTS_N_INSNS (1), /* store_unaligned. */
1072 COSTS_N_INSNS (1), /* loadv. */
1073 COSTS_N_INSNS (1) /* storev. */
1078 COSTS_N_INSNS (14), /* div. */
1079 COSTS_N_INSNS (4), /* mult. */
1080 COSTS_N_INSNS (7), /* mult_addsub. */
1081 COSTS_N_INSNS (30), /* fma. */
1082 COSTS_N_INSNS (3), /* addsub. */
1083 COSTS_N_INSNS (1), /* fpconst. */
1084 COSTS_N_INSNS (1), /* neg. */
1085 COSTS_N_INSNS (3), /* compare. */
1086 COSTS_N_INSNS (3), /* widen. */
1087 COSTS_N_INSNS (3), /* narrow. */
1088 COSTS_N_INSNS (3), /* toint. */
1089 COSTS_N_INSNS (3), /* fromint. */
1090 COSTS_N_INSNS (3) /* roundint. */
1094 COSTS_N_INSNS (24), /* div. */
1095 COSTS_N_INSNS (5), /* mult. */
1096 COSTS_N_INSNS (8), /* mult_addsub. */
1097 COSTS_N_INSNS (30), /* fma. */
1098 COSTS_N_INSNS (3), /* addsub. */
1099 COSTS_N_INSNS (1), /* fpconst. */
1100 COSTS_N_INSNS (1), /* neg. */
1101 COSTS_N_INSNS (3), /* compare. */
1102 COSTS_N_INSNS (3), /* widen. */
1103 COSTS_N_INSNS (3), /* narrow. */
1104 COSTS_N_INSNS (3), /* toint. */
1105 COSTS_N_INSNS (3), /* fromint. */
1106 COSTS_N_INSNS (3) /* roundint. */
1111 COSTS_N_INSNS (1) /* alu. */
/* Cortex-A8 per-instruction extra-cost table (struct cpu_cost_table).
   NOTE(review): braces, section grouping and some fields were lost in
   extraction -- restore from upstream.  */
1115 const struct cpu_cost_table cortexa8_extra_costs
=
1121 COSTS_N_INSNS (1), /* shift. */
1123 COSTS_N_INSNS (1), /* arith_shift. */
1124 0, /* arith_shift_reg. */
1125 COSTS_N_INSNS (1), /* log_shift. */
1126 0, /* log_shift_reg. */
1128 0, /* extend_arith. */
1134 true /* non_exec_costs_exec. */
1139 COSTS_N_INSNS (1), /* simple. */
1140 COSTS_N_INSNS (1), /* flag_setting. */
1141 COSTS_N_INSNS (1), /* extend. */
1142 COSTS_N_INSNS (1), /* add. */
1143 COSTS_N_INSNS (1), /* extend_add. */
1144 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1148 0, /* simple (N/A). */
1149 0, /* flag_setting (N/A). */
1150 COSTS_N_INSNS (2), /* extend. */
1152 COSTS_N_INSNS (2), /* extend_add. */
1158 COSTS_N_INSNS (1), /* load. */
1159 COSTS_N_INSNS (1), /* load_sign_extend. */
1160 COSTS_N_INSNS (1), /* ldrd. */
1161 COSTS_N_INSNS (1), /* ldm_1st. */
1162 1, /* ldm_regs_per_insn_1st. */
1163 2, /* ldm_regs_per_insn_subsequent. */
1164 COSTS_N_INSNS (1), /* loadf. */
1165 COSTS_N_INSNS (1), /* loadd. */
1166 COSTS_N_INSNS (1), /* load_unaligned. */
1167 COSTS_N_INSNS (1), /* store. */
1168 COSTS_N_INSNS (1), /* strd. */
1169 COSTS_N_INSNS (1), /* stm_1st. */
1170 1, /* stm_regs_per_insn_1st. */
1171 2, /* stm_regs_per_insn_subsequent. */
1172 COSTS_N_INSNS (1), /* storef. */
1173 COSTS_N_INSNS (1), /* stored. */
1174 COSTS_N_INSNS (1), /* store_unaligned. */
1175 COSTS_N_INSNS (1), /* loadv. */
1176 COSTS_N_INSNS (1) /* storev. */
1181 COSTS_N_INSNS (36), /* div. */
1182 COSTS_N_INSNS (11), /* mult. */
1183 COSTS_N_INSNS (20), /* mult_addsub. */
1184 COSTS_N_INSNS (30), /* fma. */
1185 COSTS_N_INSNS (9), /* addsub. */
1186 COSTS_N_INSNS (3), /* fpconst. */
1187 COSTS_N_INSNS (3), /* neg. */
1188 COSTS_N_INSNS (6), /* compare. */
1189 COSTS_N_INSNS (4), /* widen. */
1190 COSTS_N_INSNS (4), /* narrow. */
1191 COSTS_N_INSNS (8), /* toint. */
1192 COSTS_N_INSNS (8), /* fromint. */
1193 COSTS_N_INSNS (8) /* roundint. */
1197 COSTS_N_INSNS (64), /* div. */
1198 COSTS_N_INSNS (16), /* mult. */
1199 COSTS_N_INSNS (25), /* mult_addsub. */
1200 COSTS_N_INSNS (30), /* fma. */
1201 COSTS_N_INSNS (9), /* addsub. */
1202 COSTS_N_INSNS (3), /* fpconst. */
1203 COSTS_N_INSNS (3), /* neg. */
1204 COSTS_N_INSNS (6), /* compare. */
1205 COSTS_N_INSNS (6), /* widen. */
1206 COSTS_N_INSNS (6), /* narrow. */
1207 COSTS_N_INSNS (8), /* toint. */
1208 COSTS_N_INSNS (8), /* fromint. */
1209 COSTS_N_INSNS (8) /* roundint. */
1214 COSTS_N_INSNS (1) /* alu. */
/* Cortex-A5 per-instruction extra-cost table (struct cpu_cost_table).
   NOTE(review): braces, section grouping and some fields were lost in
   extraction -- restore from upstream.  */
1218 const struct cpu_cost_table cortexa5_extra_costs
=
1224 COSTS_N_INSNS (1), /* shift. */
1225 COSTS_N_INSNS (1), /* shift_reg. */
1226 COSTS_N_INSNS (1), /* arith_shift. */
1227 COSTS_N_INSNS (1), /* arith_shift_reg. */
1228 COSTS_N_INSNS (1), /* log_shift. */
1229 COSTS_N_INSNS (1), /* log_shift_reg. */
1230 COSTS_N_INSNS (1), /* extend. */
1231 COSTS_N_INSNS (1), /* extend_arith. */
1232 COSTS_N_INSNS (1), /* bfi. */
1233 COSTS_N_INSNS (1), /* bfx. */
1234 COSTS_N_INSNS (1), /* clz. */
1235 COSTS_N_INSNS (1), /* rev. */
1237 true /* non_exec_costs_exec. */
1244 COSTS_N_INSNS (1), /* flag_setting. */
1245 COSTS_N_INSNS (1), /* extend. */
1246 COSTS_N_INSNS (1), /* add. */
1247 COSTS_N_INSNS (1), /* extend_add. */
1248 COSTS_N_INSNS (7) /* idiv. */
1252 0, /* simple (N/A). */
1253 0, /* flag_setting (N/A). */
1254 COSTS_N_INSNS (1), /* extend. */
1256 COSTS_N_INSNS (2), /* extend_add. */
1262 COSTS_N_INSNS (1), /* load. */
1263 COSTS_N_INSNS (1), /* load_sign_extend. */
1264 COSTS_N_INSNS (6), /* ldrd. */
1265 COSTS_N_INSNS (1), /* ldm_1st. */
1266 1, /* ldm_regs_per_insn_1st. */
1267 2, /* ldm_regs_per_insn_subsequent. */
1268 COSTS_N_INSNS (2), /* loadf. */
1269 COSTS_N_INSNS (4), /* loadd. */
1270 COSTS_N_INSNS (1), /* load_unaligned. */
1271 COSTS_N_INSNS (1), /* store. */
1272 COSTS_N_INSNS (3), /* strd. */
1273 COSTS_N_INSNS (1), /* stm_1st. */
1274 1, /* stm_regs_per_insn_1st. */
1275 2, /* stm_regs_per_insn_subsequent. */
1276 COSTS_N_INSNS (2), /* storef. */
1277 COSTS_N_INSNS (2), /* stored. */
1278 COSTS_N_INSNS (1), /* store_unaligned. */
1279 COSTS_N_INSNS (1), /* loadv. */
1280 COSTS_N_INSNS (1) /* storev. */
1285 COSTS_N_INSNS (15), /* div. */
1286 COSTS_N_INSNS (3), /* mult. */
1287 COSTS_N_INSNS (7), /* mult_addsub. */
1288 COSTS_N_INSNS (7), /* fma. */
1289 COSTS_N_INSNS (3), /* addsub. */
1290 COSTS_N_INSNS (3), /* fpconst. */
1291 COSTS_N_INSNS (3), /* neg. */
1292 COSTS_N_INSNS (3), /* compare. */
1293 COSTS_N_INSNS (3), /* widen. */
1294 COSTS_N_INSNS (3), /* narrow. */
1295 COSTS_N_INSNS (3), /* toint. */
1296 COSTS_N_INSNS (3), /* fromint. */
1297 COSTS_N_INSNS (3) /* roundint. */
1301 COSTS_N_INSNS (30), /* div. */
1302 COSTS_N_INSNS (6), /* mult. */
1303 COSTS_N_INSNS (10), /* mult_addsub. */
1304 COSTS_N_INSNS (7), /* fma. */
1305 COSTS_N_INSNS (3), /* addsub. */
1306 COSTS_N_INSNS (3), /* fpconst. */
1307 COSTS_N_INSNS (3), /* neg. */
1308 COSTS_N_INSNS (3), /* compare. */
1309 COSTS_N_INSNS (3), /* widen. */
1310 COSTS_N_INSNS (3), /* narrow. */
1311 COSTS_N_INSNS (3), /* toint. */
1312 COSTS_N_INSNS (3), /* fromint. */
1313 COSTS_N_INSNS (3) /* roundint. */
1318 COSTS_N_INSNS (1) /* alu. */
/* Cortex-A7 per-instruction extra-cost table (struct cpu_cost_table).
   NOTE(review): braces, section grouping and some fields were lost in
   extraction -- restore from upstream.  */
1323 const struct cpu_cost_table cortexa7_extra_costs
=
1329 COSTS_N_INSNS (1), /* shift. */
1330 COSTS_N_INSNS (1), /* shift_reg. */
1331 COSTS_N_INSNS (1), /* arith_shift. */
1332 COSTS_N_INSNS (1), /* arith_shift_reg. */
1333 COSTS_N_INSNS (1), /* log_shift. */
1334 COSTS_N_INSNS (1), /* log_shift_reg. */
1335 COSTS_N_INSNS (1), /* extend. */
1336 COSTS_N_INSNS (1), /* extend_arith. */
1337 COSTS_N_INSNS (1), /* bfi. */
1338 COSTS_N_INSNS (1), /* bfx. */
1339 COSTS_N_INSNS (1), /* clz. */
1340 COSTS_N_INSNS (1), /* rev. */
1342 true /* non_exec_costs_exec. */
1349 COSTS_N_INSNS (1), /* flag_setting. */
1350 COSTS_N_INSNS (1), /* extend. */
1351 COSTS_N_INSNS (1), /* add. */
1352 COSTS_N_INSNS (1), /* extend_add. */
1353 COSTS_N_INSNS (7) /* idiv. */
1357 0, /* simple (N/A). */
1358 0, /* flag_setting (N/A). */
1359 COSTS_N_INSNS (1), /* extend. */
1361 COSTS_N_INSNS (2), /* extend_add. */
1367 COSTS_N_INSNS (1), /* load. */
1368 COSTS_N_INSNS (1), /* load_sign_extend. */
1369 COSTS_N_INSNS (3), /* ldrd. */
1370 COSTS_N_INSNS (1), /* ldm_1st. */
1371 1, /* ldm_regs_per_insn_1st. */
1372 2, /* ldm_regs_per_insn_subsequent. */
1373 COSTS_N_INSNS (2), /* loadf. */
1374 COSTS_N_INSNS (2), /* loadd. */
1375 COSTS_N_INSNS (1), /* load_unaligned. */
1376 COSTS_N_INSNS (1), /* store. */
1377 COSTS_N_INSNS (3), /* strd. */
1378 COSTS_N_INSNS (1), /* stm_1st. */
1379 1, /* stm_regs_per_insn_1st. */
1380 2, /* stm_regs_per_insn_subsequent. */
1381 COSTS_N_INSNS (2), /* storef. */
1382 COSTS_N_INSNS (2), /* stored. */
1383 COSTS_N_INSNS (1), /* store_unaligned. */
1384 COSTS_N_INSNS (1), /* loadv. */
1385 COSTS_N_INSNS (1) /* storev. */
1390 COSTS_N_INSNS (15), /* div. */
1391 COSTS_N_INSNS (3), /* mult. */
1392 COSTS_N_INSNS (7), /* mult_addsub. */
1393 COSTS_N_INSNS (7), /* fma. */
1394 COSTS_N_INSNS (3), /* addsub. */
1395 COSTS_N_INSNS (3), /* fpconst. */
1396 COSTS_N_INSNS (3), /* neg. */
1397 COSTS_N_INSNS (3), /* compare. */
1398 COSTS_N_INSNS (3), /* widen. */
1399 COSTS_N_INSNS (3), /* narrow. */
1400 COSTS_N_INSNS (3), /* toint. */
1401 COSTS_N_INSNS (3), /* fromint. */
1402 COSTS_N_INSNS (3) /* roundint. */
1406 COSTS_N_INSNS (30), /* div. */
1407 COSTS_N_INSNS (6), /* mult. */
1408 COSTS_N_INSNS (10), /* mult_addsub. */
1409 COSTS_N_INSNS (7), /* fma. */
1410 COSTS_N_INSNS (3), /* addsub. */
1411 COSTS_N_INSNS (3), /* fpconst. */
1412 COSTS_N_INSNS (3), /* neg. */
1413 COSTS_N_INSNS (3), /* compare. */
1414 COSTS_N_INSNS (3), /* widen. */
1415 COSTS_N_INSNS (3), /* narrow. */
1416 COSTS_N_INSNS (3), /* toint. */
1417 COSTS_N_INSNS (3), /* fromint. */
1418 COSTS_N_INSNS (3) /* roundint. */
1423 COSTS_N_INSNS (1) /* alu. */
/* Cortex-A12 per-instruction extra-cost table (struct cpu_cost_table).
   NOTE(review): braces, section grouping and some fields were lost in
   extraction -- restore from upstream.  */
1427 const struct cpu_cost_table cortexa12_extra_costs
=
1434 COSTS_N_INSNS (1), /* shift_reg. */
1435 COSTS_N_INSNS (1), /* arith_shift. */
1436 COSTS_N_INSNS (1), /* arith_shift_reg. */
1437 COSTS_N_INSNS (1), /* log_shift. */
1438 COSTS_N_INSNS (1), /* log_shift_reg. */
1440 COSTS_N_INSNS (1), /* extend_arith. */
1442 COSTS_N_INSNS (1), /* bfx. */
1443 COSTS_N_INSNS (1), /* clz. */
1444 COSTS_N_INSNS (1), /* rev. */
1446 true /* non_exec_costs_exec. */
1451 COSTS_N_INSNS (2), /* simple. */
1452 COSTS_N_INSNS (3), /* flag_setting. */
1453 COSTS_N_INSNS (2), /* extend. */
1454 COSTS_N_INSNS (3), /* add. */
1455 COSTS_N_INSNS (2), /* extend_add. */
1456 COSTS_N_INSNS (18) /* idiv. */
1460 0, /* simple (N/A). */
1461 0, /* flag_setting (N/A). */
1462 COSTS_N_INSNS (3), /* extend. */
1464 COSTS_N_INSNS (3), /* extend_add. */
1470 COSTS_N_INSNS (3), /* load. */
1471 COSTS_N_INSNS (3), /* load_sign_extend. */
1472 COSTS_N_INSNS (3), /* ldrd. */
1473 COSTS_N_INSNS (3), /* ldm_1st. */
1474 1, /* ldm_regs_per_insn_1st. */
1475 2, /* ldm_regs_per_insn_subsequent. */
1476 COSTS_N_INSNS (3), /* loadf. */
1477 COSTS_N_INSNS (3), /* loadd. */
1478 0, /* load_unaligned. */
1482 1, /* stm_regs_per_insn_1st. */
1483 2, /* stm_regs_per_insn_subsequent. */
1484 COSTS_N_INSNS (2), /* storef. */
1485 COSTS_N_INSNS (2), /* stored. */
1486 0, /* store_unaligned. */
1487 COSTS_N_INSNS (1), /* loadv. */
1488 COSTS_N_INSNS (1) /* storev. */
1493 COSTS_N_INSNS (17), /* div. */
1494 COSTS_N_INSNS (4), /* mult. */
1495 COSTS_N_INSNS (8), /* mult_addsub. */
1496 COSTS_N_INSNS (8), /* fma. */
1497 COSTS_N_INSNS (4), /* addsub. */
1498 COSTS_N_INSNS (2), /* fpconst. */
1499 COSTS_N_INSNS (2), /* neg. */
1500 COSTS_N_INSNS (2), /* compare. */
1501 COSTS_N_INSNS (4), /* widen. */
1502 COSTS_N_INSNS (4), /* narrow. */
1503 COSTS_N_INSNS (4), /* toint. */
1504 COSTS_N_INSNS (4), /* fromint. */
1505 COSTS_N_INSNS (4) /* roundint. */
1509 COSTS_N_INSNS (31), /* div. */
1510 COSTS_N_INSNS (4), /* mult. */
1511 COSTS_N_INSNS (8), /* mult_addsub. */
1512 COSTS_N_INSNS (8), /* fma. */
1513 COSTS_N_INSNS (4), /* addsub. */
1514 COSTS_N_INSNS (2), /* fpconst. */
1515 COSTS_N_INSNS (2), /* neg. */
1516 COSTS_N_INSNS (2), /* compare. */
1517 COSTS_N_INSNS (4), /* widen. */
1518 COSTS_N_INSNS (4), /* narrow. */
1519 COSTS_N_INSNS (4), /* toint. */
1520 COSTS_N_INSNS (4), /* fromint. */
1521 COSTS_N_INSNS (4) /* roundint. */
1526 COSTS_N_INSNS (1) /* alu. */
/* Cortex-A15 per-instruction extra-cost table (struct cpu_cost_table).
   NOTE(review): braces, section grouping and some fields were lost in
   extraction -- restore from upstream.  */
1530 const struct cpu_cost_table cortexa15_extra_costs
=
1538 COSTS_N_INSNS (1), /* arith_shift. */
1539 COSTS_N_INSNS (1), /* arith_shift_reg. */
1540 COSTS_N_INSNS (1), /* log_shift. */
1541 COSTS_N_INSNS (1), /* log_shift_reg. */
1543 COSTS_N_INSNS (1), /* extend_arith. */
1544 COSTS_N_INSNS (1), /* bfi. */
1549 true /* non_exec_costs_exec. */
1554 COSTS_N_INSNS (2), /* simple. */
1555 COSTS_N_INSNS (3), /* flag_setting. */
1556 COSTS_N_INSNS (2), /* extend. */
1557 COSTS_N_INSNS (2), /* add. */
1558 COSTS_N_INSNS (2), /* extend_add. */
1559 COSTS_N_INSNS (18) /* idiv. */
1563 0, /* simple (N/A). */
1564 0, /* flag_setting (N/A). */
1565 COSTS_N_INSNS (3), /* extend. */
1567 COSTS_N_INSNS (3), /* extend_add. */
1573 COSTS_N_INSNS (3), /* load. */
1574 COSTS_N_INSNS (3), /* load_sign_extend. */
1575 COSTS_N_INSNS (3), /* ldrd. */
1576 COSTS_N_INSNS (4), /* ldm_1st. */
1577 1, /* ldm_regs_per_insn_1st. */
1578 2, /* ldm_regs_per_insn_subsequent. */
1579 COSTS_N_INSNS (4), /* loadf. */
1580 COSTS_N_INSNS (4), /* loadd. */
1581 0, /* load_unaligned. */
1584 COSTS_N_INSNS (1), /* stm_1st. */
1585 1, /* stm_regs_per_insn_1st. */
1586 2, /* stm_regs_per_insn_subsequent. */
1589 0, /* store_unaligned. */
1590 COSTS_N_INSNS (1), /* loadv. */
1591 COSTS_N_INSNS (1) /* storev. */
1596 COSTS_N_INSNS (17), /* div. */
1597 COSTS_N_INSNS (4), /* mult. */
1598 COSTS_N_INSNS (8), /* mult_addsub. */
1599 COSTS_N_INSNS (8), /* fma. */
1600 COSTS_N_INSNS (4), /* addsub. */
1601 COSTS_N_INSNS (2), /* fpconst. */
1602 COSTS_N_INSNS (2), /* neg. */
1603 COSTS_N_INSNS (5), /* compare. */
1604 COSTS_N_INSNS (4), /* widen. */
1605 COSTS_N_INSNS (4), /* narrow. */
1606 COSTS_N_INSNS (4), /* toint. */
1607 COSTS_N_INSNS (4), /* fromint. */
1608 COSTS_N_INSNS (4) /* roundint. */
1612 COSTS_N_INSNS (31), /* div. */
1613 COSTS_N_INSNS (4), /* mult. */
1614 COSTS_N_INSNS (8), /* mult_addsub. */
1615 COSTS_N_INSNS (8), /* fma. */
1616 COSTS_N_INSNS (4), /* addsub. */
1617 COSTS_N_INSNS (2), /* fpconst. */
1618 COSTS_N_INSNS (2), /* neg. */
1619 COSTS_N_INSNS (2), /* compare. */
1620 COSTS_N_INSNS (4), /* widen. */
1621 COSTS_N_INSNS (4), /* narrow. */
1622 COSTS_N_INSNS (4), /* toint. */
1623 COSTS_N_INSNS (4), /* fromint. */
1624 COSTS_N_INSNS (4) /* roundint. */
1629 COSTS_N_INSNS (1) /* alu. */
/* ARMv7-M per-instruction extra-cost table (struct cpu_cost_table).
   NOTE(review): braces, section grouping and some fields were lost in
   extraction -- restore from upstream.  */
1633 const struct cpu_cost_table v7m_extra_costs
=
1641 0, /* arith_shift. */
1642 COSTS_N_INSNS (1), /* arith_shift_reg. */
1644 COSTS_N_INSNS (1), /* log_shift_reg. */
1646 COSTS_N_INSNS (1), /* extend_arith. */
1651 COSTS_N_INSNS (1), /* non_exec. */
1652 false /* non_exec_costs_exec. */
1657 COSTS_N_INSNS (1), /* simple. */
1658 COSTS_N_INSNS (1), /* flag_setting. */
1659 COSTS_N_INSNS (2), /* extend. */
1660 COSTS_N_INSNS (1), /* add. */
1661 COSTS_N_INSNS (3), /* extend_add. */
1662 COSTS_N_INSNS (8) /* idiv. */
1666 0, /* simple (N/A). */
1667 0, /* flag_setting (N/A). */
1668 COSTS_N_INSNS (2), /* extend. */
1670 COSTS_N_INSNS (3), /* extend_add. */
1676 COSTS_N_INSNS (2), /* load. */
1677 0, /* load_sign_extend. */
1678 COSTS_N_INSNS (3), /* ldrd. */
1679 COSTS_N_INSNS (2), /* ldm_1st. */
1680 1, /* ldm_regs_per_insn_1st. */
1681 1, /* ldm_regs_per_insn_subsequent. */
1682 COSTS_N_INSNS (2), /* loadf. */
1683 COSTS_N_INSNS (3), /* loadd. */
1684 COSTS_N_INSNS (1), /* load_unaligned. */
1685 COSTS_N_INSNS (2), /* store. */
1686 COSTS_N_INSNS (3), /* strd. */
1687 COSTS_N_INSNS (2), /* stm_1st. */
1688 1, /* stm_regs_per_insn_1st. */
1689 1, /* stm_regs_per_insn_subsequent. */
1690 COSTS_N_INSNS (2), /* storef. */
1691 COSTS_N_INSNS (3), /* stored. */
1692 COSTS_N_INSNS (1), /* store_unaligned. */
1693 COSTS_N_INSNS (1), /* loadv. */
1694 COSTS_N_INSNS (1) /* storev. */
1699 COSTS_N_INSNS (7), /* div. */
1700 COSTS_N_INSNS (2), /* mult. */
1701 COSTS_N_INSNS (5), /* mult_addsub. */
1702 COSTS_N_INSNS (3), /* fma. */
1703 COSTS_N_INSNS (1), /* addsub. */
1715 COSTS_N_INSNS (15), /* div. */
1716 COSTS_N_INSNS (5), /* mult. */
1717 COSTS_N_INSNS (7), /* mult_addsub. */
1718 COSTS_N_INSNS (7), /* fma. */
1719 COSTS_N_INSNS (3), /* addsub. */
1732 COSTS_N_INSNS (1) /* alu. */
1736 const struct tune_params arm_slowmul_tune
=
1738 &generic_extra_costs
, /* Insn extra costs. */
1739 NULL
, /* Sched adj cost. */
1740 arm_default_branch_cost
,
1741 &arm_default_vec_cost
,
1742 3, /* Constant limit. */
1743 5, /* Max cond insns. */
1744 8, /* Memset max inline. */
1745 1, /* Issue rate. */
1746 ARM_PREFETCH_NOT_BENEFICIAL
,
1747 tune_params::PREF_CONST_POOL_TRUE
,
1748 tune_params::PREF_LDRD_FALSE
,
1749 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1750 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1751 tune_params::DISPARAGE_FLAGS_NEITHER
,
1752 tune_params::PREF_NEON_64_FALSE
,
1753 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1754 tune_params::FUSE_NOTHING
,
1755 tune_params::SCHED_AUTOPREF_OFF
1758 const struct tune_params arm_fastmul_tune
=
1760 &generic_extra_costs
, /* Insn extra costs. */
1761 NULL
, /* Sched adj cost. */
1762 arm_default_branch_cost
,
1763 &arm_default_vec_cost
,
1764 1, /* Constant limit. */
1765 5, /* Max cond insns. */
1766 8, /* Memset max inline. */
1767 1, /* Issue rate. */
1768 ARM_PREFETCH_NOT_BENEFICIAL
,
1769 tune_params::PREF_CONST_POOL_TRUE
,
1770 tune_params::PREF_LDRD_FALSE
,
1771 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1772 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1773 tune_params::DISPARAGE_FLAGS_NEITHER
,
1774 tune_params::PREF_NEON_64_FALSE
,
1775 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1776 tune_params::FUSE_NOTHING
,
1777 tune_params::SCHED_AUTOPREF_OFF
1780 /* StrongARM has early execution of branches, so a sequence that is worth
1781 skipping is shorter. Set max_insns_skipped to a lower value. */
1783 const struct tune_params arm_strongarm_tune
=
1785 &generic_extra_costs
, /* Insn extra costs. */
1786 NULL
, /* Sched adj cost. */
1787 arm_default_branch_cost
,
1788 &arm_default_vec_cost
,
1789 1, /* Constant limit. */
1790 3, /* Max cond insns. */
1791 8, /* Memset max inline. */
1792 1, /* Issue rate. */
1793 ARM_PREFETCH_NOT_BENEFICIAL
,
1794 tune_params::PREF_CONST_POOL_TRUE
,
1795 tune_params::PREF_LDRD_FALSE
,
1796 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1797 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1798 tune_params::DISPARAGE_FLAGS_NEITHER
,
1799 tune_params::PREF_NEON_64_FALSE
,
1800 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1801 tune_params::FUSE_NOTHING
,
1802 tune_params::SCHED_AUTOPREF_OFF
1805 const struct tune_params arm_xscale_tune
=
1807 &generic_extra_costs
, /* Insn extra costs. */
1808 xscale_sched_adjust_cost
,
1809 arm_default_branch_cost
,
1810 &arm_default_vec_cost
,
1811 2, /* Constant limit. */
1812 3, /* Max cond insns. */
1813 8, /* Memset max inline. */
1814 1, /* Issue rate. */
1815 ARM_PREFETCH_NOT_BENEFICIAL
,
1816 tune_params::PREF_CONST_POOL_TRUE
,
1817 tune_params::PREF_LDRD_FALSE
,
1818 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1819 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1820 tune_params::DISPARAGE_FLAGS_NEITHER
,
1821 tune_params::PREF_NEON_64_FALSE
,
1822 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1823 tune_params::FUSE_NOTHING
,
1824 tune_params::SCHED_AUTOPREF_OFF
1827 const struct tune_params arm_9e_tune
=
1829 &generic_extra_costs
, /* Insn extra costs. */
1830 NULL
, /* Sched adj cost. */
1831 arm_default_branch_cost
,
1832 &arm_default_vec_cost
,
1833 1, /* Constant limit. */
1834 5, /* Max cond insns. */
1835 8, /* Memset max inline. */
1836 1, /* Issue rate. */
1837 ARM_PREFETCH_NOT_BENEFICIAL
,
1838 tune_params::PREF_CONST_POOL_TRUE
,
1839 tune_params::PREF_LDRD_FALSE
,
1840 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1841 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1842 tune_params::DISPARAGE_FLAGS_NEITHER
,
1843 tune_params::PREF_NEON_64_FALSE
,
1844 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1845 tune_params::FUSE_NOTHING
,
1846 tune_params::SCHED_AUTOPREF_OFF
1849 const struct tune_params arm_marvell_pj4_tune
=
1851 &generic_extra_costs
, /* Insn extra costs. */
1852 NULL
, /* Sched adj cost. */
1853 arm_default_branch_cost
,
1854 &arm_default_vec_cost
,
1855 1, /* Constant limit. */
1856 5, /* Max cond insns. */
1857 8, /* Memset max inline. */
1858 2, /* Issue rate. */
1859 ARM_PREFETCH_NOT_BENEFICIAL
,
1860 tune_params::PREF_CONST_POOL_TRUE
,
1861 tune_params::PREF_LDRD_FALSE
,
1862 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1863 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1864 tune_params::DISPARAGE_FLAGS_NEITHER
,
1865 tune_params::PREF_NEON_64_FALSE
,
1866 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1867 tune_params::FUSE_NOTHING
,
1868 tune_params::SCHED_AUTOPREF_OFF
1871 const struct tune_params arm_v6t2_tune
=
1873 &generic_extra_costs
, /* Insn extra costs. */
1874 NULL
, /* Sched adj cost. */
1875 arm_default_branch_cost
,
1876 &arm_default_vec_cost
,
1877 1, /* Constant limit. */
1878 5, /* Max cond insns. */
1879 8, /* Memset max inline. */
1880 1, /* Issue rate. */
1881 ARM_PREFETCH_NOT_BENEFICIAL
,
1882 tune_params::PREF_CONST_POOL_FALSE
,
1883 tune_params::PREF_LDRD_FALSE
,
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1885 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1886 tune_params::DISPARAGE_FLAGS_NEITHER
,
1887 tune_params::PREF_NEON_64_FALSE
,
1888 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1889 tune_params::FUSE_NOTHING
,
1890 tune_params::SCHED_AUTOPREF_OFF
1894 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1895 const struct tune_params arm_cortex_tune
=
1897 &generic_extra_costs
,
1898 NULL
, /* Sched adj cost. */
1899 arm_default_branch_cost
,
1900 &arm_default_vec_cost
,
1901 1, /* Constant limit. */
1902 5, /* Max cond insns. */
1903 8, /* Memset max inline. */
1904 2, /* Issue rate. */
1905 ARM_PREFETCH_NOT_BENEFICIAL
,
1906 tune_params::PREF_CONST_POOL_FALSE
,
1907 tune_params::PREF_LDRD_FALSE
,
1908 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1909 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1910 tune_params::DISPARAGE_FLAGS_NEITHER
,
1911 tune_params::PREF_NEON_64_FALSE
,
1912 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1913 tune_params::FUSE_NOTHING
,
1914 tune_params::SCHED_AUTOPREF_OFF
1917 const struct tune_params arm_cortex_a8_tune
=
1919 &cortexa8_extra_costs
,
1920 NULL
, /* Sched adj cost. */
1921 arm_default_branch_cost
,
1922 &arm_default_vec_cost
,
1923 1, /* Constant limit. */
1924 5, /* Max cond insns. */
1925 8, /* Memset max inline. */
1926 2, /* Issue rate. */
1927 ARM_PREFETCH_NOT_BENEFICIAL
,
1928 tune_params::PREF_CONST_POOL_FALSE
,
1929 tune_params::PREF_LDRD_FALSE
,
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1931 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1932 tune_params::DISPARAGE_FLAGS_NEITHER
,
1933 tune_params::PREF_NEON_64_FALSE
,
1934 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1935 tune_params::FUSE_NOTHING
,
1936 tune_params::SCHED_AUTOPREF_OFF
1939 const struct tune_params arm_cortex_a7_tune
=
1941 &cortexa7_extra_costs
,
1942 NULL
, /* Sched adj cost. */
1943 arm_default_branch_cost
,
1944 &arm_default_vec_cost
,
1945 1, /* Constant limit. */
1946 5, /* Max cond insns. */
1947 8, /* Memset max inline. */
1948 2, /* Issue rate. */
1949 ARM_PREFETCH_NOT_BENEFICIAL
,
1950 tune_params::PREF_CONST_POOL_FALSE
,
1951 tune_params::PREF_LDRD_FALSE
,
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1953 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1954 tune_params::DISPARAGE_FLAGS_NEITHER
,
1955 tune_params::PREF_NEON_64_FALSE
,
1956 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1957 tune_params::FUSE_NOTHING
,
1958 tune_params::SCHED_AUTOPREF_OFF
1961 const struct tune_params arm_cortex_a15_tune
=
1963 &cortexa15_extra_costs
,
1964 NULL
, /* Sched adj cost. */
1965 arm_default_branch_cost
,
1966 &arm_default_vec_cost
,
1967 1, /* Constant limit. */
1968 2, /* Max cond insns. */
1969 8, /* Memset max inline. */
1970 3, /* Issue rate. */
1971 ARM_PREFETCH_NOT_BENEFICIAL
,
1972 tune_params::PREF_CONST_POOL_FALSE
,
1973 tune_params::PREF_LDRD_TRUE
,
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1975 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1976 tune_params::DISPARAGE_FLAGS_ALL
,
1977 tune_params::PREF_NEON_64_FALSE
,
1978 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1979 tune_params::FUSE_NOTHING
,
1980 tune_params::SCHED_AUTOPREF_FULL
1983 const struct tune_params arm_cortex_a35_tune
=
1985 &cortexa53_extra_costs
,
1986 NULL
, /* Sched adj cost. */
1987 arm_default_branch_cost
,
1988 &arm_default_vec_cost
,
1989 1, /* Constant limit. */
1990 5, /* Max cond insns. */
1991 8, /* Memset max inline. */
1992 1, /* Issue rate. */
1993 ARM_PREFETCH_NOT_BENEFICIAL
,
1994 tune_params::PREF_CONST_POOL_FALSE
,
1995 tune_params::PREF_LDRD_FALSE
,
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1997 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1998 tune_params::DISPARAGE_FLAGS_NEITHER
,
1999 tune_params::PREF_NEON_64_FALSE
,
2000 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2001 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2002 tune_params::SCHED_AUTOPREF_OFF
2005 const struct tune_params arm_cortex_a53_tune
=
2007 &cortexa53_extra_costs
,
2008 NULL
, /* Sched adj cost. */
2009 arm_default_branch_cost
,
2010 &arm_default_vec_cost
,
2011 1, /* Constant limit. */
2012 5, /* Max cond insns. */
2013 8, /* Memset max inline. */
2014 2, /* Issue rate. */
2015 ARM_PREFETCH_NOT_BENEFICIAL
,
2016 tune_params::PREF_CONST_POOL_FALSE
,
2017 tune_params::PREF_LDRD_FALSE
,
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2019 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2020 tune_params::DISPARAGE_FLAGS_NEITHER
,
2021 tune_params::PREF_NEON_64_FALSE
,
2022 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2023 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2024 tune_params::SCHED_AUTOPREF_OFF
2027 const struct tune_params arm_cortex_a57_tune
=
2029 &cortexa57_extra_costs
,
2030 NULL
, /* Sched adj cost. */
2031 arm_default_branch_cost
,
2032 &arm_default_vec_cost
,
2033 1, /* Constant limit. */
2034 2, /* Max cond insns. */
2035 8, /* Memset max inline. */
2036 3, /* Issue rate. */
2037 ARM_PREFETCH_NOT_BENEFICIAL
,
2038 tune_params::PREF_CONST_POOL_FALSE
,
2039 tune_params::PREF_LDRD_TRUE
,
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2042 tune_params::DISPARAGE_FLAGS_ALL
,
2043 tune_params::PREF_NEON_64_FALSE
,
2044 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2045 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2046 tune_params::SCHED_AUTOPREF_FULL
2049 const struct tune_params arm_exynosm1_tune
=
2051 &exynosm1_extra_costs
,
2052 NULL
, /* Sched adj cost. */
2053 arm_default_branch_cost
,
2054 &arm_default_vec_cost
,
2055 1, /* Constant limit. */
2056 2, /* Max cond insns. */
2057 8, /* Memset max inline. */
2058 3, /* Issue rate. */
2059 ARM_PREFETCH_NOT_BENEFICIAL
,
2060 tune_params::PREF_CONST_POOL_FALSE
,
2061 tune_params::PREF_LDRD_TRUE
,
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2064 tune_params::DISPARAGE_FLAGS_ALL
,
2065 tune_params::PREF_NEON_64_FALSE
,
2066 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2067 tune_params::FUSE_NOTHING
,
2068 tune_params::SCHED_AUTOPREF_OFF
2071 const struct tune_params arm_xgene1_tune
=
2073 &xgene1_extra_costs
,
2074 NULL
, /* Sched adj cost. */
2075 arm_default_branch_cost
,
2076 &arm_default_vec_cost
,
2077 1, /* Constant limit. */
2078 2, /* Max cond insns. */
2079 32, /* Memset max inline. */
2080 4, /* Issue rate. */
2081 ARM_PREFETCH_NOT_BENEFICIAL
,
2082 tune_params::PREF_CONST_POOL_FALSE
,
2083 tune_params::PREF_LDRD_TRUE
,
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2085 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2086 tune_params::DISPARAGE_FLAGS_ALL
,
2087 tune_params::PREF_NEON_64_FALSE
,
2088 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2089 tune_params::FUSE_NOTHING
,
2090 tune_params::SCHED_AUTOPREF_OFF
2093 const struct tune_params arm_qdf24xx_tune
=
2095 &qdf24xx_extra_costs
,
2096 NULL
, /* Scheduler cost adjustment. */
2097 arm_default_branch_cost
,
2098 &arm_default_vec_cost
, /* Vectorizer costs. */
2099 1, /* Constant limit. */
2100 2, /* Max cond insns. */
2101 8, /* Memset max inline. */
2102 4, /* Issue rate. */
2103 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2104 tune_params::PREF_CONST_POOL_FALSE
,
2105 tune_params::PREF_LDRD_TRUE
,
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2107 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2108 tune_params::DISPARAGE_FLAGS_ALL
,
2109 tune_params::PREF_NEON_64_FALSE
,
2110 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2111 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2112 tune_params::SCHED_AUTOPREF_FULL
2115 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2116 less appealing. Set max_insns_skipped to a low value. */
2118 const struct tune_params arm_cortex_a5_tune
=
2120 &cortexa5_extra_costs
,
2121 NULL
, /* Sched adj cost. */
2122 arm_cortex_a5_branch_cost
,
2123 &arm_default_vec_cost
,
2124 1, /* Constant limit. */
2125 1, /* Max cond insns. */
2126 8, /* Memset max inline. */
2127 2, /* Issue rate. */
2128 ARM_PREFETCH_NOT_BENEFICIAL
,
2129 tune_params::PREF_CONST_POOL_FALSE
,
2130 tune_params::PREF_LDRD_FALSE
,
2131 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2132 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2133 tune_params::DISPARAGE_FLAGS_NEITHER
,
2134 tune_params::PREF_NEON_64_FALSE
,
2135 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2136 tune_params::FUSE_NOTHING
,
2137 tune_params::SCHED_AUTOPREF_OFF
2140 const struct tune_params arm_cortex_a9_tune
=
2142 &cortexa9_extra_costs
,
2143 cortex_a9_sched_adjust_cost
,
2144 arm_default_branch_cost
,
2145 &arm_default_vec_cost
,
2146 1, /* Constant limit. */
2147 5, /* Max cond insns. */
2148 8, /* Memset max inline. */
2149 2, /* Issue rate. */
2150 ARM_PREFETCH_BENEFICIAL(4,32,32),
2151 tune_params::PREF_CONST_POOL_FALSE
,
2152 tune_params::PREF_LDRD_FALSE
,
2153 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2154 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2155 tune_params::DISPARAGE_FLAGS_NEITHER
,
2156 tune_params::PREF_NEON_64_FALSE
,
2157 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2158 tune_params::FUSE_NOTHING
,
2159 tune_params::SCHED_AUTOPREF_OFF
2162 const struct tune_params arm_cortex_a12_tune
=
2164 &cortexa12_extra_costs
,
2165 NULL
, /* Sched adj cost. */
2166 arm_default_branch_cost
,
2167 &arm_default_vec_cost
, /* Vectorizer costs. */
2168 1, /* Constant limit. */
2169 2, /* Max cond insns. */
2170 8, /* Memset max inline. */
2171 2, /* Issue rate. */
2172 ARM_PREFETCH_NOT_BENEFICIAL
,
2173 tune_params::PREF_CONST_POOL_FALSE
,
2174 tune_params::PREF_LDRD_TRUE
,
2175 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2176 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2177 tune_params::DISPARAGE_FLAGS_ALL
,
2178 tune_params::PREF_NEON_64_FALSE
,
2179 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2180 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2181 tune_params::SCHED_AUTOPREF_OFF
2184 const struct tune_params arm_cortex_a73_tune
=
2186 &cortexa57_extra_costs
,
2187 NULL
, /* Sched adj cost. */
2188 arm_default_branch_cost
,
2189 &arm_default_vec_cost
, /* Vectorizer costs. */
2190 1, /* Constant limit. */
2191 2, /* Max cond insns. */
2192 8, /* Memset max inline. */
2193 2, /* Issue rate. */
2194 ARM_PREFETCH_NOT_BENEFICIAL
,
2195 tune_params::PREF_CONST_POOL_FALSE
,
2196 tune_params::PREF_LDRD_TRUE
,
2197 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2198 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2199 tune_params::DISPARAGE_FLAGS_ALL
,
2200 tune_params::PREF_NEON_64_FALSE
,
2201 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2202 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2203 tune_params::SCHED_AUTOPREF_FULL
2206 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2207 cycle to execute each. An LDR from the constant pool also takes two cycles
2208 to execute, but mildly increases pipelining opportunity (consecutive
2209 loads/stores can be pipelined together, saving one cycle), and may also
2210 improve icache utilisation. Hence we prefer the constant pool for such
2213 const struct tune_params arm_v7m_tune
=
2216 NULL
, /* Sched adj cost. */
2217 arm_cortex_m_branch_cost
,
2218 &arm_default_vec_cost
,
2219 1, /* Constant limit. */
2220 2, /* Max cond insns. */
2221 8, /* Memset max inline. */
2222 1, /* Issue rate. */
2223 ARM_PREFETCH_NOT_BENEFICIAL
,
2224 tune_params::PREF_CONST_POOL_TRUE
,
2225 tune_params::PREF_LDRD_FALSE
,
2226 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2227 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2228 tune_params::DISPARAGE_FLAGS_NEITHER
,
2229 tune_params::PREF_NEON_64_FALSE
,
2230 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2231 tune_params::FUSE_NOTHING
,
2232 tune_params::SCHED_AUTOPREF_OFF
2235 /* Cortex-M7 tuning. */
2237 const struct tune_params arm_cortex_m7_tune
=
2240 NULL
, /* Sched adj cost. */
2241 arm_cortex_m7_branch_cost
,
2242 &arm_default_vec_cost
,
2243 0, /* Constant limit. */
2244 1, /* Max cond insns. */
2245 8, /* Memset max inline. */
2246 2, /* Issue rate. */
2247 ARM_PREFETCH_NOT_BENEFICIAL
,
2248 tune_params::PREF_CONST_POOL_TRUE
,
2249 tune_params::PREF_LDRD_FALSE
,
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2251 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2252 tune_params::DISPARAGE_FLAGS_NEITHER
,
2253 tune_params::PREF_NEON_64_FALSE
,
2254 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2255 tune_params::FUSE_NOTHING
,
2256 tune_params::SCHED_AUTOPREF_OFF
2259 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2260 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2262 const struct tune_params arm_v6m_tune
=
2264 &generic_extra_costs
, /* Insn extra costs. */
2265 NULL
, /* Sched adj cost. */
2266 arm_default_branch_cost
,
2267 &arm_default_vec_cost
, /* Vectorizer costs. */
2268 1, /* Constant limit. */
2269 5, /* Max cond insns. */
2270 8, /* Memset max inline. */
2271 1, /* Issue rate. */
2272 ARM_PREFETCH_NOT_BENEFICIAL
,
2273 tune_params::PREF_CONST_POOL_FALSE
,
2274 tune_params::PREF_LDRD_FALSE
,
2275 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2276 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2277 tune_params::DISPARAGE_FLAGS_NEITHER
,
2278 tune_params::PREF_NEON_64_FALSE
,
2279 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2280 tune_params::FUSE_NOTHING
,
2281 tune_params::SCHED_AUTOPREF_OFF
2284 const struct tune_params arm_fa726te_tune
=
2286 &generic_extra_costs
, /* Insn extra costs. */
2287 fa726te_sched_adjust_cost
,
2288 arm_default_branch_cost
,
2289 &arm_default_vec_cost
,
2290 1, /* Constant limit. */
2291 5, /* Max cond insns. */
2292 8, /* Memset max inline. */
2293 2, /* Issue rate. */
2294 ARM_PREFETCH_NOT_BENEFICIAL
,
2295 tune_params::PREF_CONST_POOL_TRUE
,
2296 tune_params::PREF_LDRD_FALSE
,
2297 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2298 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2299 tune_params::DISPARAGE_FLAGS_NEITHER
,
2300 tune_params::PREF_NEON_64_FALSE
,
2301 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2302 tune_params::FUSE_NOTHING
,
2303 tune_params::SCHED_AUTOPREF_OFF
2306 /* Auto-generated CPU, FPU and architecture tables. */
2307 #include "arm-cpu-data.h"
/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
/* Supported TLS relocations.  */

enum tls_reloc
{
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};
2326 /* The maximum number of insns to be used when loading a constant. */
2328 arm_constant_limit (bool size_p
)
2330 return size_p
? 1 : current_tune
->constant_limit
;
2333 /* Emit an insn that's a simple single-set. Both the operands must be known
2335 inline static rtx_insn
*
2336 emit_set_insn (rtx x
, rtx y
)
2338 return emit_insn (gen_rtx_SET (x
, y
));
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
2356 /* Return the number of bits set in BMAP. */
2358 bitmap_popcount (const sbitmap bmap
)
2360 unsigned int count
= 0;
2362 sbitmap_iterator sbi
;
2364 EXECUTE_IF_SET_IN_BITMAP (bmap
, 0, n
, sbi
)
2373 } arm_fixed_mode_set
;
2375 /* A small helper for setting fixed-point library libfuncs. */
2378 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2379 const char *funcname
, const char *modename
,
2384 if (num_suffix
== 0)
2385 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2387 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2389 set_optab_libfunc (optable
, mode
, buffer
);
2393 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2394 machine_mode from
, const char *funcname
,
2395 const char *toname
, const char *fromname
)
2398 const char *maybe_suffix_2
= "";
2400 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2401 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2402 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2403 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2404 maybe_suffix_2
= "2";
2406 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2409 set_conv_libfunc (optable
, to
, from
, buffer
);
2412 /* Set up library functions unique to ARM. */
2415 arm_init_libfuncs (void)
2417 /* For Linux, we have access to kernel support for atomic operations. */
2418 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2419 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2421 /* There are no special library functions unless we are using the
2426 /* The functions below are described in Section 4 of the "Run-Time
2427 ABI for the ARM architecture", Version 1.0. */
2429 /* Double-precision floating-point arithmetic. Table 2. */
2430 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2431 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2432 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2433 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2434 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2436 /* Double-precision comparisons. Table 3. */
2437 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2438 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2439 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2440 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2441 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2442 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2443 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2445 /* Single-precision floating-point arithmetic. Table 4. */
2446 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2447 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2448 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2449 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2450 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2452 /* Single-precision comparisons. Table 5. */
2453 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2454 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2455 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2456 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2457 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2458 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2459 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2461 /* Floating-point to integer conversions. Table 6. */
2462 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2463 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2464 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2465 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2466 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2467 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2468 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2469 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2471 /* Conversions between floating types. Table 7. */
2472 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2473 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2475 /* Integer to floating-point conversions. Table 8. */
2476 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2477 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2478 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2479 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2480 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2481 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2482 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2483 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2485 /* Long long. Table 9. */
2486 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2487 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2488 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2489 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2490 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2491 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2492 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2493 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2495 /* Integer (32/32->32) division. \S 4.3.1. */
2496 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2497 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2499 /* The divmod functions are designed so that they can be used for
2500 plain division, even though they return both the quotient and the
2501 remainder. The quotient is returned in the usual location (i.e.,
2502 r0 for SImode, {r0, r1} for DImode), just as would be expected
2503 for an ordinary division routine. Because the AAPCS calling
2504 conventions specify that all of { r0, r1, r2, r3 } are
2505 callee-saved registers, there is no need to tell the compiler
2506 explicitly that those registers are clobbered by these
2508 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2509 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2511 /* For SImode division the ABI provides div-without-mod routines,
2512 which are faster. */
2513 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2514 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2516 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2517 divmod libcalls instead. */
2518 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2519 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2520 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2521 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2523 /* Half-precision float operations. The compiler handles all operations
2524 with NULL libfuncs by converting the SFmode. */
2525 switch (arm_fp16_format
)
2527 case ARM_FP16_FORMAT_IEEE
:
2528 case ARM_FP16_FORMAT_ALTERNATIVE
:
2531 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2532 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2534 : "__gnu_f2h_alternative"));
2535 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2536 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2538 : "__gnu_h2f_alternative"));
2540 set_conv_libfunc (trunc_optab
, HFmode
, DFmode
,
2541 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2543 : "__gnu_d2h_alternative"));
2546 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2547 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2548 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2549 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2550 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2553 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2554 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2555 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2556 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2557 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2558 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2559 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2566 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2568 const arm_fixed_mode_set fixed_arith_modes
[] =
2589 const arm_fixed_mode_set fixed_conv_modes
[] =
2619 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2621 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2622 "add", fixed_arith_modes
[i
].name
, 3);
2623 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2624 "ssadd", fixed_arith_modes
[i
].name
, 3);
2625 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2626 "usadd", fixed_arith_modes
[i
].name
, 3);
2627 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2628 "sub", fixed_arith_modes
[i
].name
, 3);
2629 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2630 "sssub", fixed_arith_modes
[i
].name
, 3);
2631 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2632 "ussub", fixed_arith_modes
[i
].name
, 3);
2633 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2634 "mul", fixed_arith_modes
[i
].name
, 3);
2635 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2636 "ssmul", fixed_arith_modes
[i
].name
, 3);
2637 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2638 "usmul", fixed_arith_modes
[i
].name
, 3);
2639 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2640 "div", fixed_arith_modes
[i
].name
, 3);
2641 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2642 "udiv", fixed_arith_modes
[i
].name
, 3);
2643 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2644 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2645 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2646 "usdiv", fixed_arith_modes
[i
].name
, 3);
2647 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2648 "neg", fixed_arith_modes
[i
].name
, 2);
2649 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2650 "ssneg", fixed_arith_modes
[i
].name
, 2);
2651 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2652 "usneg", fixed_arith_modes
[i
].name
, 2);
2653 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2654 "ashl", fixed_arith_modes
[i
].name
, 3);
2655 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2656 "ashr", fixed_arith_modes
[i
].name
, 3);
2657 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2658 "lshr", fixed_arith_modes
[i
].name
, 3);
2659 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2660 "ssashl", fixed_arith_modes
[i
].name
, 3);
2661 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2662 "usashl", fixed_arith_modes
[i
].name
, 3);
2663 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2664 "cmp", fixed_arith_modes
[i
].name
, 2);
2667 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2668 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2671 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2672 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2675 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2676 fixed_conv_modes
[j
].mode
, "fract",
2677 fixed_conv_modes
[i
].name
,
2678 fixed_conv_modes
[j
].name
);
2679 arm_set_fixed_conv_libfunc (satfract_optab
,
2680 fixed_conv_modes
[i
].mode
,
2681 fixed_conv_modes
[j
].mode
, "satfract",
2682 fixed_conv_modes
[i
].name
,
2683 fixed_conv_modes
[j
].name
);
2684 arm_set_fixed_conv_libfunc (fractuns_optab
,
2685 fixed_conv_modes
[i
].mode
,
2686 fixed_conv_modes
[j
].mode
, "fractuns",
2687 fixed_conv_modes
[i
].name
,
2688 fixed_conv_modes
[j
].name
);
2689 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2690 fixed_conv_modes
[i
].mode
,
2691 fixed_conv_modes
[j
].mode
, "satfractuns",
2692 fixed_conv_modes
[i
].name
,
2693 fixed_conv_modes
[j
].name
);
2697 if (TARGET_AAPCS_BASED
)
2698 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2701 /* On AAPCS systems, this is the "struct __va_list". */
2702 static GTY(()) tree va_list_type
;
2704 /* Return the type to use as __builtin_va_list. */
2706 arm_build_builtin_va_list (void)
2711 if (!TARGET_AAPCS_BASED
)
2712 return std_build_builtin_va_list ();
2714 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2722 The C Library ABI further reinforces this definition in \S
2725 We must follow this definition exactly. The structure tag
2726 name is visible in C++ mangled names, and thus forms a part
2727 of the ABI. The field name may be used by people who
2728 #include <stdarg.h>. */
2729 /* Create the type. */
2730 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2731 /* Give it the required name. */
2732 va_list_name
= build_decl (BUILTINS_LOCATION
,
2734 get_identifier ("__va_list"),
2736 DECL_ARTIFICIAL (va_list_name
) = 1;
2737 TYPE_NAME (va_list_type
) = va_list_name
;
2738 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2739 /* Create the __ap field. */
2740 ap_field
= build_decl (BUILTINS_LOCATION
,
2742 get_identifier ("__ap"),
2744 DECL_ARTIFICIAL (ap_field
) = 1;
2745 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2746 TYPE_FIELDS (va_list_type
) = ap_field
;
2747 /* Compute its layout. */
2748 layout_type (va_list_type
);
2750 return va_list_type
;
2753 /* Return an expression of type "void *" pointing to the next
2754 available argument in a variable-argument list. VALIST is the
2755 user-level va_list object, of type __builtin_va_list. */
2757 arm_extract_valist_ptr (tree valist
)
2759 if (TREE_TYPE (valist
) == error_mark_node
)
2760 return error_mark_node
;
2762 /* On an AAPCS target, the pointer is stored within "struct
2764 if (TARGET_AAPCS_BASED
)
2766 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2767 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2768 valist
, ap_field
, NULL_TREE
);
2774 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2776 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2778 valist
= arm_extract_valist_ptr (valist
);
2779 std_expand_builtin_va_start (valist
, nextarg
);
2782 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2784 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2787 valist
= arm_extract_valist_ptr (valist
);
2788 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2791 /* Check any incompatible options that the user has specified. */
2793 arm_option_check_internal (struct gcc_options
*opts
)
2795 int flags
= opts
->x_target_flags
;
2797 /* iWMMXt and NEON are incompatible. */
2799 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_neon
))
2800 error ("iWMMXt and NEON are incompatible");
2802 /* Make sure that the processor choice does not conflict with any of the
2803 other command line choices. */
2804 if (TARGET_ARM_P (flags
)
2805 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
))
2806 error ("target CPU does not support ARM mode");
2808 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2809 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2810 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2812 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2813 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2815 /* If this target is normally configured to use APCS frames, warn if they
2816 are turned off and debugging is turned on. */
2817 if (TARGET_ARM_P (flags
)
2818 && write_symbols
!= NO_DEBUG
2819 && !TARGET_APCS_FRAME
2820 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2821 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2823 /* iWMMXt unsupported under Thumb mode. */
2824 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2825 error ("iWMMXt unsupported under Thumb mode");
2827 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2828 error ("can not use -mtp=cp15 with 16-bit Thumb");
2830 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2832 error ("RTP PIC is incompatible with Thumb");
2836 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2838 if ((target_pure_code
|| target_slow_flash_data
)
2839 && (!TARGET_HAVE_MOVT
|| arm_arch_notm
|| flag_pic
|| TARGET_NEON
))
2841 const char *flag
= (target_pure_code
? "-mpure-code" :
2842 "-mslow-flash-data");
2843 error ("%s only supports non-pic code on M-profile targets with the "
2844 "MOVT instruction", flag
);
2849 /* Recompute the global settings depending on target attribute options. */
2852 arm_option_params_internal (void)
2854 /* If we are not using the default (ARM mode) section anchor offset
2855 ranges, then set the correct ranges now. */
2858 /* Thumb-1 LDR instructions cannot have negative offsets.
2859 Permissible positive offset ranges are 5-bit (for byte loads),
2860 6-bit (for halfword loads), or 7-bit (for word loads).
2861 Empirical results suggest a 7-bit anchor range gives the best
2862 overall code size. */
2863 targetm
.min_anchor_offset
= 0;
2864 targetm
.max_anchor_offset
= 127;
2866 else if (TARGET_THUMB2
)
2868 /* The minimum is set such that the total size of the block
2869 for a particular anchor is 248 + 1 + 4095 bytes, which is
2870 divisible by eight, ensuring natural spacing of anchors. */
2871 targetm
.min_anchor_offset
= -248;
2872 targetm
.max_anchor_offset
= 4095;
2876 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
2877 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
2882 /* If optimizing for size, bump the number of instructions that we
2883 are prepared to conditionally execute (even on a StrongARM). */
2884 max_insns_skipped
= 6;
2886 /* For THUMB2, we limit the conditional sequence to one IT block. */
2888 max_insns_skipped
= arm_restrict_it
? 1 : 4;
2891 /* When -mrestrict-it is in use tone down the if-conversion. */
2892 max_insns_skipped
= (TARGET_THUMB2
&& arm_restrict_it
)
2893 ? 1 : current_tune
->max_insns_skipped
;
2896 /* True if -mflip-thumb should next add an attribute for the default
2897 mode, false if it should next add an attribute for the opposite mode. */
2898 static GTY(()) bool thumb_flipper
;
2900 /* Options after initial target override. */
2901 static GTY(()) tree init_optimize
;
2904 arm_override_options_after_change_1 (struct gcc_options
*opts
)
2906 if (opts
->x_align_functions
<= 0)
2907 opts
->x_align_functions
= TARGET_THUMB_P (opts
->x_target_flags
)
2908 && opts
->x_optimize_size
? 2 : 4;
2911 /* Implement targetm.override_options_after_change. */
2914 arm_override_options_after_change (void)
2916 arm_configure_build_target (&arm_active_target
,
2917 TREE_TARGET_OPTION (target_option_default_node
),
2918 &global_options_set
, false);
2920 arm_override_options_after_change_1 (&global_options
);
2924 arm_option_restore (struct gcc_options
*, struct cl_target_option
*ptr
)
2926 arm_configure_build_target (&arm_active_target
, ptr
, &global_options_set
,
2930 /* Reset options between modes that the user has specified. */
2932 arm_option_override_internal (struct gcc_options
*opts
,
2933 struct gcc_options
*opts_set
)
2935 arm_override_options_after_change_1 (opts
);
2937 if (TARGET_INTERWORK
&& !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
2939 /* The default is to enable interworking, so this warning message would
2940 be confusing to users who have just compiled with, eg, -march=armv3. */
2941 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2942 opts
->x_target_flags
&= ~MASK_INTERWORK
;
2945 if (TARGET_THUMB_P (opts
->x_target_flags
)
2946 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
2948 warning (0, "target CPU does not support THUMB instructions");
2949 opts
->x_target_flags
&= ~MASK_THUMB
;
2952 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
2954 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2955 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
2958 /* Callee super interworking implies thumb interworking. Adding
2959 this to the flags here simplifies the logic elsewhere. */
2960 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
2961 opts
->x_target_flags
|= MASK_INTERWORK
;
2963 /* need to remember initial values so combinaisons of options like
2964 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2965 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
2967 if (! opts_set
->x_arm_restrict_it
)
2968 opts
->x_arm_restrict_it
= arm_arch8
;
2970 /* ARM execution state and M profile don't have [restrict] IT. */
2971 if (!TARGET_THUMB2_P (opts
->x_target_flags
) || !arm_arch_notm
)
2972 opts
->x_arm_restrict_it
= 0;
2974 /* Enable -munaligned-access by default for
2975 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2976 i.e. Thumb2 and ARM state only.
2977 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2978 - ARMv8 architecture-base processors.
2980 Disable -munaligned-access by default for
2981 - all pre-ARMv6 architecture-based processors
2982 - ARMv6-M architecture-based processors
2983 - ARMv8-M Baseline processors. */
2985 if (! opts_set
->x_unaligned_access
)
2987 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
2988 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
2990 else if (opts
->x_unaligned_access
== 1
2991 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
2993 warning (0, "target CPU does not support unaligned accesses");
2994 opts
->x_unaligned_access
= 0;
2997 /* Don't warn since it's on by default in -O2. */
2998 if (TARGET_THUMB1_P (opts
->x_target_flags
))
2999 opts
->x_flag_schedule_insns
= 0;
3001 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
3003 /* Disable shrink-wrap when optimizing function for size, since it tends to
3004 generate additional returns. */
3005 if (optimize_function_for_size_p (cfun
)
3006 && TARGET_THUMB2_P (opts
->x_target_flags
))
3007 opts
->x_flag_shrink_wrap
= false;
3009 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
3011 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3012 - epilogue_insns - does not accurately model the corresponding insns
3013 emitted in the asm file. In particular, see the comment in thumb_exit
3014 'Find out how many of the (return) argument registers we can corrupt'.
3015 As a consequence, the epilogue may clobber registers without fipa-ra
3016 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3017 TODO: Accurately model clobbers for epilogue_insns and reenable
3019 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3020 opts
->x_flag_ipa_ra
= 0;
3022 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
3024 /* Thumb2 inline assembly code should always use unified syntax.
3025 This will apply to ARM and Thumb1 eventually. */
3026 opts
->x_inline_asm_unified
= TARGET_THUMB2_P (opts
->x_target_flags
);
3028 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3029 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
3033 /* Convert a static initializer array of feature bits to sbitmap
3036 arm_initialize_isa (sbitmap isa
, const enum isa_feature
*isa_bits
)
3039 while (*isa_bits
!= isa_nobit
)
3040 bitmap_set_bit (isa
, *(isa_bits
++));
3043 static sbitmap isa_all_fpubits
;
3044 static sbitmap isa_quirkbits
;
3046 /* Configure a build target TARGET from the user-specified options OPTS and
3047 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3048 architecture have been specified, but the two are not identical. */
3050 arm_configure_build_target (struct arm_build_target
*target
,
3051 struct cl_target_option
*opts
,
3052 struct gcc_options
*opts_set
,
3053 bool warn_compatible
)
3055 const struct processors
*arm_selected_tune
= NULL
;
3056 const struct processors
*arm_selected_arch
= NULL
;
3057 const struct processors
*arm_selected_cpu
= NULL
;
3058 const struct arm_fpu_desc
*arm_selected_fpu
= NULL
;
3060 bitmap_clear (target
->isa
);
3061 target
->core_name
= NULL
;
3062 target
->arch_name
= NULL
;
3064 if (opts_set
->x_arm_arch_option
)
3065 arm_selected_arch
= &all_architectures
[opts
->x_arm_arch_option
];
3067 if (opts_set
->x_arm_cpu_option
)
3069 arm_selected_cpu
= &all_cores
[(int) opts
->x_arm_cpu_option
];
3070 arm_selected_tune
= &all_cores
[(int) opts
->x_arm_cpu_option
];
3073 if (opts_set
->x_arm_tune_option
)
3074 arm_selected_tune
= &all_cores
[(int) opts
->x_arm_tune_option
];
3076 if (arm_selected_arch
)
3078 arm_initialize_isa (target
->isa
, arm_selected_arch
->isa_bits
);
3080 if (arm_selected_cpu
)
3082 auto_sbitmap
cpu_isa (isa_num_bits
);
3084 arm_initialize_isa (cpu_isa
, arm_selected_cpu
->isa_bits
);
3085 bitmap_xor (cpu_isa
, cpu_isa
, target
->isa
);
3086 /* Ignore any bits that are quirk bits. */
3087 bitmap_and_compl (cpu_isa
, cpu_isa
, isa_quirkbits
);
3088 /* Ignore (for now) any bits that might be set by -mfpu. */
3089 bitmap_and_compl (cpu_isa
, cpu_isa
, isa_all_fpubits
);
3091 if (!bitmap_empty_p (cpu_isa
))
3093 if (warn_compatible
)
3094 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3095 arm_selected_cpu
->name
, arm_selected_arch
->name
);
3096 /* -march wins for code generation.
3097 -mcpu wins for default tuning. */
3098 if (!arm_selected_tune
)
3099 arm_selected_tune
= arm_selected_cpu
;
3101 arm_selected_cpu
= arm_selected_arch
;
3102 target
->arch_name
= arm_selected_arch
->name
;
3106 /* Architecture and CPU are essentially the same.
3107 Prefer the CPU setting. */
3108 arm_selected_arch
= NULL
;
3109 target
->core_name
= arm_selected_cpu
->name
;
3114 /* Pick a CPU based on the architecture. */
3115 arm_selected_cpu
= arm_selected_arch
;
3116 target
->arch_name
= arm_selected_arch
->name
;
3117 /* Note: target->core_name is left unset in this path. */
3120 else if (arm_selected_cpu
)
3122 target
->core_name
= arm_selected_cpu
->name
;
3123 arm_initialize_isa (target
->isa
, arm_selected_cpu
->isa_bits
);
3125 /* If the user did not specify a processor, choose one for them. */
3128 const struct processors
* sel
;
3129 auto_sbitmap
sought_isa (isa_num_bits
);
3130 bitmap_clear (sought_isa
);
3131 auto_sbitmap
default_isa (isa_num_bits
);
3133 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
3134 gcc_assert (arm_selected_cpu
->name
);
3136 /* RWE: All of the selection logic below (to the end of this
3137 'if' clause) looks somewhat suspect. It appears to be mostly
3138 there to support forcing thumb support when the default CPU
3139 does not have thumb (somewhat dubious in terms of what the
3140 user might be expecting). I think it should be removed once
3141 support for the pre-thumb era cores is removed. */
3142 sel
= arm_selected_cpu
;
3143 arm_initialize_isa (default_isa
, sel
->isa_bits
);
3145 /* Now check to see if the user has specified any command line
3146 switches that require certain abilities from the cpu. */
3148 if (TARGET_INTERWORK
|| TARGET_THUMB
)
3150 bitmap_set_bit (sought_isa
, isa_bit_thumb
);
3151 bitmap_set_bit (sought_isa
, isa_bit_mode32
);
3153 /* There are no ARM processors that support both APCS-26 and
3154 interworking. Therefore we forcibly remove MODE26 from
3155 from the isa features here (if it was set), so that the
3156 search below will always be able to find a compatible
3158 bitmap_clear_bit (default_isa
, isa_bit_mode26
);
3161 /* If there are such requirements and the default CPU does not
3162 satisfy them, we need to run over the complete list of
3163 cores looking for one that is satisfactory. */
3164 if (!bitmap_empty_p (sought_isa
)
3165 && !bitmap_subset_p (sought_isa
, default_isa
))
3167 auto_sbitmap
candidate_isa (isa_num_bits
);
3168 /* We're only interested in a CPU with at least the
3169 capabilities of the default CPU and the required
3170 additional features. */
3171 bitmap_ior (default_isa
, default_isa
, sought_isa
);
3173 /* Try to locate a CPU type that supports all of the abilities
3174 of the default CPU, plus the extra abilities requested by
3176 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
3178 arm_initialize_isa (candidate_isa
, sel
->isa_bits
);
3179 /* An exact match? */
3180 if (bitmap_equal_p (default_isa
, candidate_isa
))
3184 if (sel
->name
== NULL
)
3186 unsigned current_bit_count
= isa_num_bits
;
3187 const struct processors
* best_fit
= NULL
;
3189 /* Ideally we would like to issue an error message here
3190 saying that it was not possible to find a CPU compatible
3191 with the default CPU, but which also supports the command
3192 line options specified by the programmer, and so they
3193 ought to use the -mcpu=<name> command line option to
3194 override the default CPU type.
3196 If we cannot find a CPU that has exactly the
3197 characteristics of the default CPU and the given
3198 command line options we scan the array again looking
3199 for a best match. The best match must have at least
3200 the capabilities of the perfect match. */
3201 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
3203 arm_initialize_isa (candidate_isa
, sel
->isa_bits
);
3205 if (bitmap_subset_p (default_isa
, candidate_isa
))
3209 bitmap_and_compl (candidate_isa
, candidate_isa
,
3211 count
= bitmap_popcount (candidate_isa
);
3213 if (count
< current_bit_count
)
3216 current_bit_count
= count
;
3220 gcc_assert (best_fit
);
3224 arm_selected_cpu
= sel
;
3227 /* Now we know the CPU, we can finally initialize the target
3229 target
->core_name
= arm_selected_cpu
->name
;
3230 arm_initialize_isa (target
->isa
, arm_selected_cpu
->isa_bits
);
3233 gcc_assert (arm_selected_cpu
);
3235 if (opts
->x_arm_fpu_index
!= TARGET_FPU_auto
)
3237 arm_selected_fpu
= &all_fpus
[opts
->x_arm_fpu_index
];
3238 auto_sbitmap
fpu_bits (isa_num_bits
);
3240 arm_initialize_isa (fpu_bits
, arm_selected_fpu
->isa_bits
);
3241 bitmap_and_compl (target
->isa
, target
->isa
, isa_all_fpubits
);
3242 bitmap_ior (target
->isa
, target
->isa
, fpu_bits
);
3244 else if (target
->core_name
== NULL
)
3245 /* To support this we need to be able to parse FPU feature options
3246 from the architecture string. */
3247 sorry ("-mfpu=auto not currently supported without an explicit CPU.");
3249 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3250 if (!arm_selected_tune
)
3251 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
3253 /* Finish initializing the target structure. */
3254 target
->arch_pp_name
= arm_selected_cpu
->arch
;
3255 target
->base_arch
= arm_selected_cpu
->base_arch
;
3256 target
->arch_core
= arm_selected_cpu
->core
;
3258 target
->tune_flags
= arm_selected_tune
->tune_flags
;
3259 target
->tune
= arm_selected_tune
->tune
;
3260 target
->tune_core
= arm_selected_tune
->core
;
3263 /* Fix up any incompatible options that the user has specified. */
3265 arm_option_override (void)
3267 static const enum isa_feature fpu_bitlist
[] = { ISA_ALL_FPU
, isa_nobit
};
3268 static const enum isa_feature quirk_bitlist
[] = { ISA_ALL_QUIRKS
, isa_nobit
};
3269 cl_target_option opts
;
3271 isa_quirkbits
= sbitmap_alloc (isa_num_bits
);
3272 arm_initialize_isa (isa_quirkbits
, quirk_bitlist
);
3274 isa_all_fpubits
= sbitmap_alloc (isa_num_bits
);
3275 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
3277 arm_active_target
.isa
= sbitmap_alloc (isa_num_bits
);
3279 if (!global_options_set
.x_arm_fpu_index
)
3281 const char *target_fpu_name
;
3285 #ifdef FPUTYPE_DEFAULT
3286 target_fpu_name
= FPUTYPE_DEFAULT
;
3288 target_fpu_name
= "vfp";
3291 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &fpu_index
,
3294 arm_fpu_index
= (enum fpu_type
) fpu_index
;
3297 cl_target_option_save (&opts
, &global_options
);
3298 arm_configure_build_target (&arm_active_target
, &opts
, &global_options_set
,
3301 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3302 SUBTARGET_OVERRIDE_OPTIONS
;
3305 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_active_target
.arch_pp_name
);
3306 arm_base_arch
= arm_active_target
.base_arch
;
3308 arm_tune
= arm_active_target
.tune_core
;
3309 tune_flags
= arm_active_target
.tune_flags
;
3310 current_tune
= arm_active_target
.tune
;
3312 /* TBD: Dwarf info for apcs frame is not handled yet. */
3313 if (TARGET_APCS_FRAME
)
3314 flag_shrink_wrap
= false;
3316 /* BPABI targets use linker tricks to allow interworking on cores
3317 without thumb support. */
3318 if (TARGET_INTERWORK
3320 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3322 warning (0, "target CPU does not support interworking" );
3323 target_flags
&= ~MASK_INTERWORK
;
3326 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3328 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3329 target_flags
|= MASK_APCS_FRAME
;
3332 if (TARGET_POKE_FUNCTION_NAME
)
3333 target_flags
|= MASK_APCS_FRAME
;
3335 if (TARGET_APCS_REENT
&& flag_pic
)
3336 error ("-fpic and -mapcs-reent are incompatible");
3338 if (TARGET_APCS_REENT
)
3339 warning (0, "APCS reentrant code not supported. Ignored");
3341 /* Initialize boolean versions of the architectural flags, for use
3342 in the arm.md file. */
3343 arm_arch3m
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv3m
);
3344 arm_arch4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv4
);
3345 arm_arch4t
= arm_arch4
&& bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3346 arm_arch5
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv5
);
3347 arm_arch5e
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv5e
);
3348 arm_arch5te
= arm_arch5e
3349 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3350 arm_arch6
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv6
);
3351 arm_arch6k
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv6k
);
3352 arm_arch_notm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
);
3353 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3354 arm_arch7
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv7
);
3355 arm_arch7em
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv7em
);
3356 arm_arch8
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv8
);
3357 arm_arch8_1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv8_1
);
3358 arm_arch8_2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_ARMv8_2
);
3359 arm_arch_thumb1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3360 arm_arch_thumb2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb2
);
3361 arm_arch_xscale
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_xscale
);
3362 arm_arch_iwmmxt
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt
);
3363 arm_arch_iwmmxt2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt2
);
3364 arm_arch_thumb_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_tdiv
);
3365 arm_arch_arm_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_adiv
);
3366 arm_arch_crc
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_crc32
);
3367 arm_arch_cmse
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_cmse
);
3368 arm_fp16_inst
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_fp16
);
3369 arm_arch_lpae
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_lpae
);
3372 if (arm_fp16_format
== ARM_FP16_FORMAT_ALTERNATIVE
)
3373 error ("selected fp16 options are incompatible");
3374 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3378 /* Set up some tuning parameters. */
3379 arm_ld_sched
= (tune_flags
& TF_LDSCHED
) != 0;
3380 arm_tune_strongarm
= (tune_flags
& TF_STRONG
) != 0;
3381 arm_tune_wbuf
= (tune_flags
& TF_WBUF
) != 0;
3382 arm_tune_xscale
= (tune_flags
& TF_XSCALE
) != 0;
3383 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3384 arm_m_profile_small_mul
= (tune_flags
& TF_SMALLMUL
) != 0;
3386 /* And finally, set up some quirks. */
3387 arm_arch_no_volatile_ce
3388 = bitmap_bit_p (arm_active_target
.isa
, isa_quirk_no_volatile_ce
);
3390 = arm_arch6k
&& bitmap_bit_p (arm_active_target
.isa
, isa_quirk_ARMv6kz
);
3392 /* V5 code we generate is completely interworking capable, so we turn off
3393 TARGET_INTERWORK here to avoid many tests later on. */
3395 /* XXX However, we must pass the right pre-processor defines to CPP
3396 or GLD can get confused. This is a hack. */
3397 if (TARGET_INTERWORK
)
3398 arm_cpp_interwork
= 1;
3401 target_flags
&= ~MASK_INTERWORK
;
3403 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3404 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3406 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3407 error ("iwmmxt abi requires an iwmmxt capable cpu");
3409 /* If soft-float is specified then don't use FPU. */
3410 if (TARGET_SOFT_FLOAT
)
3411 arm_fpu_attr
= FPU_NONE
;
3413 arm_fpu_attr
= FPU_VFP
;
3415 if (TARGET_AAPCS_BASED
)
3417 if (TARGET_CALLER_INTERWORKING
)
3418 error ("AAPCS does not support -mcaller-super-interworking");
3420 if (TARGET_CALLEE_INTERWORKING
)
3421 error ("AAPCS does not support -mcallee-super-interworking");
3424 /* __fp16 support currently assumes the core has ldrh. */
3425 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3426 sorry ("__fp16 and no ldrh");
3428 if (TARGET_AAPCS_BASED
)
3430 if (arm_abi
== ARM_ABI_IWMMXT
)
3431 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3432 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
3433 && TARGET_HARD_FLOAT
)
3435 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3436 if (!bitmap_bit_p (arm_active_target
.isa
, isa_bit_VFPv2
))
3437 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3440 arm_pcs_default
= ARM_PCS_AAPCS
;
3444 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
)
3445 sorry ("-mfloat-abi=hard and VFP");
3447 if (arm_abi
== ARM_ABI_APCS
)
3448 arm_pcs_default
= ARM_PCS_APCS
;
3450 arm_pcs_default
= ARM_PCS_ATPCS
;
3453 /* For arm2/3 there is no need to do any scheduling if we are doing
3454 software floating-point. */
3455 if (TARGET_SOFT_FLOAT
&& (tune_flags
& TF_NO_MODE32
))
3456 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3458 /* Use the cp15 method if it is available. */
3459 if (target_thread_pointer
== TP_AUTO
)
3461 if (arm_arch6k
&& !TARGET_THUMB1
)
3462 target_thread_pointer
= TP_CP15
;
3464 target_thread_pointer
= TP_SOFT
;
3467 /* Override the default structure alignment for AAPCS ABI. */
3468 if (!global_options_set
.x_arm_structure_size_boundary
)
3470 if (TARGET_AAPCS_BASED
)
3471 arm_structure_size_boundary
= 8;
3475 if (arm_structure_size_boundary
!= 8
3476 && arm_structure_size_boundary
!= 32
3477 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3479 if (ARM_DOUBLEWORD_ALIGN
)
3481 "structure size boundary can only be set to 8, 32 or 64");
3483 warning (0, "structure size boundary can only be set to 8 or 32");
3484 arm_structure_size_boundary
3485 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3489 if (TARGET_VXWORKS_RTP
)
3491 if (!global_options_set
.x_arm_pic_data_is_text_relative
)
3492 arm_pic_data_is_text_relative
= 0;
3495 && !arm_pic_data_is_text_relative
3496 && !(global_options_set
.x_target_flags
& MASK_SINGLE_PIC_BASE
))
3497 /* When text & data segments don't have a fixed displacement, the
3498 intended use is with a single, read only, pic base register.
3499 Unless the user explicitly requested not to do that, set
3501 target_flags
|= MASK_SINGLE_PIC_BASE
;
3503 /* If stack checking is disabled, we can use r10 as the PIC register,
3504 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3505 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3507 if (TARGET_VXWORKS_RTP
)
3508 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3509 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3512 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3513 arm_pic_register
= 9;
3515 if (arm_pic_register_string
!= NULL
)
3517 int pic_register
= decode_reg_name (arm_pic_register_string
);
3520 warning (0, "-mpic-register= is useless without -fpic");
3522 /* Prevent the user from choosing an obviously stupid PIC register. */
3523 else if (pic_register
< 0 || call_used_regs
[pic_register
]
3524 || pic_register
== HARD_FRAME_POINTER_REGNUM
3525 || pic_register
== STACK_POINTER_REGNUM
3526 || pic_register
>= PC_REGNUM
3527 || (TARGET_VXWORKS_RTP
3528 && (unsigned int) pic_register
!= arm_pic_register
))
3529 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
3531 arm_pic_register
= pic_register
;
3534 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3535 if (fix_cm3_ldrd
== 2)
3537 if (bitmap_bit_p (arm_active_target
.isa
, isa_quirk_cm3_ldrd
))
3543 /* Hot/Cold partitioning is not currently supported, since we can't
3544 handle literal pool placement in that case. */
3545 if (flag_reorder_blocks_and_partition
)
3547 inform (input_location
,
3548 "-freorder-blocks-and-partition not supported on this architecture");
3549 flag_reorder_blocks_and_partition
= 0;
3550 flag_reorder_blocks
= 1;
3554 /* Hoisting PIC address calculations more aggressively provides a small,
3555 but measurable, size reduction for PIC code. Therefore, we decrease
3556 the bar for unrestricted expression hoisting to the cost of PIC address
3557 calculation, which is 2 instructions. */
3558 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3559 global_options
.x_param_values
,
3560 global_options_set
.x_param_values
);
3562 /* ARM EABI defaults to strict volatile bitfields. */
3563 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3564 && abi_version_at_least(2))
3565 flag_strict_volatile_bitfields
= 1;
3567 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3568 have deemed it beneficial (signified by setting
3569 prefetch.num_slots to 1 or more). */
3570 if (flag_prefetch_loop_arrays
< 0
3573 && current_tune
->prefetch
.num_slots
> 0)
3574 flag_prefetch_loop_arrays
= 1;
3576 /* Set up parameters to be used in prefetching algorithm. Do not
3577 override the defaults unless we are tuning for a core we have
3578 researched values for. */
3579 if (current_tune
->prefetch
.num_slots
> 0)
3580 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3581 current_tune
->prefetch
.num_slots
,
3582 global_options
.x_param_values
,
3583 global_options_set
.x_param_values
);
3584 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3585 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3586 current_tune
->prefetch
.l1_cache_line_size
,
3587 global_options
.x_param_values
,
3588 global_options_set
.x_param_values
);
3589 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3590 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3591 current_tune
->prefetch
.l1_cache_size
,
3592 global_options
.x_param_values
,
3593 global_options_set
.x_param_values
);
3595 /* Use Neon to perform 64-bits operations rather than core
3597 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3598 if (use_neon_for_64bits
== 1)
3599 prefer_neon_for_64bits
= true;
3601 /* Use the alternative scheduling-pressure algorithm by default. */
3602 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3603 global_options
.x_param_values
,
3604 global_options_set
.x_param_values
);
3606 /* Look through ready list and all of queue for instructions
3607 relevant for L2 auto-prefetcher. */
3608 int param_sched_autopref_queue_depth
;
3610 switch (current_tune
->sched_autopref
)
3612 case tune_params::SCHED_AUTOPREF_OFF
:
3613 param_sched_autopref_queue_depth
= -1;
3616 case tune_params::SCHED_AUTOPREF_RANK
:
3617 param_sched_autopref_queue_depth
= 0;
3620 case tune_params::SCHED_AUTOPREF_FULL
:
3621 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3628 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3629 param_sched_autopref_queue_depth
,
3630 global_options
.x_param_values
,
3631 global_options_set
.x_param_values
);
3633 /* Currently, for slow flash data, we just disable literal pools. We also
3634 disable it for pure-code. */
3635 if (target_slow_flash_data
|| target_pure_code
)
3636 arm_disable_literal_pool
= true;
3638 if (use_cmse
&& !arm_arch_cmse
)
3639 error ("target CPU does not support ARMv8-M Security Extensions");
3641 /* Disable scheduling fusion by default if it's not armv7 processor
3642 or doesn't prefer ldrd/strd. */
3643 if (flag_schedule_fusion
== 2
3644 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3645 flag_schedule_fusion
= 0;
3647 /* Need to remember initial options before they are overriden. */
3648 init_optimize
= build_optimization_node (&global_options
);
3650 arm_option_override_internal (&global_options
, &global_options_set
);
3651 arm_option_check_internal (&global_options
);
3652 arm_option_params_internal ();
3654 /* Create the default target_options structure. */
3655 target_option_default_node
= target_option_current_node
3656 = build_target_option_node (&global_options
);
3658 /* Register global variables with the garbage collector. */
3659 arm_add_gc_roots ();
3661 /* Init initial mode for testing. */
3662 thumb_flipper
= TARGET_THUMB
;
3666 arm_add_gc_roots (void)
3668 gcc_obstack_init(&minipool_obstack
);
3669 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3672 /* A table of known ARM exception types.
3673 For use with the interrupt function attribute. */
3677 const char *const arg
;
3678 const unsigned long return_value
;
3682 static const isr_attribute_arg isr_attribute_args
[] =
3684 { "IRQ", ARM_FT_ISR
},
3685 { "irq", ARM_FT_ISR
},
3686 { "FIQ", ARM_FT_FIQ
},
3687 { "fiq", ARM_FT_FIQ
},
3688 { "ABORT", ARM_FT_ISR
},
3689 { "abort", ARM_FT_ISR
},
3690 { "ABORT", ARM_FT_ISR
},
3691 { "abort", ARM_FT_ISR
},
3692 { "UNDEF", ARM_FT_EXCEPTION
},
3693 { "undef", ARM_FT_EXCEPTION
},
3694 { "SWI", ARM_FT_EXCEPTION
},
3695 { "swi", ARM_FT_EXCEPTION
},
3696 { NULL
, ARM_FT_NORMAL
}
3699 /* Returns the (interrupt) function type of the current
3700 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3702 static unsigned long
3703 arm_isr_value (tree argument
)
3705 const isr_attribute_arg
* ptr
;
3709 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3711 /* No argument - default to IRQ. */
3712 if (argument
== NULL_TREE
)
3715 /* Get the value of the argument. */
3716 if (TREE_VALUE (argument
) == NULL_TREE
3717 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3718 return ARM_FT_UNKNOWN
;
3720 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3722 /* Check it against the list of known arguments. */
3723 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3724 if (streq (arg
, ptr
->arg
))
3725 return ptr
->return_value
;
3727 /* An unrecognized interrupt type. */
3728 return ARM_FT_UNKNOWN
;
3731 /* Computes the type of the current function. */
3733 static unsigned long
3734 arm_compute_func_type (void)
3736 unsigned long type
= ARM_FT_UNKNOWN
;
3740 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3742 /* Decide if the current function is volatile. Such functions
3743 never return, and many memory cycles can be saved by not storing
3744 register values that will never be needed again. This optimization
3745 was added to speed up context switching in a kernel application. */
3747 && (TREE_NOTHROW (current_function_decl
)
3748 || !(flag_unwind_tables
3750 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3751 && TREE_THIS_VOLATILE (current_function_decl
))
3752 type
|= ARM_FT_VOLATILE
;
3754 if (cfun
->static_chain_decl
!= NULL
)
3755 type
|= ARM_FT_NESTED
;
3757 attr
= DECL_ATTRIBUTES (current_function_decl
);
3759 a
= lookup_attribute ("naked", attr
);
3761 type
|= ARM_FT_NAKED
;
3763 a
= lookup_attribute ("isr", attr
);
3765 a
= lookup_attribute ("interrupt", attr
);
3768 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3770 type
|= arm_isr_value (TREE_VALUE (a
));
3772 if (lookup_attribute ("cmse_nonsecure_entry", attr
))
3773 type
|= ARM_FT_CMSE_ENTRY
;
3778 /* Returns the type of the current function. */
3781 arm_current_func_type (void)
3783 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3784 cfun
->machine
->func_type
= arm_compute_func_type ();
3786 return cfun
->machine
->func_type
;
3790 arm_allocate_stack_slots_for_args (void)
3792 /* Naked functions should not allocate stack slots for arguments. */
3793 return !IS_NAKED (arm_current_func_type ());
3797 arm_warn_func_return (tree decl
)
3799 /* Naked functions are implemented entirely in assembly, including the
3800 return sequence, so suppress warnings about this. */
3801 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3805 /* Output assembler code for a block containing the constant parts
3806 of a trampoline, leaving space for the variable parts.
3808 On the ARM, (if r8 is the static chain regnum, and remembering that
3809 referencing pc adds an offset of 8) the trampoline looks like:
3812 .word static chain value
3813 .word function's address
3814 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3817 arm_asm_trampoline_template (FILE *f
)
3819 fprintf (f
, "\t.syntax unified\n");
3823 fprintf (f
, "\t.arm\n");
3824 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3825 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3827 else if (TARGET_THUMB2
)
3829 fprintf (f
, "\t.thumb\n");
3830 /* The Thumb-2 trampoline is similar to the arm implementation.
3831 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3832 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3833 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3834 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
3838 ASM_OUTPUT_ALIGN (f
, 2);
3839 fprintf (f
, "\t.code\t16\n");
3840 fprintf (f
, ".Ltrampoline_start:\n");
3841 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3842 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3843 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3844 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3845 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3846 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
3848 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3849 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3852 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3855 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3857 rtx fnaddr
, mem
, a_tramp
;
3859 emit_block_move (m_tramp
, assemble_trampoline_template (),
3860 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3862 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3863 emit_move_insn (mem
, chain_value
);
3865 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3866 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3867 emit_move_insn (mem
, fnaddr
);
3869 a_tramp
= XEXP (m_tramp
, 0);
3870 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3871 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
3872 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3875 /* Thumb trampolines should be entered in thumb mode, so set
3876 the bottom bit of the address. */
3879 arm_trampoline_adjust_address (rtx addr
)
3882 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3883 NULL
, 0, OPTAB_LIB_WIDEN
);
3887 /* Return 1 if it is possible to return using a single instruction.
3888 If SIBLING is non-null, this is a test for a return before a sibling
3889 call. SIBLING is the call insn, so we can examine its register usage. */
3892 use_return_insn (int iscond
, rtx sibling
)
3895 unsigned int func_type
;
3896 unsigned long saved_int_regs
;
3897 unsigned HOST_WIDE_INT stack_adjust
;
3898 arm_stack_offsets
*offsets
;
3900 /* Never use a return instruction before reload has run. */
3901 if (!reload_completed
)
3904 func_type
= arm_current_func_type ();
3906 /* Naked, volatile and stack alignment functions need special
3908 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3911 /* So do interrupt functions that use the frame pointer and Thumb
3912 interrupt functions. */
3913 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3916 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3917 && !optimize_function_for_size_p (cfun
))
3920 offsets
= arm_get_frame_offsets ();
3921 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3923 /* As do variadic functions. */
3924 if (crtl
->args
.pretend_args_size
3925 || cfun
->machine
->uses_anonymous_args
3926 /* Or if the function calls __builtin_eh_return () */
3927 || crtl
->calls_eh_return
3928 /* Or if the function calls alloca */
3929 || cfun
->calls_alloca
3930 /* Or if there is a stack adjustment. However, if the stack pointer
3931 is saved on the stack, we can use a pre-incrementing stack load. */
3932 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3933 && stack_adjust
== 4))
3934 /* Or if the static chain register was saved above the frame, under the
3935 assumption that the stack pointer isn't saved on the stack. */
3936 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
3937 && arm_compute_static_chain_stack_bytes() != 0))
3940 saved_int_regs
= offsets
->saved_regs_mask
;
3942 /* Unfortunately, the insn
3944 ldmib sp, {..., sp, ...}
3946 triggers a bug on most SA-110 based devices, such that the stack
3947 pointer won't be correctly restored if the instruction takes a
3948 page fault. We work around this problem by popping r3 along with
3949 the other registers, since that is never slower than executing
3950 another instruction.
3952 We test for !arm_arch5 here, because code for any architecture
3953 less than this could potentially be run on one of the buggy
3955 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3957 /* Validate that r3 is a call-clobbered register (always true in
3958 the default abi) ... */
3959 if (!call_used_regs
[3])
3962 /* ... that it isn't being used for a return value ... */
3963 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
3966 /* ... or for a tail-call argument ... */
3969 gcc_assert (CALL_P (sibling
));
3971 if (find_regno_fusage (sibling
, USE
, 3))
3975 /* ... and that there are no call-saved registers in r0-r2
3976 (always true in the default ABI). */
3977 if (saved_int_regs
& 0x7)
3981 /* Can't be done if interworking with Thumb, and any registers have been
3983 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
3986 /* On StrongARM, conditional returns are expensive if they aren't
3987 taken and multiple registers have been stacked. */
3988 if (iscond
&& arm_tune_strongarm
)
3990 /* Conditional return when just the LR is stored is a simple
3991 conditional-load instruction, that's not expensive. */
3992 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
3996 && arm_pic_register
!= INVALID_REGNUM
3997 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
4001 /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
4002 several instructions if anything needs to be popped. */
4003 if (saved_int_regs
&& IS_CMSE_ENTRY (func_type
))
4006 /* If there are saved registers but the LR isn't saved, then we need
4007 two instructions for the return. */
4008 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
4011 /* Can't be done if any of the VFP regs are pushed,
4012 since this also requires an insn. */
4013 if (TARGET_HARD_FLOAT
)
4014 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
4015 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
4018 if (TARGET_REALLY_IWMMXT
)
4019 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
4020 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
4026 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4027 shrink-wrapping if possible. This is the case if we need to emit a
4028 prologue, which we can test by looking at the offsets. */
4030 use_simple_return_p (void)
4032 arm_stack_offsets
*offsets
;
4034 offsets
= arm_get_frame_offsets ();
4035 return offsets
->outgoing_args
!= 0;
4038 /* Return TRUE if int I is a valid immediate ARM constant. */
4041 const_ok_for_arm (HOST_WIDE_INT i
)
4045 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4046 be all zero, or all one. */
4047 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
4048 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
4049 != ((~(unsigned HOST_WIDE_INT
) 0)
4050 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
4053 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
4055 /* Fast return for 0 and small values. We must do this for zero, since
4056 the code below can't handle that one case. */
4057 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
4060 /* Get the number of trailing zeros. */
4061 lowbit
= ffs((int) i
) - 1;
4063 /* Only even shifts are allowed in ARM mode so round down to the
4064 nearest even number. */
4068 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
4073 /* Allow rotated constants in ARM mode. */
4075 && ((i
& ~0xc000003f) == 0
4076 || (i
& ~0xf000000f) == 0
4077 || (i
& ~0xfc000003) == 0))
4080 else if (TARGET_THUMB2
)
4084 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4087 if (i
== v
|| i
== (v
| (v
<< 8)))
4090 /* Allow repeated pattern 0xXY00XY00. */
4096 else if (TARGET_HAVE_MOVT
)
4098 /* Thumb-1 Targets with MOVT. */
4108 /* Return true if I is a valid constant for the operation CODE. */
4110 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
4112 if (const_ok_for_arm (i
))
4118 /* See if we can use movw. */
4119 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
4122 /* Otherwise, try mvn. */
4123 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4126 /* See if we can use addw or subw. */
4128 && ((i
& 0xfffff000) == 0
4129 || ((-i
) & 0xfffff000) == 0))
4150 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
4152 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
4158 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4162 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4169 /* Return true if I is a valid di mode constant for the operation CODE. */
4171 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
4173 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4174 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4175 rtx hi
= GEN_INT (hi_val
);
4176 rtx lo
= GEN_INT (lo_val
);
4186 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
4187 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
4189 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4196 /* Emit a sequence of insns to handle a large constant.
4197 CODE is the code of the operation required, it can be any of SET, PLUS,
4198 IOR, AND, XOR, MINUS;
4199 MODE is the mode in which the operation is being performed;
4200 VAL is the integer to operate on;
4201 SOURCE is the other operand (a register, or a null-pointer for SET);
4202 SUBTARGETS means it is safe to create scratch registers if that will
4203 either produce a simpler sequence, or we will want to cse the values.
4204 Return value is the number of insns emitted. */
4206 /* ??? Tweak this for thumb2. */
4208 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4209 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
4213 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4214 cond
= COND_EXEC_TEST (PATTERN (insn
));
4218 if (subtargets
|| code
== SET
4219 || (REG_P (target
) && REG_P (source
)
4220 && REGNO (target
) != REGNO (source
)))
4222 /* After arm_reorg has been called, we can't fix up expensive
4223 constants by pushing them into memory so we must synthesize
4224 them in-line, regardless of the cost. This is only likely to
4225 be more costly on chips that have load delay slots and we are
4226 compiling without running the scheduler (so no splitting
4227 occurred before the final instruction emission).
4229 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4231 if (!cfun
->machine
->after_arm_reorg
4233 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4235 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4240 /* Currently SET is the only monadic value for CODE, all
4241 the rest are diadic. */
4242 if (TARGET_USE_MOVT
)
4243 arm_emit_movpair (target
, GEN_INT (val
));
4245 emit_set_insn (target
, GEN_INT (val
));
4251 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4253 if (TARGET_USE_MOVT
)
4254 arm_emit_movpair (temp
, GEN_INT (val
));
4256 emit_set_insn (temp
, GEN_INT (val
));
4258 /* For MINUS, the value is subtracted from, since we never
4259 have subtraction of a constant. */
4261 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4263 emit_set_insn (target
,
4264 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4270 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4274 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4275 ARM/THUMB2 immediates, and add up to VAL.
4276 Thr function return value gives the number of insns required. */
4278 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4279 struct four_ints
*return_sequence
)
4281 int best_consecutive_zeros
= 0;
4285 struct four_ints tmp_sequence
;
4287 /* If we aren't targeting ARM, the best place to start is always at
4288 the bottom, otherwise look more closely. */
4291 for (i
= 0; i
< 32; i
+= 2)
4293 int consecutive_zeros
= 0;
4295 if (!(val
& (3 << i
)))
4297 while ((i
< 32) && !(val
& (3 << i
)))
4299 consecutive_zeros
+= 2;
4302 if (consecutive_zeros
> best_consecutive_zeros
)
4304 best_consecutive_zeros
= consecutive_zeros
;
4305 best_start
= i
- consecutive_zeros
;
4312 /* So long as it won't require any more insns to do so, it's
4313 desirable to emit a small constant (in bits 0...9) in the last
4314 insn. This way there is more chance that it can be combined with
4315 a later addressing insn to form a pre-indexed load or store
4316 operation. Consider:
4318 *((volatile int *)0xe0000100) = 1;
4319 *((volatile int *)0xe0000110) = 2;
4321 We want this to wind up as:
4325 str rB, [rA, #0x100]
4327 str rB, [rA, #0x110]
4329 rather than having to synthesize both large constants from scratch.
4331 Therefore, we calculate how many insns would be required to emit
4332 the constant starting from `best_start', and also starting from
4333 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4334 yield a shorter sequence, we may as well use zero. */
4335 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4337 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4339 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4340 if (insns2
<= insns1
)
4342 *return_sequence
= tmp_sequence
;
4350 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4352 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4353 struct four_ints
*return_sequence
, int i
)
4355 int remainder
= val
& 0xffffffff;
4358 /* Try and find a way of doing the job in either two or three
4361 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4362 location. We start at position I. This may be the MSB, or
4363 optimial_immediate_sequence may have positioned it at the largest block
4364 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4365 wrapping around to the top of the word when we drop off the bottom.
4366 In the worst case this code should produce no more than four insns.
4368 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4369 constants, shifted to any arbitrary location. We should always start
4374 unsigned int b1
, b2
, b3
, b4
;
4375 unsigned HOST_WIDE_INT result
;
4378 gcc_assert (insns
< 4);
4383 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4384 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4387 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4388 /* We can use addw/subw for the last 12 bits. */
4392 /* Use an 8-bit shifted/rotated immediate. */
4396 result
= remainder
& ((0x0ff << end
)
4397 | ((i
< end
) ? (0xff >> (32 - end
))
4404 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4405 arbitrary shifts. */
4406 i
-= TARGET_ARM
? 2 : 1;
4410 /* Next, see if we can do a better job with a thumb2 replicated
4413 We do it this way around to catch the cases like 0x01F001E0 where
4414 two 8-bit immediates would work, but a replicated constant would
4417 TODO: 16-bit constants that don't clear all the bits, but still win.
4418 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4421 b1
= (remainder
& 0xff000000) >> 24;
4422 b2
= (remainder
& 0x00ff0000) >> 16;
4423 b3
= (remainder
& 0x0000ff00) >> 8;
4424 b4
= remainder
& 0xff;
4428 /* The 8-bit immediate already found clears b1 (and maybe b2),
4429 but must leave b3 and b4 alone. */
4431 /* First try to find a 32-bit replicated constant that clears
4432 almost everything. We can assume that we can't do it in one,
4433 or else we wouldn't be here. */
4434 unsigned int tmp
= b1
& b2
& b3
& b4
;
4435 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4437 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4438 + (tmp
== b3
) + (tmp
== b4
);
4440 && (matching_bytes
>= 3
4441 || (matching_bytes
== 2
4442 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4444 /* At least 3 of the bytes match, and the fourth has at
4445 least as many bits set, or two of the bytes match
4446 and it will only require one more insn to finish. */
4454 /* Second, try to find a 16-bit replicated constant that can
4455 leave three of the bytes clear. If b2 or b4 is already
4456 zero, then we can. If the 8-bit from above would not
4457 clear b2 anyway, then we still win. */
4458 else if (b1
== b3
&& (!b2
|| !b4
4459 || (remainder
& 0x00ff0000 & ~result
)))
4461 result
= remainder
& 0xff00ff00;
4467 /* The 8-bit immediate already found clears b2 (and maybe b3)
4468 and we don't get here unless b1 is alredy clear, but it will
4469 leave b4 unchanged. */
4471 /* If we can clear b2 and b4 at once, then we win, since the
4472 8-bits couldn't possibly reach that far. */
4475 result
= remainder
& 0x00ff00ff;
4481 return_sequence
->i
[insns
++] = result
;
4482 remainder
&= ~result
;
4484 if (code
== SET
|| code
== MINUS
)
4492 /* Emit an instruction with the indicated PATTERN. If COND is
4493 non-NULL, conditionalize the execution of the instruction on COND
4497 emit_constant_insn (rtx cond
, rtx pattern
)
4500 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4501 emit_insn (pattern
);
4504 /* As above, but extra parameter GENERATE which, if clear, suppresses
4508 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4509 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4510 int subtargets
, int generate
)
4514 int final_invert
= 0;
4516 int set_sign_bit_copies
= 0;
4517 int clear_sign_bit_copies
= 0;
4518 int clear_zero_bit_copies
= 0;
4519 int set_zero_bit_copies
= 0;
4520 int insns
= 0, neg_insns
, inv_insns
;
4521 unsigned HOST_WIDE_INT temp1
, temp2
;
4522 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4523 struct four_ints
*immediates
;
4524 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4526 /* Find out which operations are safe for a given CODE. Also do a quick
4527 check for degenerate cases; these can occur when DImode operations
4540 if (remainder
== 0xffffffff)
4543 emit_constant_insn (cond
,
4544 gen_rtx_SET (target
,
4545 GEN_INT (ARM_SIGN_EXTEND (val
))));
4551 if (reload_completed
&& rtx_equal_p (target
, source
))
4555 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4564 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4567 if (remainder
== 0xffffffff)
4569 if (reload_completed
&& rtx_equal_p (target
, source
))
4572 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4581 if (reload_completed
&& rtx_equal_p (target
, source
))
4584 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4588 if (remainder
== 0xffffffff)
4591 emit_constant_insn (cond
,
4592 gen_rtx_SET (target
,
4593 gen_rtx_NOT (mode
, source
)));
4600 /* We treat MINUS as (val - source), since (source - val) is always
4601 passed as (source + (-val)). */
4605 emit_constant_insn (cond
,
4606 gen_rtx_SET (target
,
4607 gen_rtx_NEG (mode
, source
)));
4610 if (const_ok_for_arm (val
))
4613 emit_constant_insn (cond
,
4614 gen_rtx_SET (target
,
4615 gen_rtx_MINUS (mode
, GEN_INT (val
),
4626 /* If we can do it in one insn get out quickly. */
4627 if (const_ok_for_op (val
, code
))
4630 emit_constant_insn (cond
,
4631 gen_rtx_SET (target
,
4633 ? gen_rtx_fmt_ee (code
, mode
, source
,
4639 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4641 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4642 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4646 if (mode
== SImode
&& i
== 16)
4647 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4649 emit_constant_insn (cond
,
4650 gen_zero_extendhisi2
4651 (target
, gen_lowpart (HImode
, source
)));
4653 /* Extz only supports SImode, but we can coerce the operands
4655 emit_constant_insn (cond
,
4656 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4657 gen_lowpart (SImode
, source
),
4658 GEN_INT (i
), const0_rtx
));
4664 /* Calculate a few attributes that may be useful for specific
4666 /* Count number of leading zeros. */
4667 for (i
= 31; i
>= 0; i
--)
4669 if ((remainder
& (1 << i
)) == 0)
4670 clear_sign_bit_copies
++;
4675 /* Count number of leading 1's. */
4676 for (i
= 31; i
>= 0; i
--)
4678 if ((remainder
& (1 << i
)) != 0)
4679 set_sign_bit_copies
++;
4684 /* Count number of trailing zero's. */
4685 for (i
= 0; i
<= 31; i
++)
4687 if ((remainder
& (1 << i
)) == 0)
4688 clear_zero_bit_copies
++;
4693 /* Count number of trailing 1's. */
4694 for (i
= 0; i
<= 31; i
++)
4696 if ((remainder
& (1 << i
)) != 0)
4697 set_zero_bit_copies
++;
4705 /* See if we can do this by sign_extending a constant that is known
4706 to be negative. This is a good, way of doing it, since the shift
4707 may well merge into a subsequent insn. */
4708 if (set_sign_bit_copies
> 1)
4710 if (const_ok_for_arm
4711 (temp1
= ARM_SIGN_EXTEND (remainder
4712 << (set_sign_bit_copies
- 1))))
4716 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4717 emit_constant_insn (cond
,
4718 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4719 emit_constant_insn (cond
,
4720 gen_ashrsi3 (target
, new_src
,
4721 GEN_INT (set_sign_bit_copies
- 1)));
4725 /* For an inverted constant, we will need to set the low bits,
4726 these will be shifted out of harm's way. */
4727 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4728 if (const_ok_for_arm (~temp1
))
4732 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4733 emit_constant_insn (cond
,
4734 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4735 emit_constant_insn (cond
,
4736 gen_ashrsi3 (target
, new_src
,
4737 GEN_INT (set_sign_bit_copies
- 1)));
4743 /* See if we can calculate the value as the difference between two
4744 valid immediates. */
4745 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4747 int topshift
= clear_sign_bit_copies
& ~1;
4749 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4750 & (0xff000000 >> topshift
));
4752 /* If temp1 is zero, then that means the 9 most significant
4753 bits of remainder were 1 and we've caused it to overflow.
4754 When topshift is 0 we don't need to do anything since we
4755 can borrow from 'bit 32'. */
4756 if (temp1
== 0 && topshift
!= 0)
4757 temp1
= 0x80000000 >> (topshift
- 1);
4759 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4761 if (const_ok_for_arm (temp2
))
4765 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4766 emit_constant_insn (cond
,
4767 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4768 emit_constant_insn (cond
,
4769 gen_addsi3 (target
, new_src
,
4777 /* See if we can generate this by setting the bottom (or the top)
4778 16 bits, and then shifting these into the other half of the
4779 word. We only look for the simplest cases, to do more would cost
4780 too much. Be careful, however, not to generate this when the
4781 alternative would take fewer insns. */
4782 if (val
& 0xffff0000)
4784 temp1
= remainder
& 0xffff0000;
4785 temp2
= remainder
& 0x0000ffff;
4787 /* Overlaps outside this range are best done using other methods. */
4788 for (i
= 9; i
< 24; i
++)
4790 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4791 && !const_ok_for_arm (temp2
))
4793 rtx new_src
= (subtargets
4794 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4796 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4797 source
, subtargets
, generate
);
4805 gen_rtx_ASHIFT (mode
, source
,
4812 /* Don't duplicate cases already considered. */
4813 for (i
= 17; i
< 24; i
++)
4815 if (((temp1
| (temp1
>> i
)) == remainder
)
4816 && !const_ok_for_arm (temp1
))
4818 rtx new_src
= (subtargets
4819 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4821 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4822 source
, subtargets
, generate
);
4827 gen_rtx_SET (target
,
4830 gen_rtx_LSHIFTRT (mode
, source
,
4841 /* If we have IOR or XOR, and the constant can be loaded in a
4842 single instruction, and we can find a temporary to put it in,
4843 then this can be done in two instructions instead of 3-4. */
4845 /* TARGET can't be NULL if SUBTARGETS is 0 */
4846 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4848 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4852 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4854 emit_constant_insn (cond
,
4855 gen_rtx_SET (sub
, GEN_INT (val
)));
4856 emit_constant_insn (cond
,
4857 gen_rtx_SET (target
,
4858 gen_rtx_fmt_ee (code
, mode
,
4869 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4870 and the remainder 0s for e.g. 0xfff00000)
4871 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4873 This can be done in 2 instructions by using shifts with mov or mvn.
4878 mvn r0, r0, lsr #12 */
4879 if (set_sign_bit_copies
> 8
4880 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
4884 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4885 rtx shift
= GEN_INT (set_sign_bit_copies
);
4891 gen_rtx_ASHIFT (mode
,
4896 gen_rtx_SET (target
,
4898 gen_rtx_LSHIFTRT (mode
, sub
,
4905 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4907 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4909 For eg. r0 = r0 | 0xfff
4914 if (set_zero_bit_copies
> 8
4915 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4919 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4920 rtx shift
= GEN_INT (set_zero_bit_copies
);
4926 gen_rtx_LSHIFTRT (mode
,
4931 gen_rtx_SET (target
,
4933 gen_rtx_ASHIFT (mode
, sub
,
4939 /* This will never be reached for Thumb2 because orn is a valid
4940 instruction. This is for Thumb1 and the ARM 32 bit cases.
4942 x = y | constant (such that ~constant is a valid constant)
4944 x = ~(~y & ~constant).
4946 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4950 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4951 emit_constant_insn (cond
,
4953 gen_rtx_NOT (mode
, source
)));
4956 sub
= gen_reg_rtx (mode
);
4957 emit_constant_insn (cond
,
4959 gen_rtx_AND (mode
, source
,
4961 emit_constant_insn (cond
,
4962 gen_rtx_SET (target
,
4963 gen_rtx_NOT (mode
, sub
)));
4970 /* See if two shifts will do 2 or more insn's worth of work. */
4971 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
4973 HOST_WIDE_INT shift_mask
= ((0xffffffff
4974 << (32 - clear_sign_bit_copies
))
4977 if ((remainder
| shift_mask
) != 0xffffffff)
4979 HOST_WIDE_INT new_val
4980 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
4984 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4985 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
4986 new_src
, source
, subtargets
, 1);
4991 rtx targ
= subtargets
? NULL_RTX
: target
;
4992 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
4993 targ
, source
, subtargets
, 0);
4999 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5000 rtx shift
= GEN_INT (clear_sign_bit_copies
);
5002 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
5003 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
5009 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
5011 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
5013 if ((remainder
| shift_mask
) != 0xffffffff)
5015 HOST_WIDE_INT new_val
5016 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5019 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5021 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5022 new_src
, source
, subtargets
, 1);
5027 rtx targ
= subtargets
? NULL_RTX
: target
;
5029 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5030 targ
, source
, subtargets
, 0);
5036 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5037 rtx shift
= GEN_INT (clear_zero_bit_copies
);
5039 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
5040 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
5052 /* Calculate what the instruction sequences would be if we generated it
5053 normally, negated, or inverted. */
5055 /* AND cannot be split into multiple insns, so invert and use BIC. */
5058 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
5061 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
5066 if (can_invert
|| final_invert
)
5067 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
5072 immediates
= &pos_immediates
;
5074 /* Is the negated immediate sequence more efficient? */
5075 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
5078 immediates
= &neg_immediates
;
5083 /* Is the inverted immediate sequence more efficient?
5084 We must allow for an extra NOT instruction for XOR operations, although
5085 there is some chance that the final 'mvn' will get optimized later. */
5086 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
5089 immediates
= &inv_immediates
;
5097 /* Now output the chosen sequence as instructions. */
5100 for (i
= 0; i
< insns
; i
++)
5102 rtx new_src
, temp1_rtx
;
5104 temp1
= immediates
->i
[i
];
5106 if (code
== SET
|| code
== MINUS
)
5107 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
5108 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
5109 new_src
= gen_reg_rtx (mode
);
5115 else if (can_negate
)
5118 temp1
= trunc_int_for_mode (temp1
, mode
);
5119 temp1_rtx
= GEN_INT (temp1
);
5123 else if (code
== MINUS
)
5124 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
5126 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
5128 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
5133 can_negate
= can_invert
;
5137 else if (code
== MINUS
)
5145 emit_constant_insn (cond
, gen_rtx_SET (target
,
5146 gen_rtx_NOT (mode
, source
)));
5153 /* Canonicalize a comparison so that we are more likely to recognize it.
5154 This can be done for a few constant compares, where we can make the
5155 immediate value easier to load. */
5158 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
5159 bool op0_preserve_value
)
5162 unsigned HOST_WIDE_INT i
, maxval
;
5164 mode
= GET_MODE (*op0
);
5165 if (mode
== VOIDmode
)
5166 mode
= GET_MODE (*op1
);
5168 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5170 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5171 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5172 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5173 for GTU/LEU in Thumb mode. */
5177 if (*code
== GT
|| *code
== LE
5178 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
5180 /* Missing comparison. First try to use an available
5182 if (CONST_INT_P (*op1
))
5190 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5192 *op1
= GEN_INT (i
+ 1);
5193 *code
= *code
== GT
? GE
: LT
;
5199 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5200 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5202 *op1
= GEN_INT (i
+ 1);
5203 *code
= *code
== GTU
? GEU
: LTU
;
5212 /* If that did not work, reverse the condition. */
5213 if (!op0_preserve_value
)
5215 std::swap (*op0
, *op1
);
5216 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5222 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5223 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5224 to facilitate possible combining with a cmp into 'ands'. */
5226 && GET_CODE (*op0
) == ZERO_EXTEND
5227 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5228 && GET_MODE (XEXP (*op0
, 0)) == QImode
5229 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5230 && subreg_lowpart_p (XEXP (*op0
, 0))
5231 && *op1
== const0_rtx
)
5232 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5235 /* Comparisons smaller than DImode. Only adjust comparisons against
5236 an out-of-range constant. */
5237 if (!CONST_INT_P (*op1
)
5238 || const_ok_for_arm (INTVAL (*op1
))
5239 || const_ok_for_arm (- INTVAL (*op1
)))
5253 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5255 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5256 *code
= *code
== GT
? GE
: LT
;
5264 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5266 *op1
= GEN_INT (i
- 1);
5267 *code
= *code
== GE
? GT
: LE
;
5274 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5275 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5277 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5278 *code
= *code
== GTU
? GEU
: LTU
;
5286 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5288 *op1
= GEN_INT (i
- 1);
5289 *code
= *code
== GEU
? GTU
: LEU
;
5300 /* Define how to find the value returned by a function. */
5303 arm_function_value(const_tree type
, const_tree func
,
5304 bool outgoing ATTRIBUTE_UNUSED
)
5307 int unsignedp ATTRIBUTE_UNUSED
;
5308 rtx r ATTRIBUTE_UNUSED
;
5310 mode
= TYPE_MODE (type
);
5312 if (TARGET_AAPCS_BASED
)
5313 return aapcs_allocate_return_reg (mode
, type
, func
);
5315 /* Promote integer types. */
5316 if (INTEGRAL_TYPE_P (type
))
5317 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5319 /* Promotes small structs returned in a register to full-word size
5320 for big-endian AAPCS. */
5321 if (arm_return_in_msb (type
))
5323 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5324 if (size
% UNITS_PER_WORD
!= 0)
5326 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5327 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
5331 return arm_libcall_value_1 (mode
);
5334 /* libcall hashtable helpers. */
5336 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5338 static inline hashval_t
hash (const rtx_def
*);
5339 static inline bool equal (const rtx_def
*, const rtx_def
*);
5340 static inline void remove (rtx_def
*);
5344 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5346 return rtx_equal_p (p1
, p2
);
5350 libcall_hasher::hash (const rtx_def
*p1
)
5352 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5355 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5358 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5360 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5364 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5366 static bool init_done
= false;
5367 static libcall_table_type
*libcall_htab
= NULL
;
5373 libcall_htab
= new libcall_table_type (31);
5374 add_libcall (libcall_htab
,
5375 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5376 add_libcall (libcall_htab
,
5377 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5378 add_libcall (libcall_htab
,
5379 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5380 add_libcall (libcall_htab
,
5381 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5383 add_libcall (libcall_htab
,
5384 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5385 add_libcall (libcall_htab
,
5386 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5387 add_libcall (libcall_htab
,
5388 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5389 add_libcall (libcall_htab
,
5390 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5392 add_libcall (libcall_htab
,
5393 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5394 add_libcall (libcall_htab
,
5395 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5396 add_libcall (libcall_htab
,
5397 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5398 add_libcall (libcall_htab
,
5399 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5400 add_libcall (libcall_htab
,
5401 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5402 add_libcall (libcall_htab
,
5403 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5404 add_libcall (libcall_htab
,
5405 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5406 add_libcall (libcall_htab
,
5407 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5409 /* Values from double-precision helper functions are returned in core
5410 registers if the selected core only supports single-precision
5411 arithmetic, even if we are using the hard-float ABI. The same is
5412 true for single-precision helpers, but we will never be using the
5413 hard-float ABI on a CPU which doesn't support single-precision
5414 operations in hardware. */
5415 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5416 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5417 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5418 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5419 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5420 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5421 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5422 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5423 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5424 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5425 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5426 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5428 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5430 add_libcall (libcall_htab
,
5431 convert_optab_libfunc (trunc_optab
, HFmode
, DFmode
));
5434 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5438 arm_libcall_value_1 (machine_mode mode
)
5440 if (TARGET_AAPCS_BASED
)
5441 return aapcs_libcall_value (mode
);
5442 else if (TARGET_IWMMXT_ABI
5443 && arm_vector_mode_supported_p (mode
))
5444 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5446 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5449 /* Define how to find the value returned by a library function
5450 assuming the value has mode MODE. */
5453 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5455 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5456 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5458 /* The following libcalls return their result in integer registers,
5459 even though they return a floating point value. */
5460 if (arm_libcall_uses_aapcs_base (libcall
))
5461 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5465 return arm_libcall_value_1 (mode
);
5468 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5471 arm_function_value_regno_p (const unsigned int regno
)
5473 if (regno
== ARG_REGISTER (1)
5475 && TARGET_AAPCS_BASED
5476 && TARGET_HARD_FLOAT
5477 && regno
== FIRST_VFP_REGNUM
)
5478 || (TARGET_IWMMXT_ABI
5479 && regno
== FIRST_IWMMXT_REGNUM
))
5485 /* Determine the amount of memory needed to store the possible return
5486 registers of an untyped call. */
5488 arm_apply_result_size (void)
5494 if (TARGET_HARD_FLOAT_ABI
)
5496 if (TARGET_IWMMXT_ABI
)
5503 /* Decide whether TYPE should be returned in memory (true)
5504 or in a register (false). FNTYPE is the type of the function making
5507 arm_return_in_memory (const_tree type
, const_tree fntype
)
5511 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5513 if (TARGET_AAPCS_BASED
)
5515 /* Simple, non-aggregate types (ie not including vectors and
5516 complex) are always returned in a register (or registers).
5517 We don't care about which register here, so we can short-cut
5518 some of the detail. */
5519 if (!AGGREGATE_TYPE_P (type
)
5520 && TREE_CODE (type
) != VECTOR_TYPE
5521 && TREE_CODE (type
) != COMPLEX_TYPE
)
5524 /* Any return value that is no larger than one word can be
5526 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5529 /* Check any available co-processors to see if they accept the
5530 type as a register candidate (VFP, for example, can return
5531 some aggregates in consecutive registers). These aren't
5532 available if the call is variadic. */
5533 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5536 /* Vector values should be returned using ARM registers, not
5537 memory (unless they're over 16 bytes, which will break since
5538 we only have four call-clobbered registers to play with). */
5539 if (TREE_CODE (type
) == VECTOR_TYPE
)
5540 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5542 /* The rest go in memory. */
5546 if (TREE_CODE (type
) == VECTOR_TYPE
)
5547 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5549 if (!AGGREGATE_TYPE_P (type
) &&
5550 (TREE_CODE (type
) != VECTOR_TYPE
))
5551 /* All simple types are returned in registers. */
5554 if (arm_abi
!= ARM_ABI_APCS
)
5556 /* ATPCS and later return aggregate types in memory only if they are
5557 larger than a word (or are variable size). */
5558 return (size
< 0 || size
> UNITS_PER_WORD
);
5561 /* For the arm-wince targets we choose to be compatible with Microsoft's
5562 ARM and Thumb compilers, which always return aggregates in memory. */
5564 /* All structures/unions bigger than one word are returned in memory.
5565 Also catch the case where int_size_in_bytes returns -1. In this case
5566 the aggregate is either huge or of variable size, and in either case
5567 we will want to return it via memory and not in a register. */
5568 if (size
< 0 || size
> UNITS_PER_WORD
)
5571 if (TREE_CODE (type
) == RECORD_TYPE
)
5575 /* For a struct the APCS says that we only return in a register
5576 if the type is 'integer like' and every addressable element
5577 has an offset of zero. For practical purposes this means
5578 that the structure can have at most one non bit-field element
5579 and that this element must be the first one in the structure. */
5581 /* Find the first field, ignoring non FIELD_DECL things which will
5582 have been created by C++. */
5583 for (field
= TYPE_FIELDS (type
);
5584 field
&& TREE_CODE (field
) != FIELD_DECL
;
5585 field
= DECL_CHAIN (field
))
5589 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5591 /* Check that the first field is valid for returning in a register. */
5593 /* ... Floats are not allowed */
5594 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5597 /* ... Aggregates that are not themselves valid for returning in
5598 a register are not allowed. */
5599 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5602 /* Now check the remaining fields, if any. Only bitfields are allowed,
5603 since they are not addressable. */
5604 for (field
= DECL_CHAIN (field
);
5606 field
= DECL_CHAIN (field
))
5608 if (TREE_CODE (field
) != FIELD_DECL
)
5611 if (!DECL_BIT_FIELD_TYPE (field
))
5618 if (TREE_CODE (type
) == UNION_TYPE
)
5622 /* Unions can be returned in registers if every element is
5623 integral, or can be returned in an integer register. */
5624 for (field
= TYPE_FIELDS (type
);
5626 field
= DECL_CHAIN (field
))
5628 if (TREE_CODE (field
) != FIELD_DECL
)
5631 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5634 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5640 #endif /* not ARM_WINCE */
5642 /* Return all other types in memory. */
5646 const struct pcs_attribute_arg
5650 } pcs_attribute_args
[] =
5652 {"aapcs", ARM_PCS_AAPCS
},
5653 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5655 /* We could recognize these, but changes would be needed elsewhere
5656 * to implement them. */
5657 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5658 {"atpcs", ARM_PCS_ATPCS
},
5659 {"apcs", ARM_PCS_APCS
},
5661 {NULL
, ARM_PCS_UNKNOWN
}
5665 arm_pcs_from_attribute (tree attr
)
5667 const struct pcs_attribute_arg
*ptr
;
5670 /* Get the value of the argument. */
5671 if (TREE_VALUE (attr
) == NULL_TREE
5672 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5673 return ARM_PCS_UNKNOWN
;
5675 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5677 /* Check it against the list of known arguments. */
5678 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5679 if (streq (arg
, ptr
->arg
))
5682 /* An unrecognized interrupt type. */
5683 return ARM_PCS_UNKNOWN
;
5686 /* Get the PCS variant to use for this call. TYPE is the function's type
5687 specification, DECL is the specific declartion. DECL may be null if
5688 the call could be indirect or if this is a library call. */
5690 arm_get_pcs_model (const_tree type
, const_tree decl
)
5692 bool user_convention
= false;
5693 enum arm_pcs user_pcs
= arm_pcs_default
;
5698 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5701 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5702 user_convention
= true;
5705 if (TARGET_AAPCS_BASED
)
5707 /* Detect varargs functions. These always use the base rules
5708 (no argument is ever a candidate for a co-processor
5710 bool base_rules
= stdarg_p (type
);
5712 if (user_convention
)
5714 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5715 sorry ("non-AAPCS derived PCS variant");
5716 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5717 error ("variadic functions must use the base AAPCS variant");
5721 return ARM_PCS_AAPCS
;
5722 else if (user_convention
)
5724 else if (decl
&& flag_unit_at_a_time
)
5726 /* Local functions never leak outside this compilation unit,
5727 so we are free to use whatever conventions are
5729 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5730 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5732 return ARM_PCS_AAPCS_LOCAL
;
5735 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5736 sorry ("PCS variant");
5738 /* For everything else we use the target's default. */
5739 return arm_pcs_default
;
5744 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5745 const_tree fntype ATTRIBUTE_UNUSED
,
5746 rtx libcall ATTRIBUTE_UNUSED
,
5747 const_tree fndecl ATTRIBUTE_UNUSED
)
5749 /* Record the unallocated VFP registers. */
5750 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5751 pcum
->aapcs_vfp_reg_alloc
= 0;
5754 /* Walk down the type tree of TYPE counting consecutive base elements.
5755 If *MODEP is VOIDmode, then set it to the first valid floating point
5756 type. If a non-floating point type is found, or if a floating point
5757 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5758 otherwise return the count in the sub-tree. */
5760 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5765 switch (TREE_CODE (type
))
5768 mode
= TYPE_MODE (type
);
5769 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
)
5772 if (*modep
== VOIDmode
)
5781 mode
= TYPE_MODE (TREE_TYPE (type
));
5782 if (mode
!= DFmode
&& mode
!= SFmode
)
5785 if (*modep
== VOIDmode
)
5794 /* Use V2SImode and V4SImode as representatives of all 64-bit
5795 and 128-bit vector types, whether or not those modes are
5796 supported with the present options. */
5797 size
= int_size_in_bytes (type
);
5810 if (*modep
== VOIDmode
)
5813 /* Vector modes are considered to be opaque: two vectors are
5814 equivalent for the purposes of being homogeneous aggregates
5815 if they are the same size. */
5824 tree index
= TYPE_DOMAIN (type
);
5826 /* Can't handle incomplete types nor sizes that are not
5828 if (!COMPLETE_TYPE_P (type
)
5829 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5832 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5835 || !TYPE_MAX_VALUE (index
)
5836 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5837 || !TYPE_MIN_VALUE (index
)
5838 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5842 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5843 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5845 /* There must be no padding. */
5846 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5858 /* Can't handle incomplete types nor sizes that are not
5860 if (!COMPLETE_TYPE_P (type
)
5861 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5864 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5866 if (TREE_CODE (field
) != FIELD_DECL
)
5869 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5875 /* There must be no padding. */
5876 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5883 case QUAL_UNION_TYPE
:
5885 /* These aren't very interesting except in a degenerate case. */
5890 /* Can't handle incomplete types nor sizes that are not
5892 if (!COMPLETE_TYPE_P (type
)
5893 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5896 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5898 if (TREE_CODE (field
) != FIELD_DECL
)
5901 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5904 count
= count
> sub_count
? count
: sub_count
;
5907 /* There must be no padding. */
5908 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
5921 /* Return true if PCS_VARIANT should use VFP registers. */
5923 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5925 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5927 static bool seen_thumb1_vfp
= false;
5929 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5931 sorry ("Thumb-1 hard-float VFP ABI");
5932 /* sorry() is not immediately fatal, so only display this once. */
5933 seen_thumb1_vfp
= true;
5939 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5942 return (TARGET_32BIT
&& TARGET_HARD_FLOAT
&&
5943 (TARGET_VFP_DOUBLE
|| !is_double
));
5946 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5947 suitable for passing or returning in VFP registers for the PCS
5948 variant selected. If it is, then *BASE_MODE is updated to contain
5949 a machine mode describing each element of the argument's type and
5950 *COUNT to hold the number of such elements. */
5952 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5953 machine_mode mode
, const_tree type
,
5954 machine_mode
*base_mode
, int *count
)
5956 machine_mode new_mode
= VOIDmode
;
5958 /* If we have the type information, prefer that to working things
5959 out from the mode. */
5962 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
5964 if (ag_count
> 0 && ag_count
<= 4)
5969 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5970 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
5971 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5976 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
5979 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
5985 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
5988 *base_mode
= new_mode
;
5993 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
5994 machine_mode mode
, const_tree type
)
5996 int count ATTRIBUTE_UNUSED
;
5997 machine_mode ag_mode ATTRIBUTE_UNUSED
;
5999 if (!use_vfp_abi (pcs_variant
, false))
6001 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6006 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6009 if (!use_vfp_abi (pcum
->pcs_variant
, false))
6012 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
6013 &pcum
->aapcs_vfp_rmode
,
6014 &pcum
->aapcs_vfp_rcount
);
6017 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6018 for the behaviour of this function. */
6021 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6022 const_tree type ATTRIBUTE_UNUSED
)
6025 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
6026 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
6027 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
6030 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
6031 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
6033 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
6035 || (mode
== TImode
&& ! TARGET_NEON
)
6036 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
6039 int rcount
= pcum
->aapcs_vfp_rcount
;
6041 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
6045 /* Avoid using unsupported vector modes. */
6046 if (rmode
== V2SImode
)
6048 else if (rmode
== V4SImode
)
6055 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
6056 for (i
= 0; i
< rcount
; i
++)
6058 rtx tmp
= gen_rtx_REG (rmode
,
6059 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
6060 tmp
= gen_rtx_EXPR_LIST
6062 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
6063 XVECEXP (par
, 0, i
) = tmp
;
6066 pcum
->aapcs_reg
= par
;
6069 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
6075 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6076 comment there for the behaviour of this function. */
6079 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
6081 const_tree type ATTRIBUTE_UNUSED
)
6083 if (!use_vfp_abi (pcs_variant
, false))
6087 || (GET_MODE_CLASS (mode
) == MODE_INT
6088 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
6092 machine_mode ag_mode
;
6097 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6102 if (ag_mode
== V2SImode
)
6104 else if (ag_mode
== V4SImode
)
6110 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
6111 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
6112 for (i
= 0; i
< count
; i
++)
6114 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
6115 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
6116 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
6117 XVECEXP (par
, 0, i
) = tmp
;
6123 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
6127 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
6128 machine_mode mode ATTRIBUTE_UNUSED
,
6129 const_tree type ATTRIBUTE_UNUSED
)
6131 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
6132 pcum
->aapcs_vfp_reg_alloc
= 0;
6136 #define AAPCS_CP(X) \
6138 aapcs_ ## X ## _cum_init, \
6139 aapcs_ ## X ## _is_call_candidate, \
6140 aapcs_ ## X ## _allocate, \
6141 aapcs_ ## X ## _is_return_candidate, \
6142 aapcs_ ## X ## _allocate_return_reg, \
6143 aapcs_ ## X ## _advance \
6146 /* Table of co-processors that can be used to pass arguments in
6147 registers. Idealy no arugment should be a candidate for more than
6148 one co-processor table entry, but the table is processed in order
6149 and stops after the first match. If that entry then fails to put
6150 the argument into a co-processor register, the argument will go on
6154 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6155 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
6157 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6158 BLKmode) is a candidate for this co-processor's registers; this
6159 function should ignore any position-dependent state in
6160 CUMULATIVE_ARGS and only use call-type dependent information. */
6161 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6163 /* Return true if the argument does get a co-processor register; it
6164 should set aapcs_reg to an RTX of the register allocated as is
6165 required for a return from FUNCTION_ARG. */
6166 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6168 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6169 be returned in this co-processor's registers. */
6170 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6172 /* Allocate and return an RTX element to hold the return type of a call. This
6173 routine must not fail and will only be called if is_return_candidate
6174 returned true with the same parameters. */
6175 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6177 /* Finish processing this argument and prepare to start processing
6179 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6180 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6188 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6193 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6194 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6201 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6203 /* We aren't passed a decl, so we can't check that a call is local.
6204 However, it isn't clear that that would be a win anyway, since it
6205 might limit some tail-calling opportunities. */
6206 enum arm_pcs pcs_variant
;
6210 const_tree fndecl
= NULL_TREE
;
6212 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6215 fntype
= TREE_TYPE (fntype
);
6218 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6221 pcs_variant
= arm_pcs_default
;
6223 if (pcs_variant
!= ARM_PCS_AAPCS
)
6227 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6228 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6237 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6240 /* We aren't passed a decl, so we can't check that a call is local.
6241 However, it isn't clear that that would be a win anyway, since it
6242 might limit some tail-calling opportunities. */
6243 enum arm_pcs pcs_variant
;
6244 int unsignedp ATTRIBUTE_UNUSED
;
6248 const_tree fndecl
= NULL_TREE
;
6250 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6253 fntype
= TREE_TYPE (fntype
);
6256 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6259 pcs_variant
= arm_pcs_default
;
6261 /* Promote integer types. */
6262 if (type
&& INTEGRAL_TYPE_P (type
))
6263 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6265 if (pcs_variant
!= ARM_PCS_AAPCS
)
6269 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6270 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6272 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6276 /* Promotes small structs returned in a register to full-word size
6277 for big-endian AAPCS. */
6278 if (type
&& arm_return_in_msb (type
))
6280 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6281 if (size
% UNITS_PER_WORD
!= 0)
6283 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6284 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
6288 return gen_rtx_REG (mode
, R0_REGNUM
);
6292 aapcs_libcall_value (machine_mode mode
)
6294 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6295 && GET_MODE_SIZE (mode
) <= 4)
6298 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6301 /* Lay out a function argument using the AAPCS rules. The rule
6302 numbers referred to here are those in the AAPCS. */
6304 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6305 const_tree type
, bool named
)
6310 /* We only need to do this once per argument. */
6311 if (pcum
->aapcs_arg_processed
)
6314 pcum
->aapcs_arg_processed
= true;
6316 /* Special case: if named is false then we are handling an incoming
6317 anonymous argument which is on the stack. */
6321 /* Is this a potential co-processor register candidate? */
6322 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6324 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6325 pcum
->aapcs_cprc_slot
= slot
;
6327 /* We don't have to apply any of the rules from part B of the
6328 preparation phase, these are handled elsewhere in the
6333 /* A Co-processor register candidate goes either in its own
6334 class of registers or on the stack. */
6335 if (!pcum
->aapcs_cprc_failed
[slot
])
6337 /* C1.cp - Try to allocate the argument to co-processor
6339 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6342 /* C2.cp - Put the argument on the stack and note that we
6343 can't assign any more candidates in this slot. We also
6344 need to note that we have allocated stack space, so that
6345 we won't later try to split a non-cprc candidate between
6346 core registers and the stack. */
6347 pcum
->aapcs_cprc_failed
[slot
] = true;
6348 pcum
->can_split
= false;
6351 /* We didn't get a register, so this argument goes on the
6353 gcc_assert (pcum
->can_split
== false);
6358 /* C3 - For double-word aligned arguments, round the NCRN up to the
6359 next even number. */
6360 ncrn
= pcum
->aapcs_ncrn
;
6363 int res
= arm_needs_doubleword_align (mode
, type
);
6364 /* Only warn during RTL expansion of call stmts, otherwise we would
6365 warn e.g. during gimplification even on functions that will be
6366 always inlined, and we'd warn multiple times. Don't warn when
6367 called in expand_function_start either, as we warn instead in
6368 arm_function_arg_boundary in that case. */
6369 if (res
< 0 && warn_psabi
&& currently_expanding_gimple_stmt
)
6370 inform (input_location
, "parameter passing for argument of type "
6371 "%qT changed in GCC 7.1", type
);
6376 nregs
= ARM_NUM_REGS2(mode
, type
);
6378 /* Sigh, this test should really assert that nregs > 0, but a GCC
6379 extension allows empty structs and then gives them empty size; it
6380 then allows such a structure to be passed by value. For some of
6381 the code below we have to pretend that such an argument has
6382 non-zero size so that we 'locate' it correctly either in
6383 registers or on the stack. */
6384 gcc_assert (nregs
>= 0);
6386 nregs2
= nregs
? nregs
: 1;
6388 /* C4 - Argument fits entirely in core registers. */
6389 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6391 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6392 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6396 /* C5 - Some core registers left and there are no arguments already
6397 on the stack: split this argument between the remaining core
6398 registers and the stack. */
6399 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6401 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6402 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6403 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6407 /* C6 - NCRN is set to 4. */
6408 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6410 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6414 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6415 for a call to a function whose data type is FNTYPE.
6416 For a library call, FNTYPE is NULL. */
6418 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6420 tree fndecl ATTRIBUTE_UNUSED
)
6422 /* Long call handling. */
6424 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6426 pcum
->pcs_variant
= arm_pcs_default
;
6428 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6430 if (arm_libcall_uses_aapcs_base (libname
))
6431 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6433 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6434 pcum
->aapcs_reg
= NULL_RTX
;
6435 pcum
->aapcs_partial
= 0;
6436 pcum
->aapcs_arg_processed
= false;
6437 pcum
->aapcs_cprc_slot
= -1;
6438 pcum
->can_split
= true;
6440 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6444 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6446 pcum
->aapcs_cprc_failed
[i
] = false;
6447 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6455 /* On the ARM, the offset starts at 0. */
6457 pcum
->iwmmxt_nregs
= 0;
6458 pcum
->can_split
= true;
6460 /* Varargs vectors are treated the same as long long.
6461 named_count avoids having to change the way arm handles 'named' */
6462 pcum
->named_count
= 0;
6465 if (TARGET_REALLY_IWMMXT
&& fntype
)
6469 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6471 fn_arg
= TREE_CHAIN (fn_arg
))
6472 pcum
->named_count
+= 1;
6474 if (! pcum
->named_count
)
6475 pcum
->named_count
= INT_MAX
;
6479 /* Return 1 if double word alignment is required for argument passing.
6480 Return -1 if double word alignment used to be required for argument
6481 passing before PR77728 ABI fix, but is not required anymore.
6482 Return 0 if double word alignment is not required and wasn't requried
6485 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6488 return GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
;
6490 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6491 if (!AGGREGATE_TYPE_P (type
))
6492 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6494 /* Array types: Use member alignment of element type. */
6495 if (TREE_CODE (type
) == ARRAY_TYPE
)
6496 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6499 /* Record/aggregate types: Use greatest member alignment of any member. */
6500 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6501 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6503 if (TREE_CODE (field
) == FIELD_DECL
)
6506 /* Before PR77728 fix, we were incorrectly considering also
6507 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6508 Make sure we can warn about that with -Wpsabi. */
6516 /* Determine where to put an argument to a function.
6517 Value is zero to push the argument on the stack,
6518 or a hard register in which to store the argument.
6520 MODE is the argument's machine mode.
6521 TYPE is the data type of the argument (as a tree).
6522 This is null for libcalls where that information may
6524 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6525 the preceding args and about the function being called.
6526 NAMED is nonzero if this argument is a named parameter
6527 (otherwise it is an extra parameter matching an ellipsis).
6529 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6530 other arguments are passed on the stack. If (NAMED == 0) (which happens
6531 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6532 defined), say it is passed in the stack (function_prologue will
6533 indeed make it pass in the stack if necessary). */
6536 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
6537 const_tree type
, bool named
)
6539 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6542 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6543 a call insn (op3 of a call_value insn). */
6544 if (mode
== VOIDmode
)
6547 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6549 aapcs_layout_arg (pcum
, mode
, type
, named
);
6550 return pcum
->aapcs_reg
;
6553 /* Varargs vectors are treated the same as long long.
6554 named_count avoids having to change the way arm handles 'named' */
6555 if (TARGET_IWMMXT_ABI
6556 && arm_vector_mode_supported_p (mode
)
6557 && pcum
->named_count
> pcum
->nargs
+ 1)
6559 if (pcum
->iwmmxt_nregs
<= 9)
6560 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6563 pcum
->can_split
= false;
6568 /* Put doubleword aligned quantities in even register pairs. */
6569 if ((pcum
->nregs
& 1) && ARM_DOUBLEWORD_ALIGN
)
6571 int res
= arm_needs_doubleword_align (mode
, type
);
6572 if (res
< 0 && warn_psabi
)
6573 inform (input_location
, "parameter passing for argument of type "
6574 "%qT changed in GCC 7.1", type
);
6579 /* Only allow splitting an arg between regs and memory if all preceding
6580 args were allocated to regs. For args passed by reference we only count
6581 the reference pointer. */
6582 if (pcum
->can_split
)
6585 nregs
= ARM_NUM_REGS2 (mode
, type
);
6587 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6590 return gen_rtx_REG (mode
, pcum
->nregs
);
6594 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6596 if (!ARM_DOUBLEWORD_ALIGN
)
6597 return PARM_BOUNDARY
;
6599 int res
= arm_needs_doubleword_align (mode
, type
);
6600 if (res
< 0 && warn_psabi
)
6601 inform (input_location
, "parameter passing for argument of type %qT "
6602 "changed in GCC 7.1", type
);
6604 return res
> 0 ? DOUBLEWORD_ALIGNMENT
: PARM_BOUNDARY
;
6608 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6609 tree type
, bool named
)
6611 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6612 int nregs
= pcum
->nregs
;
6614 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6616 aapcs_layout_arg (pcum
, mode
, type
, named
);
6617 return pcum
->aapcs_partial
;
6620 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6623 if (NUM_ARG_REGS
> nregs
6624 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6626 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6631 /* Update the data in PCUM to advance over an argument
6632 of mode MODE and data type TYPE.
6633 (TYPE is null for libcalls where that information may not be available.) */
6636 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6637 const_tree type
, bool named
)
6639 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6641 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6643 aapcs_layout_arg (pcum
, mode
, type
, named
);
6645 if (pcum
->aapcs_cprc_slot
>= 0)
6647 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6649 pcum
->aapcs_cprc_slot
= -1;
6652 /* Generic stuff. */
6653 pcum
->aapcs_arg_processed
= false;
6654 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6655 pcum
->aapcs_reg
= NULL_RTX
;
6656 pcum
->aapcs_partial
= 0;
6661 if (arm_vector_mode_supported_p (mode
)
6662 && pcum
->named_count
> pcum
->nargs
6663 && TARGET_IWMMXT_ABI
)
6664 pcum
->iwmmxt_nregs
+= 1;
6666 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6670 /* Variable sized types are passed by reference. This is a GCC
6671 extension to the ARM ABI. */
6674 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6675 machine_mode mode ATTRIBUTE_UNUSED
,
6676 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6678 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
6681 /* Encode the current state of the #pragma [no_]long_calls. */
6684 OFF
, /* No #pragma [no_]long_calls is in effect. */
6685 LONG
, /* #pragma long_calls is in effect. */
6686 SHORT
/* #pragma no_long_calls is in effect. */
6689 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
6692 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6694 arm_pragma_long_calls
= LONG
;
6698 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6700 arm_pragma_long_calls
= SHORT
;
6704 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6706 arm_pragma_long_calls
= OFF
;
6709 /* Handle an attribute requiring a FUNCTION_DECL;
6710 arguments as in struct attribute_spec.handler. */
6712 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6713 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6715 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6717 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6719 *no_add_attrs
= true;
6725 /* Handle an "interrupt" or "isr" attribute;
6726 arguments as in struct attribute_spec.handler. */
6728 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6733 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6735 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6737 *no_add_attrs
= true;
6739 /* FIXME: the argument if any is checked for type attributes;
6740 should it be checked for decl ones? */
6744 if (TREE_CODE (*node
) == FUNCTION_TYPE
6745 || TREE_CODE (*node
) == METHOD_TYPE
)
6747 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6749 warning (OPT_Wattributes
, "%qE attribute ignored",
6751 *no_add_attrs
= true;
6754 else if (TREE_CODE (*node
) == POINTER_TYPE
6755 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6756 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6757 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6759 *node
= build_variant_type_copy (*node
);
6760 TREE_TYPE (*node
) = build_type_attribute_variant
6762 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6763 *no_add_attrs
= true;
6767 /* Possibly pass this attribute on from the type to a decl. */
6768 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6769 | (int) ATTR_FLAG_FUNCTION_NEXT
6770 | (int) ATTR_FLAG_ARRAY_NEXT
))
6772 *no_add_attrs
= true;
6773 return tree_cons (name
, args
, NULL_TREE
);
6777 warning (OPT_Wattributes
, "%qE attribute ignored",
6786 /* Handle a "pcs" attribute; arguments as in struct
6787 attribute_spec.handler. */
6789 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6790 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6792 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6794 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6795 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */
static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
6825 /* This function returns true if a function with declaration FNDECL and type
6826 FNTYPE uses the stack to pass arguments or return variables and false
6827 otherwise. This is used for functions with the attributes
6828 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6829 diagnostic messages if the stack is used. NAME is the name of the attribute
6833 cmse_func_args_or_return_in_stack (tree fndecl
, tree name
, tree fntype
)
6835 function_args_iterator args_iter
;
6836 CUMULATIVE_ARGS args_so_far_v
;
6837 cumulative_args_t args_so_far
;
6838 bool first_param
= true;
6839 tree arg_type
, prev_arg_type
= NULL_TREE
, ret_type
;
6841 /* Error out if any argument is passed on the stack. */
6842 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
6843 args_so_far
= pack_cumulative_args (&args_so_far_v
);
6844 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
6847 machine_mode arg_mode
= TYPE_MODE (arg_type
);
6849 prev_arg_type
= arg_type
;
6850 if (VOID_TYPE_P (arg_type
))
6854 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
, true);
6855 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
, true);
6857 || arm_arg_partial_bytes (args_so_far
, arg_mode
, arg_type
, true))
6859 error ("%qE attribute not available to functions with arguments "
6860 "passed on the stack", name
);
6863 first_param
= false;
6866 /* Error out for variadic functions since we cannot control how many
6867 arguments will be passed and thus stack could be used. stdarg_p () is not
6868 used for the checking to avoid browsing arguments twice. */
6869 if (prev_arg_type
!= NULL_TREE
&& !VOID_TYPE_P (prev_arg_type
))
6871 error ("%qE attribute not available to functions with variable number "
6872 "of arguments", name
);
6876 /* Error out if return value is passed on the stack. */
6877 ret_type
= TREE_TYPE (fntype
);
6878 if (arm_return_in_memory (ret_type
, fntype
))
6880 error ("%qE attribute not available to functions that return value on "
6887 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6888 function will check whether the attribute is allowed here and will add the
6889 attribute to the function declaration tree or otherwise issue a warning. */
6892 arm_handle_cmse_nonsecure_entry (tree
*node
, tree name
,
6901 *no_add_attrs
= true;
6902 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
6907 /* Ignore attribute for function types. */
6908 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6910 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6912 *no_add_attrs
= true;
6918 /* Warn for static linkage functions. */
6919 if (!TREE_PUBLIC (fndecl
))
6921 warning (OPT_Wattributes
, "%qE attribute has no effect on functions "
6922 "with static linkage", name
);
6923 *no_add_attrs
= true;
6927 *no_add_attrs
|= cmse_func_args_or_return_in_stack (fndecl
, name
,
6928 TREE_TYPE (fndecl
));
6933 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6934 function will check whether the attribute is allowed here and will add the
6935 attribute to the function type tree or otherwise issue a diagnostic. The
6936 reason we check this at declaration time is to only allow the use of the
6937 attribute with declarations of function pointers and not function
6938 declarations. This function checks NODE is of the expected type and issues
6939 diagnostics otherwise using NAME. If it is not of the expected type
6940 *NO_ADD_ATTRS will be set to true. */
6943 arm_handle_cmse_nonsecure_call (tree
*node
, tree name
,
6948 tree decl
= NULL_TREE
, fntype
= NULL_TREE
;
6953 *no_add_attrs
= true;
6954 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
6959 if (TREE_CODE (*node
) == VAR_DECL
|| TREE_CODE (*node
) == TYPE_DECL
)
6962 fntype
= TREE_TYPE (decl
);
6965 while (fntype
!= NULL_TREE
&& TREE_CODE (fntype
) == POINTER_TYPE
)
6966 fntype
= TREE_TYPE (fntype
);
6968 if (!decl
|| TREE_CODE (fntype
) != FUNCTION_TYPE
)
6970 warning (OPT_Wattributes
, "%qE attribute only applies to base type of a "
6971 "function pointer", name
);
6972 *no_add_attrs
= true;
6976 *no_add_attrs
|= cmse_func_args_or_return_in_stack (NULL
, name
, fntype
);
6981 /* Prevent trees being shared among function types with and without
6982 cmse_nonsecure_call attribute. */
6983 type
= TREE_TYPE (decl
);
6985 type
= build_distinct_type_copy (type
);
6986 TREE_TYPE (decl
) = type
;
6989 while (TREE_CODE (fntype
) != FUNCTION_TYPE
)
6992 fntype
= TREE_TYPE (fntype
);
6993 fntype
= build_distinct_type_copy (fntype
);
6994 TREE_TYPE (type
) = fntype
;
6997 /* Construct a type attribute and add it to the function type. */
6998 tree attrs
= tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE
,
6999 TYPE_ATTRIBUTES (fntype
));
7000 TYPE_ATTRIBUTES (fntype
) = attrs
;
7004 /* Return 0 if the attributes for two types are incompatible, 1 if they
7005 are compatible, and 2 if they are nearly compatible (which causes a
7006 warning to be generated). */
7008 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
7012 /* Check for mismatch of non-default calling convention. */
7013 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
7016 /* Check for mismatched call attributes. */
7017 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7018 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7019 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7020 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7022 /* Only bother to check if an attribute is defined. */
7023 if (l1
| l2
| s1
| s2
)
7025 /* If one type has an attribute, the other must have the same attribute. */
7026 if ((l1
!= l2
) || (s1
!= s2
))
7029 /* Disallow mixed attributes. */
7030 if ((l1
& s2
) || (l2
& s1
))
7034 /* Check for mismatched ISR attribute. */
7035 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
7037 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
7038 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
7040 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
7044 l1
= lookup_attribute ("cmse_nonsecure_call",
7045 TYPE_ATTRIBUTES (type1
)) != NULL
;
7046 l2
= lookup_attribute ("cmse_nonsecure_call",
7047 TYPE_ATTRIBUTES (type2
)) != NULL
;
7055 /* Assigns default attributes to newly defined type. This is used to
7056 set short_call/long_call attributes for function types of
7057 functions defined inside corresponding #pragma scopes. */
7059 arm_set_default_type_attributes (tree type
)
7061 /* Add __attribute__ ((long_call)) to all functions, when
7062 inside #pragma long_calls or __attribute__ ((short_call)),
7063 when inside #pragma no_long_calls. */
7064 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
7066 tree type_attr_list
, attr_name
;
7067 type_attr_list
= TYPE_ATTRIBUTES (type
);
7069 if (arm_pragma_long_calls
== LONG
)
7070 attr_name
= get_identifier ("long_call");
7071 else if (arm_pragma_long_calls
== SHORT
)
7072 attr_name
= get_identifier ("short_call");
7076 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
7077 TYPE_ATTRIBUTES (type
) = type_attr_list
;
7081 /* Return true if DECL is known to be linked into section SECTION. */
7084 arm_function_in_section_p (tree decl
, section
*section
)
7086 /* We can only be certain about the prevailing symbol definition. */
7087 if (!decl_binds_to_current_def_p (decl
))
7090 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7091 if (!DECL_SECTION_NAME (decl
))
7093 /* Make sure that we will not create a unique section for DECL. */
7094 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
7098 return function_section (decl
) == section
;
7101 /* Return nonzero if a 32-bit "long_call" should be generated for
7102 a call from the current function to DECL. We generate a long_call
7105 a. has an __attribute__((long call))
7106 or b. is within the scope of a #pragma long_calls
7107 or c. the -mlong-calls command line switch has been specified
7109 However we do not generate a long call if the function:
7111 d. has an __attribute__ ((short_call))
7112 or e. is inside the scope of a #pragma no_long_calls
7113 or f. is defined in the same section as the current function. */
7116 arm_is_long_call_p (tree decl
)
7121 return TARGET_LONG_CALLS
;
7123 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
7124 if (lookup_attribute ("short_call", attrs
))
7127 /* For "f", be conservative, and only cater for cases in which the
7128 whole of the current function is placed in the same section. */
7129 if (!flag_reorder_blocks_and_partition
7130 && TREE_CODE (decl
) == FUNCTION_DECL
7131 && arm_function_in_section_p (decl
, current_function_section ()))
7134 if (lookup_attribute ("long_call", attrs
))
7137 return TARGET_LONG_CALLS
;
7140 /* Return nonzero if it is ok to make a tail-call to DECL. */
7142 arm_function_ok_for_sibcall (tree decl
, tree exp
)
7144 unsigned long func_type
;
7146 if (cfun
->machine
->sibcall_blocked
)
7149 /* Never tailcall something if we are generating code for Thumb-1. */
7153 /* The PIC register is live on entry to VxWorks PLT entries, so we
7154 must make the call before restoring the PIC register. */
7155 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
7158 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7159 may be used both as target of the call and base register for restoring
7160 the VFP registers */
7161 if (TARGET_APCS_FRAME
&& TARGET_ARM
7162 && TARGET_HARD_FLOAT
7163 && decl
&& arm_is_long_call_p (decl
))
7166 /* If we are interworking and the function is not declared static
7167 then we can't tail-call it unless we know that it exists in this
7168 compilation unit (since it might be a Thumb routine). */
7169 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
7170 && !TREE_ASM_WRITTEN (decl
))
7173 func_type
= arm_current_func_type ();
7174 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7175 if (IS_INTERRUPT (func_type
))
7178 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7179 generated for entry functions themselves. */
7180 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7183 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7184 this would complicate matters for later code generation. */
7185 if (TREE_CODE (exp
) == CALL_EXPR
)
7187 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7188 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype
)))
7192 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
7194 /* Check that the return value locations are the same. For
7195 example that we aren't returning a value from the sibling in
7196 a VFP register but then need to transfer it to a core
7199 tree decl_or_type
= decl
;
7201 /* If it is an indirect function pointer, get the function type. */
7203 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7205 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
7206 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
7208 if (!rtx_equal_p (a
, b
))
7212 /* Never tailcall if function may be called with a misaligned SP. */
7213 if (IS_STACKALIGN (func_type
))
7216 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7217 references should become a NOP. Don't convert such calls into
7219 if (TARGET_AAPCS_BASED
7220 && arm_abi
== ARM_ABI_AAPCS
7222 && DECL_WEAK (decl
))
7225 /* We cannot do a tailcall for an indirect call by descriptor if all the
7226 argument registers are used because the only register left to load the
7227 address is IP and it will already contain the static chain. */
7228 if (!decl
&& CALL_EXPR_BY_DESCRIPTOR (exp
) && !flag_trampolines
)
7230 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7231 CUMULATIVE_ARGS cum
;
7232 cumulative_args_t cum_v
;
7234 arm_init_cumulative_args (&cum
, fntype
, NULL_RTX
, NULL_TREE
);
7235 cum_v
= pack_cumulative_args (&cum
);
7237 for (tree t
= TYPE_ARG_TYPES (fntype
); t
; t
= TREE_CHAIN (t
))
7239 tree type
= TREE_VALUE (t
);
7240 if (!VOID_TYPE_P (type
))
7241 arm_function_arg_advance (cum_v
, TYPE_MODE (type
), type
, true);
7244 if (!arm_function_arg (cum_v
, SImode
, integer_type_node
, true))
7248 /* Everything else is ok. */
7253 /* Addressing mode support functions. */
7255 /* Return nonzero if X is a legitimate immediate operand when compiling
7256 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7258 legitimate_pic_operand_p (rtx x
)
7260 if (GET_CODE (x
) == SYMBOL_REF
7261 || (GET_CODE (x
) == CONST
7262 && GET_CODE (XEXP (x
, 0)) == PLUS
7263 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
7269 /* Record that the current function needs a PIC register. Initialize
7270 cfun->machine->pic_reg if we have not already done so. */
7273 require_pic_register (void)
7275 /* A lot of the logic here is made obscure by the fact that this
7276 routine gets called as part of the rtx cost estimation process.
7277 We don't want those calls to affect any assumptions about the real
7278 function; and further, we can't call entry_of_function() until we
7279 start the real expansion process. */
7280 if (!crtl
->uses_pic_offset_table
)
7282 gcc_assert (can_create_pseudo_p ());
7283 if (arm_pic_register
!= INVALID_REGNUM
7284 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
7286 if (!cfun
->machine
->pic_reg
)
7287 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
7289 /* Play games to avoid marking the function as needing pic
7290 if we are being called as part of the cost-estimation
7292 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7293 crtl
->uses_pic_offset_table
= 1;
7297 rtx_insn
*seq
, *insn
;
7299 if (!cfun
->machine
->pic_reg
)
7300 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
7302 /* Play games to avoid marking the function as needing pic
7303 if we are being called as part of the cost-estimation
7305 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7307 crtl
->uses_pic_offset_table
= 1;
7310 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
7311 && arm_pic_register
> LAST_LO_REGNUM
)
7312 emit_move_insn (cfun
->machine
->pic_reg
,
7313 gen_rtx_REG (Pmode
, arm_pic_register
));
7315 arm_load_pic_register (0UL);
7320 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
7322 INSN_LOCATION (insn
) = prologue_location
;
7324 /* We can be called during expansion of PHI nodes, where
7325 we can't yet emit instructions directly in the final
7326 insn stream. Queue the insns on the entry edge, they will
7327 be committed after everything else is expanded. */
7328 insert_insn_on_edge (seq
,
7329 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
7336 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
7338 if (GET_CODE (orig
) == SYMBOL_REF
7339 || GET_CODE (orig
) == LABEL_REF
)
7343 gcc_assert (can_create_pseudo_p ());
7344 reg
= gen_reg_rtx (Pmode
);
7347 /* VxWorks does not impose a fixed gap between segments; the run-time
7348 gap can be different from the object-file gap. We therefore can't
7349 use GOTOFF unless we are absolutely sure that the symbol is in the
7350 same segment as the GOT. Unfortunately, the flexibility of linker
7351 scripts means that we can't be sure of that in general, so assume
7352 that GOTOFF is never valid on VxWorks. */
7353 /* References to weak symbols cannot be resolved locally: they
7354 may be overridden by a non-weak definition at link time. */
7356 if ((GET_CODE (orig
) == LABEL_REF
7357 || (GET_CODE (orig
) == SYMBOL_REF
7358 && SYMBOL_REF_LOCAL_P (orig
)
7359 && (SYMBOL_REF_DECL (orig
)
7360 ? !DECL_WEAK (SYMBOL_REF_DECL (orig
)) : 1)))
7362 && arm_pic_data_is_text_relative
)
7363 insn
= arm_pic_static_addr (orig
, reg
);
7369 /* If this function doesn't have a pic register, create one now. */
7370 require_pic_register ();
7372 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
7374 /* Make the MEM as close to a constant as possible. */
7375 mem
= SET_SRC (pat
);
7376 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
7377 MEM_READONLY_P (mem
) = 1;
7378 MEM_NOTRAP_P (mem
) = 1;
7380 insn
= emit_insn (pat
);
7383 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7385 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
7389 else if (GET_CODE (orig
) == CONST
)
7393 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7394 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
7397 /* Handle the case where we have: const (UNSPEC_TLS). */
7398 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
7399 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
7402 /* Handle the case where we have:
7403 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7405 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7406 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
7407 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
7409 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
7415 gcc_assert (can_create_pseudo_p ());
7416 reg
= gen_reg_rtx (Pmode
);
7419 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
7421 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
7422 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
7423 base
== reg
? 0 : reg
);
7425 if (CONST_INT_P (offset
))
7427 /* The base register doesn't really matter, we only want to
7428 test the index for the appropriate mode. */
7429 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
7431 gcc_assert (can_create_pseudo_p ());
7432 offset
= force_reg (Pmode
, offset
);
7435 if (CONST_INT_P (offset
))
7436 return plus_constant (Pmode
, base
, INTVAL (offset
));
7439 if (GET_MODE_SIZE (mode
) > 4
7440 && (GET_MODE_CLASS (mode
) == MODE_INT
7441 || TARGET_SOFT_FLOAT
))
7443 emit_insn (gen_addsi3 (reg
, base
, offset
));
7447 return gen_rtx_PLUS (Pmode
, base
, offset
);
7454 /* Find a spare register to use during the prolog of a function. */
7457 thumb_find_work_register (unsigned long pushed_regs_mask
)
7461 /* Check the argument registers first as these are call-used. The
7462 register allocation order means that sometimes r3 might be used
7463 but earlier argument registers might not, so check them all. */
7464 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
7465 if (!df_regs_ever_live_p (reg
))
7468 /* Before going on to check the call-saved registers we can try a couple
7469 more ways of deducing that r3 is available. The first is when we are
7470 pushing anonymous arguments onto the stack and we have less than 4
7471 registers worth of fixed arguments(*). In this case r3 will be part of
7472 the variable argument list and so we can be sure that it will be
7473 pushed right at the start of the function. Hence it will be available
7474 for the rest of the prologue.
7475 (*): ie crtl->args.pretend_args_size is greater than 0. */
7476 if (cfun
->machine
->uses_anonymous_args
7477 && crtl
->args
.pretend_args_size
> 0)
7478 return LAST_ARG_REGNUM
;
7480 /* The other case is when we have fixed arguments but less than 4 registers
7481 worth. In this case r3 might be used in the body of the function, but
7482 it is not being used to convey an argument into the function. In theory
7483 we could just check crtl->args.size to see how many bytes are
7484 being passed in argument registers, but it seems that it is unreliable.
7485 Sometimes it will have the value 0 when in fact arguments are being
7486 passed. (See testcase execute/20021111-1.c for an example). So we also
7487 check the args_info.nregs field as well. The problem with this field is
7488 that it makes no allowances for arguments that are passed to the
7489 function but which are not used. Hence we could miss an opportunity
7490 when a function has an unused argument in r3. But it is better to be
7491 safe than to be sorry. */
7492 if (! cfun
->machine
->uses_anonymous_args
7493 && crtl
->args
.size
>= 0
7494 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
7495 && (TARGET_AAPCS_BASED
7496 ? crtl
->args
.info
.aapcs_ncrn
< 4
7497 : crtl
->args
.info
.nregs
< 4))
7498 return LAST_ARG_REGNUM
;
7500 /* Otherwise look for a call-saved register that is going to be pushed. */
7501 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
7502 if (pushed_regs_mask
& (1 << reg
))
7507 /* Thumb-2 can use high regs. */
7508 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
7509 if (pushed_regs_mask
& (1 << reg
))
7512 /* Something went wrong - thumb_compute_save_reg_mask()
7513 should have arranged for a suitable register to be pushed. */
7517 static GTY(()) int pic_labelno
;
7519 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7523 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
7525 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
7527 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
7530 gcc_assert (flag_pic
);
7532 pic_reg
= cfun
->machine
->pic_reg
;
7533 if (TARGET_VXWORKS_RTP
)
7535 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
7536 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7537 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
7539 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
7541 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
7542 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
7546 /* We use an UNSPEC rather than a LABEL_REF because this label
7547 never appears in the code stream. */
7549 labelno
= GEN_INT (pic_labelno
++);
7550 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7551 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7553 /* On the ARM the PC register contains 'dot + 8' at the time of the
7554 addition, on the Thumb it is 'dot + 4'. */
7555 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7556 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
7558 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7562 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7564 else /* TARGET_THUMB1 */
7566 if (arm_pic_register
!= INVALID_REGNUM
7567 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
7569 /* We will have pushed the pic register, so we should always be
7570 able to find a work register. */
7571 pic_tmp
= gen_rtx_REG (SImode
,
7572 thumb_find_work_register (saved_regs
));
7573 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
7574 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
7575 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
7577 else if (arm_pic_register
!= INVALID_REGNUM
7578 && arm_pic_register
> LAST_LO_REGNUM
7579 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
7581 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7582 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
7583 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
7586 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7590 /* Need to emit this whether or not we obey regdecls,
7591 since setjmp/longjmp can cause life info to screw up. */
7595 /* Generate code to load the address of a static var when flag_pic is set. */
7597 arm_pic_static_addr (rtx orig
, rtx reg
)
7599 rtx l1
, labelno
, offset_rtx
;
7601 gcc_assert (flag_pic
);
7603 /* We use an UNSPEC rather than a LABEL_REF because this label
7604 never appears in the code stream. */
7605 labelno
= GEN_INT (pic_labelno
++);
7606 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7607 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7609 /* On the ARM the PC register contains 'dot + 8' at the time of the
7610 addition, on the Thumb it is 'dot + 4'. */
7611 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7612 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
7613 UNSPEC_SYMBOL_OFFSET
);
7614 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
7616 return emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
7619 /* Return nonzero if X is valid as an ARM state addressing register. */
7621 arm_address_register_rtx_p (rtx x
, int strict_p
)
7631 return ARM_REGNO_OK_FOR_BASE_P (regno
);
7633 return (regno
<= LAST_ARM_REGNUM
7634 || regno
>= FIRST_PSEUDO_REGISTER
7635 || regno
== FRAME_POINTER_REGNUM
7636 || regno
== ARG_POINTER_REGNUM
);
7639 /* Return TRUE if this rtx is the difference of a symbol and a label,
7640 and will reduce to a PC-relative relocation in the object file.
7641 Expressions like this can be left alone when generating PIC, rather
7642 than forced through the GOT. */
7644 pcrel_constant_p (rtx x
)
7646 if (GET_CODE (x
) == MINUS
)
7647 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
7652 /* Return true if X will surely end up in an index register after next
7655 will_be_in_index_register (const_rtx x
)
7657 /* arm.md: calculate_pic_address will split this into a register. */
7658 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
7661 /* Return nonzero if X is a valid ARM state address operand. */
7663 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
7667 enum rtx_code code
= GET_CODE (x
);
7669 if (arm_address_register_rtx_p (x
, strict_p
))
7672 use_ldrd
= (TARGET_LDRD
7673 && (mode
== DImode
|| mode
== DFmode
));
7675 if (code
== POST_INC
|| code
== PRE_DEC
7676 || ((code
== PRE_INC
|| code
== POST_DEC
)
7677 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7678 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7680 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7681 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7682 && GET_CODE (XEXP (x
, 1)) == PLUS
7683 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7685 rtx addend
= XEXP (XEXP (x
, 1), 1);
7687 /* Don't allow ldrd post increment by register because it's hard
7688 to fixup invalid register choices. */
7690 && GET_CODE (x
) == POST_MODIFY
7694 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
7695 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
7698 /* After reload constants split into minipools will have addresses
7699 from a LABEL_REF. */
7700 else if (reload_completed
7701 && (code
== LABEL_REF
7703 && GET_CODE (XEXP (x
, 0)) == PLUS
7704 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7705 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7708 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7711 else if (code
== PLUS
)
7713 rtx xop0
= XEXP (x
, 0);
7714 rtx xop1
= XEXP (x
, 1);
7716 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7717 && ((CONST_INT_P (xop1
)
7718 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
7719 || (!strict_p
&& will_be_in_index_register (xop1
))))
7720 || (arm_address_register_rtx_p (xop1
, strict_p
)
7721 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
7725 /* Reload currently can't handle MINUS, so disable this for now */
7726 else if (GET_CODE (x
) == MINUS
)
7728 rtx xop0
= XEXP (x
, 0);
7729 rtx xop1
= XEXP (x
, 1);
7731 return (arm_address_register_rtx_p (xop0
, strict_p
)
7732 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
7736 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7737 && code
== SYMBOL_REF
7738 && CONSTANT_POOL_ADDRESS_P (x
)
7740 && symbol_mentioned_p (get_pool_constant (x
))
7741 && ! pcrel_constant_p (get_pool_constant (x
))))
7747 /* Return true if we can avoid creating a constant pool entry for x. */
7749 can_avoid_literal_pool_for_label_p (rtx x
)
7751 /* Normally we can assign constant values to target registers without
7752 the help of constant pool. But there are cases we have to use constant
7754 1) assign a label to register.
7755 2) sign-extend a 8bit value to 32bit and then assign to register.
7757 Constant pool access in format:
7758 (set (reg r0) (mem (symbol_ref (".LC0"))))
7759 will cause the use of literal pool (later in function arm_reorg).
7760 So here we mark such format as an invalid format, then the compiler
7761 will adjust it into:
7762 (set (reg r0) (symbol_ref (".LC0")))
7763 (set (reg r0) (mem (reg r0))).
7764 No extra register is required, and (mem (reg r0)) won't cause the use
7765 of literal pools. */
7766 if (arm_disable_literal_pool
&& GET_CODE (x
) == SYMBOL_REF
7767 && CONSTANT_POOL_ADDRESS_P (x
))
/* Return nonzero if X is a valid Thumb-2 address operand for an access in
   mode MODE.  STRICT_P non-zero requests strict checking (used during and
   after reload, when only genuinely valid hard/base registers may be
   accepted).  */
int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  /* A plain base register is always a valid address.  */
  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  /* LDRD/STRD handle the 64-bit modes; its addressing modes differ from
     the plain LDR fallback.  */
  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL (addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      /* 64-bit accesses need LDRD and a word-aligned 10-bit offset.  */
      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* TImode and NEON structure modes only support reg-indirect.  */
  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      /* base + index, with the index validated separately; in lax mode a
	 pseudo that will later land in an index class also qualifies.  */
      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* PC-relative constant-pool references are fine for non-FP modes,
     except for pic references to pool entries that mention symbols.  */
  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  OUTER is the surrounding rtx code (e.g. SIGN_EXTEND for a
   sign-extending load), which narrows the permitted offset range on
   pre-v4 targets.  STRICT_P selects strict register checking.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* Register index is fine for accesses of word size or smaller.  */
  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  /* LDRD has a signed 8-bit offset; the LDR/LDR fallback needs
	     both words in range, hence 4092 rather than 4096.  */
	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      /* Scaled-register index: reg + (reg * 2^n) or reg + (reg shifted).  */
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}
7957 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7958 index operand. i.e. 1, 2, 4 or 8. */
7960 thumb2_index_mul_operand (rtx op
)
7964 if (!CONST_INT_P (op
))
7968 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
/* Return nonzero if INDEX is a valid Thumb-2 address index operand for an
   access in mode MODE.  STRICT_P selects strict register checking.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for it's core register
	       load/stores. Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* ??? Can we assume ldrd for thumb2?  */
	  /* Thumb-2 ldrd only has reg+const addressing modes.  */
	  /* ldrd supports offsets of +-1020.
	     However the ldr fallback does not.  */
	  return val > -256 && val < 256 && (val & 3) == 0;
	}
      else
	return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      /* reg + reg * {1,2,4,8}, either way round.  */
      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      /* reg + (reg << {1..3}).  */
      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  /* Plain immediate offset: 12-bit positive, 8-bit negative.  */
  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}
8063 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8065 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
8075 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
8077 return (regno
<= LAST_LO_REGNUM
8078 || regno
> LAST_VIRTUAL_REGISTER
8079 || regno
== FRAME_POINTER_REGNUM
8080 || (GET_MODE_SIZE (mode
) >= 4
8081 && (regno
== STACK_POINTER_REGNUM
8082 || regno
>= FIRST_PSEUDO_REGISTER
8083 || x
== hard_frame_pointer_rtx
8084 || x
== arg_pointer_rtx
)));
8087 /* Return nonzero if x is a legitimate index register. This is the case
8088 for any base register that can access a QImode object. */
8090 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
8092 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (GET_CODE (x) == LABEL_REF
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      /* Frame/arg/virtual pointer plus word-aligned constant: these will
	 be eliminated to SP/FP-relative forms later.  */
      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
		       && REGNO (XEXP (x, 0))
			  <= LAST_VIRTUAL_POINTER_REGISTER))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  /* Word-sized non-FP constant-pool references, unless pic forbids.  */
  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
8212 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8213 instruction of mode MODE. */
8215 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
8217 switch (GET_MODE_SIZE (mode
))
8220 return val
>= 0 && val
< 32;
8223 return val
>= 0 && val
< 64 && (val
& 1) == 0;
8227 && (val
+ GET_MODE_SIZE (mode
)) <= 128
8233 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
8236 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
8237 else if (TARGET_THUMB2
)
8238 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
8239 else /* if (TARGET_THUMB1) */
8240 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
8243 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8245 Given an rtx X being reloaded into a reg required to be
8246 in class CLASS, return the class of reg to actually use.
8247 In general this is just CLASS, but for the Thumb core registers and
8248 immediate constants we prefer a LO_REGS class or a subset. */
8251 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
8257 if (rclass
== GENERAL_REGS
)
8264 /* Build the SYMBOL_REF for __tls_get_addr. */
8266 static GTY(()) rtx tls_get_addr_libfunc
;
8269 get_tls_get_addr (void)
8271 if (!tls_get_addr_libfunc
)
8272 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
8273 return tls_get_addr_libfunc
;
/* Emit insns that load the thread pointer into TARGET (a fresh pseudo is
   created when TARGET is null) and return the register holding it.  */
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */

      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}
8304 load_tls_operand (rtx x
, rtx reg
)
8308 if (reg
== NULL_RTX
)
8309 reg
= gen_reg_rtx (SImode
);
8311 tmp
= gen_rtx_CONST (SImode
, x
);
8313 emit_move_insn (reg
, tmp
);
/* Emit a call to __tls_get_addr for symbol X using relocation RELOC
   (TLS_GD32 or TLS_LDM32 — never TLS_DESCSEQ).  The argument is built
   pc-relative via a local pic label.  REG, if non-null, is used for the
   argument; *VALUEP receives the call's value rtx.  Returns the emitted
   insn sequence (caller wraps it with emit_libcall_block).  */
static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  /* The pc-bias is 8 in ARM state and 4 in Thumb state.  */
  sum = gen_rtx_UNSPEC (Pmode,
			gen_rtvec (4, x, GEN_INT (reloc), label,
				   GEN_INT (TARGET_ARM ? 8 : 4)),
			UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, 1, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}
/* GNU2 (descriptor-based) TLS: emit the tlscall sequence for symbol X and
   return a register (REG if given, else a fresh pseudo) holding the
   module-relative offset.  The descriptor call returns in r0, so the
   result is copied out of r0 immediately.  */
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    /* The result must be moved out of r0; REG may not alias it.  */
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}
/* Legitimize a reference to TLS symbol X according to its access model
   (SYMBOL_REF_TLS_MODEL), emitting whatever insns are needed.  REG, if
   non-null, may be used as a scratch/result register.  Returns an rtx for
   the symbol's address.  */
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  /* Descriptor scheme: tlscall yields the tp-relative offset.  */
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme */
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode,
				   gen_rtvec (2, x,
					      GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      /* Load the GOT-indirect tp-offset pc-relatively.  */
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
	emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
	emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
	{
	  emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	  emit_move_insn (reg, gen_const_mem (SImode, reg));
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      /* The tp-relative offset is a link-time constant (TLS_LE32).  */
      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   X is the candidate address, ORIG_X the address before any earlier
   transformation (needed for pic), MODE the mode of the access.  */
rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      /* Peel a constant addend off (const (plus SYM N)).  */
      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (x, 0), 1);
	  x = XEXP (XEXP (x, 0), 0);
	}

      if (GET_CODE (x) != SYMBOL_REF)
	return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
	{
	  x = gen_rtx_PLUS (SImode, x, addend);
	  orig_x = x;
	}
      else
	return x;
    }

  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
	return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode || mode == DFmode)
	    {
	      /* Keep a small low part in range for LDRD-style accesses.  */
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allows for the mini pool to be avoided entirely. */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
         use a 8-bit index. So let's use a 12-bit index for SImode only and
         hope that arm_gen_constant will enable ldrb to use more bits. */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the base
	     with more bits set and use a negative index instead. */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.
   ORIG_X is the pre-transformation address (needed for pic).  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  /* reg + constant whose offset does not fit the 5-bit scaled form.  */
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}
/* Return TRUE if X contains any TLS symbol references.  Also diagnoses
   (via sorry) TLS use under -mpure-code/-mslow-flash-data, where literal
   pools are disabled and TLS cannot currently be implemented.  */
bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      /* NOTE: this inner X deliberately shadows the parameter.  */
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	{
	  /* ARM currently does not provide relocations to encode TLS variables
	     into AArch32 instructions, only data, so there is no way to
	     currently implement these if a literal pool is disabled.  */
	  if (arm_disable_literal_pool)
	    sorry ("accessing thread-local storage is not currently supported "
		   "with -mpure-code or -mslow-flash-data");

	  return true;
	}

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
8713 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8715 On the ARM, allow any integer (invalid ones are removed later by insn
8716 patterns), nice doubles and symbol_refs which refer to the function's
8719 When generating pic allow anything. */
8722 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
8724 return flag_pic
|| !label_mentioned_p (x
);
8728 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8730 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8731 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8732 for ARMv8-M Baseline or later the result is valid. */
8733 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
8736 return (CONST_INT_P (x
)
8737 || CONST_DOUBLE_P (x
)
8738 || CONSTANT_ADDRESS_P (x
)
8739 || (TARGET_HAVE_MOVT
&& GET_CODE (x
) == SYMBOL_REF
)
8744 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8746 return (!arm_cannot_force_const_mem (mode
, x
)
8748 ? arm_legitimate_constant_p_1 (mode
, x
)
8749 : thumb_legitimate_constant_p (mode
, x
)));
8752 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8755 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8759 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8761 split_const (x
, &base
, &offset
);
8762 if (GET_CODE (base
) == SYMBOL_REF
8763 && !offset_within_block_p (base
, INTVAL (offset
)))
8766 return arm_tls_referenced_p (x
);
/* Matches an rtx that is either a REG or a SUBREG wrapping a REG.  */
#define REG_OR_SUBREG_REG(X)	\
  (REG_P (X)							\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

/* Strip the SUBREG (if any) and yield the underlying REG.  */
#define REG_OR_SUBREG_RTX(X)			\
  (REG_P (X) ? (X) : SUBREG_REG (X))
/* Estimate the speed cost of rtx X (whose code is CODE) appearing inside an
   rtx of code OUTER, for Thumb-1.  Returns COSTS_N_INSNS-style units, with
   several admittedly rough guesses (see the XXX notes below).  */
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* On purpose fall through for normal RTX.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
	return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Approximate the early-termination behaviour of the multiplier:
	     one cycle per two significant bits of the constant.  */
	  int cycles = 0;
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  while (i)
	    {
	      i >>= 2;
	      cycles++;
	    }
	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256
	      /* 16-bit constant.  */
	      || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
	    return 0;
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
	return total;

      if (arm_arch6)
	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs. We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words, cost;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* On purpose fall through for normal RTX.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  /* Thumb1 mul instruction can't operate on const. We must Load it
	     into a register first.  */
	  int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);
	  else
	    return COSTS_N_INSNS (1) + const_size;
	}
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
	  || satisfies_constraint_K (SET_SRC (x))
	     /* Too big an immediate for a 2-byte mov, using MOVT.  */
	  || (CONST_INT_P (SET_SRC (x))
	      && UINTVAL (SET_SRC (x)) >= 256
	      && TARGET_HAVE_MOVT
	      && satisfies_constraint_j (SET_SRC (x)))
	     /* thumb1_movdi_insn.  */
	  || ((words > 1) && MEM_P (SET_SRC (x))))
	cost += COSTS_N_INSNS (1);
      return cost;

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256)
	    return COSTS_N_INSNS (1);
	  /* movw is 4byte long.  */
	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      /* First word costs an insn, each further word one more; pool loads
	 pay one extra.  */
      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
	{
	case QImode:
	  return (1 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case HImode:
	  return (4 + (mode == DImode ? 4 : 0)
		  + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	case SImode:
	  return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

	default:
	  return 99;
	}

    default:
      return 99;
    }
}
9068 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9069 operand, then return the operand that is being shifted. If the shift
9070 is not by a constant, then set SHIFT_REG to point to the operand.
9071 Return NULL if OP is not a shifter operand. */
9073 shifter_op_p (rtx op
, rtx
*shift_reg
)
9075 enum rtx_code code
= GET_CODE (op
);
9077 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9078 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9079 return XEXP (op
, 0);
9080 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9081 return XEXP (op
, 0);
9082 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9083 || code
== ASHIFTRT
)
9085 if (!CONST_INT_P (XEXP (op
, 1)))
9086 *shift_reg
= XEXP (op
, 1);
9087 return XEXP (op
, 0);
9094 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9096 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9097 rtx_code code
= GET_CODE (x
);
9098 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9100 switch (XINT (x
, 1))
9102 case UNSPEC_UNALIGNED_LOAD
:
9103 /* We can only do unaligned loads into the integer unit, and we can't
9105 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9107 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9108 + extra_cost
->ldst
.load_unaligned
);
9111 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9112 ADDR_SPACE_GENERIC
, speed_p
);
9116 case UNSPEC_UNALIGNED_STORE
:
9117 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9119 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9120 + extra_cost
->ldst
.store_unaligned
);
9122 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
9124 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9125 ADDR_SPACE_GENERIC
, speed_p
);
9136 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9140 *cost
= COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.
   N is parenthesized in the expansion so that compound arguments
   (e.g. a conditional expression) associate correctly with the '+'.  */
#define LIBCALL_COST(N) COSTS_N_INSNS ((N) + (speed_p ? 18 : 2))
/* Helper for arm_rtx_costs_internal: cost a narrow-mode (sub-word)
   arithmetic operation of rtx code OP whose operand IDX may be a
   shifter operand.  On a match, accumulate into *cost and return true
   from the enclosing function; otherwise fall through.  Relies on
   X, SHIFT_OP, SHIFT_REG, EXTRA_COST, COST and SPEED_P being in scope.  */
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
9177 /* RTX costs. Make an estimate of the cost of executing the operation
9178 X, which is contained with an operation with code OUTER_CODE.
9179 SPEED_P indicates whether the cost desired is the performance cost,
9180 or the size cost. The estimate is stored in COST and the return
9181 value is TRUE if the cost calculation is final, or FALSE if the
9182 caller should recurse through the operands of X to add additional
9185 We currently make no attempt to model the size savings of Thumb-2
9186 16-bit instructions. At the normal points in compilation where
9187 this code is called we have no measure of whether the condition
9188 flags are live or not, and thus no realistic way to determine what
9189 the size will eventually be. */
9191 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9192 const struct cpu_cost_table
*extra_cost
,
9193 int *cost
, bool speed_p
)
9195 machine_mode mode
= GET_MODE (x
);
9197 *cost
= COSTS_N_INSNS (1);
9202 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9204 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9212 /* SET RTXs don't have a mode so we get it from the destination. */
9213 mode
= GET_MODE (SET_DEST (x
));
9215 if (REG_P (SET_SRC (x
))
9216 && REG_P (SET_DEST (x
)))
9218 /* Assume that most copies can be done with a single insn,
9219 unless we don't have HW FP, in which case everything
9220 larger than word mode will require two insns. */
9221 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9222 && GET_MODE_SIZE (mode
) > 4)
9225 /* Conditional register moves can be encoded
9226 in 16 bits in Thumb mode. */
9227 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9233 if (CONST_INT_P (SET_SRC (x
)))
9235 /* Handle CONST_INT here, since the value doesn't have a mode
9236 and we would otherwise be unable to work out the true cost. */
9237 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9240 /* Slightly lower the cost of setting a core reg to a constant.
9241 This helps break up chains and allows for better scheduling. */
9242 if (REG_P (SET_DEST (x
))
9243 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9246 /* Immediate moves with an immediate in the range [0, 255] can be
9247 encoded in 16 bits in Thumb mode. */
9248 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9249 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9251 goto const_int_cost
;
9257 /* A memory access costs 1 insn if the mode is small, or the address is
9258 a single register, otherwise it costs one insn per word. */
9259 if (REG_P (XEXP (x
, 0)))
9260 *cost
= COSTS_N_INSNS (1);
9262 && GET_CODE (XEXP (x
, 0)) == PLUS
9263 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9264 /* This will be split into two instructions.
9265 See arm.md:calculate_pic_address. */
9266 *cost
= COSTS_N_INSNS (2);
9268 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9270 /* For speed optimizations, add the costs of the address and
9271 accessing memory. */
9274 *cost
+= (extra_cost
->ldst
.load
9275 + arm_address_cost (XEXP (x
, 0), mode
,
9276 ADDR_SPACE_GENERIC
, speed_p
));
9278 *cost
+= extra_cost
->ldst
.load
;
9284 /* Calculations of LDM costs are complex. We assume an initial cost
9285 (ldm_1st) which will load the number of registers mentioned in
9286 ldm_regs_per_insn_1st registers; then each additional
9287 ldm_regs_per_insn_subsequent registers cost one more insn. The
9288 formula for N regs is thus:
9290 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9291 + ldm_regs_per_insn_subsequent - 1)
9292 / ldm_regs_per_insn_subsequent).
9294 Additional costs may also be added for addressing. A similar
9295 formula is used for STM. */
9297 bool is_ldm
= load_multiple_operation (x
, SImode
);
9298 bool is_stm
= store_multiple_operation (x
, SImode
);
9300 if (is_ldm
|| is_stm
)
9304 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9305 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9306 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9307 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9308 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9309 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9310 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9312 *cost
+= regs_per_insn_1st
9313 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9314 + regs_per_insn_sub
- 1)
9315 / regs_per_insn_sub
);
9324 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9325 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9326 *cost
+= COSTS_N_INSNS (speed_p
9327 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9328 else if (mode
== SImode
&& TARGET_IDIV
)
9329 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9331 *cost
= LIBCALL_COST (2);
9332 return false; /* All arguments must be in registers. */
9335 /* MOD by a power of 2 can be expanded as:
9337 and r0, r0, #(n - 1)
9338 and r1, r1, #(n - 1)
9339 rsbpl r0, r1, #0. */
9340 if (CONST_INT_P (XEXP (x
, 1))
9341 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9344 *cost
+= COSTS_N_INSNS (3);
9347 *cost
+= 2 * extra_cost
->alu
.logical
9348 + extra_cost
->alu
.arith
;
9354 *cost
= LIBCALL_COST (2);
9355 return false; /* All arguments must be in registers. */
9358 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9360 *cost
+= (COSTS_N_INSNS (1)
9361 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9363 *cost
+= extra_cost
->alu
.shift_reg
;
9371 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9373 *cost
+= (COSTS_N_INSNS (2)
9374 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9376 *cost
+= 2 * extra_cost
->alu
.shift
;
9379 else if (mode
== SImode
)
9381 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9382 /* Slightly disparage register shifts at -Os, but not by much. */
9383 if (!CONST_INT_P (XEXP (x
, 1)))
9384 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9385 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9388 else if (GET_MODE_CLASS (mode
) == MODE_INT
9389 && GET_MODE_SIZE (mode
) < 4)
9393 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9394 /* Slightly disparage register shifts at -Os, but not by
9396 if (!CONST_INT_P (XEXP (x
, 1)))
9397 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9398 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9400 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9402 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9404 /* Can use SBFX/UBFX. */
9406 *cost
+= extra_cost
->alu
.bfx
;
9407 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9411 *cost
+= COSTS_N_INSNS (1);
9412 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9415 if (CONST_INT_P (XEXP (x
, 1)))
9416 *cost
+= 2 * extra_cost
->alu
.shift
;
9418 *cost
+= (extra_cost
->alu
.shift
9419 + extra_cost
->alu
.shift_reg
);
9422 /* Slightly disparage register shifts. */
9423 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9428 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9429 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9432 if (CONST_INT_P (XEXP (x
, 1)))
9433 *cost
+= (2 * extra_cost
->alu
.shift
9434 + extra_cost
->alu
.log_shift
);
9436 *cost
+= (extra_cost
->alu
.shift
9437 + extra_cost
->alu
.shift_reg
9438 + extra_cost
->alu
.log_shift_reg
);
9444 *cost
= LIBCALL_COST (2);
9453 *cost
+= extra_cost
->alu
.rev
;
9460 /* No rev instruction available. Look at arm_legacy_rev
9461 and thumb_legacy_rev for the form of RTL used then. */
9464 *cost
+= COSTS_N_INSNS (9);
9468 *cost
+= 6 * extra_cost
->alu
.shift
;
9469 *cost
+= 3 * extra_cost
->alu
.logical
;
9474 *cost
+= COSTS_N_INSNS (4);
9478 *cost
+= 2 * extra_cost
->alu
.shift
;
9479 *cost
+= extra_cost
->alu
.arith_shift
;
9480 *cost
+= 2 * extra_cost
->alu
.logical
;
9488 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9489 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9491 if (GET_CODE (XEXP (x
, 0)) == MULT
9492 || GET_CODE (XEXP (x
, 1)) == MULT
)
9494 rtx mul_op0
, mul_op1
, sub_op
;
9497 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9499 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9501 mul_op0
= XEXP (XEXP (x
, 0), 0);
9502 mul_op1
= XEXP (XEXP (x
, 0), 1);
9503 sub_op
= XEXP (x
, 1);
9507 mul_op0
= XEXP (XEXP (x
, 1), 0);
9508 mul_op1
= XEXP (XEXP (x
, 1), 1);
9509 sub_op
= XEXP (x
, 0);
9512 /* The first operand of the multiply may be optionally
9514 if (GET_CODE (mul_op0
) == NEG
)
9515 mul_op0
= XEXP (mul_op0
, 0);
9517 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9518 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9519 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9525 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9531 rtx shift_by_reg
= NULL
;
9535 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9536 if (shift_op
== NULL
)
9538 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9539 non_shift_op
= XEXP (x
, 0);
9542 non_shift_op
= XEXP (x
, 1);
9544 if (shift_op
!= NULL
)
9546 if (shift_by_reg
!= NULL
)
9549 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9550 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9553 *cost
+= extra_cost
->alu
.arith_shift
;
9555 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9556 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9561 && GET_CODE (XEXP (x
, 1)) == MULT
)
9565 *cost
+= extra_cost
->mult
[0].add
;
9566 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9567 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9568 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9572 if (CONST_INT_P (XEXP (x
, 0)))
9574 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9575 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9577 *cost
= COSTS_N_INSNS (insns
);
9579 *cost
+= insns
* extra_cost
->alu
.arith
;
9580 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9584 *cost
+= extra_cost
->alu
.arith
;
9589 if (GET_MODE_CLASS (mode
) == MODE_INT
9590 && GET_MODE_SIZE (mode
) < 4)
9592 rtx shift_op
, shift_reg
;
9595 /* We check both sides of the MINUS for shifter operands since,
9596 unlike PLUS, it's not commutative. */
9598 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0)
9599 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1)
9601 /* Slightly disparage, as we might need to widen the result. */
9604 *cost
+= extra_cost
->alu
.arith
;
9606 if (CONST_INT_P (XEXP (x
, 0)))
9608 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9617 *cost
+= COSTS_N_INSNS (1);
9619 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9621 rtx op1
= XEXP (x
, 1);
9624 *cost
+= 2 * extra_cost
->alu
.arith
;
9626 if (GET_CODE (op1
) == ZERO_EXTEND
)
9627 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
9630 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
9631 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9635 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9638 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9639 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
9641 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
9644 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9645 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9648 *cost
+= (extra_cost
->alu
.arith
9649 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9650 ? extra_cost
->alu
.arith
9651 : extra_cost
->alu
.arith_shift
));
9652 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
9653 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9654 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9659 *cost
+= 2 * extra_cost
->alu
.arith
;
9665 *cost
= LIBCALL_COST (2);
9669 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9670 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9672 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9674 rtx mul_op0
, mul_op1
, add_op
;
9677 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9679 mul_op0
= XEXP (XEXP (x
, 0), 0);
9680 mul_op1
= XEXP (XEXP (x
, 0), 1);
9681 add_op
= XEXP (x
, 1);
9683 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9684 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9685 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
9691 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9694 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9696 *cost
= LIBCALL_COST (2);
9700 /* Narrow modes can be synthesized in SImode, but the range
9701 of useful sub-operations is limited. Check for shift operations
9702 on one of the operands. Only left shifts can be used in the
9704 if (GET_MODE_CLASS (mode
) == MODE_INT
9705 && GET_MODE_SIZE (mode
) < 4)
9707 rtx shift_op
, shift_reg
;
9710 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0)
9712 if (CONST_INT_P (XEXP (x
, 1)))
9714 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9715 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9717 *cost
= COSTS_N_INSNS (insns
);
9719 *cost
+= insns
* extra_cost
->alu
.arith
;
9720 /* Slightly penalize a narrow operation as the result may
9722 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9726 /* Slightly penalize a narrow operation as the result may
9730 *cost
+= extra_cost
->alu
.arith
;
9737 rtx shift_op
, shift_reg
;
9740 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9741 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9743 /* UXTA[BH] or SXTA[BH]. */
9745 *cost
+= extra_cost
->alu
.extend_arith
;
9746 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9748 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
9753 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9754 if (shift_op
!= NULL
)
9759 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9760 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9763 *cost
+= extra_cost
->alu
.arith_shift
;
9765 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9766 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9769 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9771 rtx mul_op
= XEXP (x
, 0);
9773 if (TARGET_DSP_MULTIPLY
9774 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9775 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9776 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9777 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9778 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9779 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9780 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9781 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9782 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9783 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9784 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9785 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9790 *cost
+= extra_cost
->mult
[0].extend_add
;
9791 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
9792 SIGN_EXTEND
, 0, speed_p
)
9793 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
9794 SIGN_EXTEND
, 0, speed_p
)
9795 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9800 *cost
+= extra_cost
->mult
[0].add
;
9801 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
9802 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
9803 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9806 if (CONST_INT_P (XEXP (x
, 1)))
9808 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9809 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9811 *cost
= COSTS_N_INSNS (insns
);
9813 *cost
+= insns
* extra_cost
->alu
.arith
;
9814 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9818 *cost
+= extra_cost
->alu
.arith
;
9826 && GET_CODE (XEXP (x
, 0)) == MULT
9827 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9828 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9829 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9830 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9833 *cost
+= extra_cost
->mult
[1].extend_add
;
9834 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
9835 ZERO_EXTEND
, 0, speed_p
)
9836 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
9837 ZERO_EXTEND
, 0, speed_p
)
9838 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9842 *cost
+= COSTS_N_INSNS (1);
9844 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9845 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9848 *cost
+= (extra_cost
->alu
.arith
9849 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9850 ? extra_cost
->alu
.arith
9851 : extra_cost
->alu
.arith_shift
));
9853 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9855 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9860 *cost
+= 2 * extra_cost
->alu
.arith
;
9865 *cost
= LIBCALL_COST (2);
9868 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
9871 *cost
+= extra_cost
->alu
.rev
;
9879 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9880 rtx op0
= XEXP (x
, 0);
9881 rtx shift_op
, shift_reg
;
9885 || (code
== IOR
&& TARGET_THUMB2
)))
9886 op0
= XEXP (op0
, 0);
9889 shift_op
= shifter_op_p (op0
, &shift_reg
);
9890 if (shift_op
!= NULL
)
9895 *cost
+= extra_cost
->alu
.log_shift_reg
;
9896 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9899 *cost
+= extra_cost
->alu
.log_shift
;
9901 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9902 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9906 if (CONST_INT_P (XEXP (x
, 1)))
9908 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9909 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9912 *cost
= COSTS_N_INSNS (insns
);
9914 *cost
+= insns
* extra_cost
->alu
.logical
;
9915 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
9920 *cost
+= extra_cost
->alu
.logical
;
9921 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
9922 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9928 rtx op0
= XEXP (x
, 0);
9929 enum rtx_code subcode
= GET_CODE (op0
);
9931 *cost
+= COSTS_N_INSNS (1);
9935 || (code
== IOR
&& TARGET_THUMB2
)))
9936 op0
= XEXP (op0
, 0);
9938 if (GET_CODE (op0
) == ZERO_EXTEND
)
9941 *cost
+= 2 * extra_cost
->alu
.logical
;
9943 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
9945 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9948 else if (GET_CODE (op0
) == SIGN_EXTEND
)
9951 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
9953 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
9955 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
9960 *cost
+= 2 * extra_cost
->alu
.logical
;
9966 *cost
= LIBCALL_COST (2);
9970 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9971 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9973 rtx op0
= XEXP (x
, 0);
9975 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
9976 op0
= XEXP (op0
, 0);
9979 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
9981 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
9982 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
9985 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9987 *cost
= LIBCALL_COST (2);
9993 if (TARGET_DSP_MULTIPLY
9994 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
9995 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
9996 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
9997 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
9998 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
9999 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10000 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10001 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10002 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10003 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10004 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10005 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10008 /* SMUL[TB][TB]. */
10010 *cost
+= extra_cost
->mult
[0].extend
;
10011 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
10012 SIGN_EXTEND
, 0, speed_p
);
10013 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
10014 SIGN_EXTEND
, 1, speed_p
);
10018 *cost
+= extra_cost
->mult
[0].simple
;
10022 if (mode
== DImode
)
10025 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10026 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10027 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10028 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10031 *cost
+= extra_cost
->mult
[1].extend
;
10032 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
10033 ZERO_EXTEND
, 0, speed_p
)
10034 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10035 ZERO_EXTEND
, 0, speed_p
));
10039 *cost
= LIBCALL_COST (2);
10044 *cost
= LIBCALL_COST (2);
10048 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10049 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10051 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10054 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
10059 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10063 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10065 *cost
= LIBCALL_COST (1);
10069 if (mode
== SImode
)
10071 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10073 *cost
+= COSTS_N_INSNS (1);
10074 /* Assume the non-flag-changing variant. */
10076 *cost
+= (extra_cost
->alu
.log_shift
10077 + extra_cost
->alu
.arith_shift
);
10078 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10082 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10083 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10085 *cost
+= COSTS_N_INSNS (1);
10086 /* No extra cost for MOV imm and MVN imm. */
10087 /* If the comparison op is using the flags, there's no further
10088 cost, otherwise we need to add the cost of the comparison. */
10089 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10090 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10091 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10093 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10094 *cost
+= (COSTS_N_INSNS (1)
10095 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10097 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10100 *cost
+= extra_cost
->alu
.arith
;
10106 *cost
+= extra_cost
->alu
.arith
;
10110 if (GET_MODE_CLASS (mode
) == MODE_INT
10111 && GET_MODE_SIZE (mode
) < 4)
10113 /* Slightly disparage, as we might need an extend operation. */
10116 *cost
+= extra_cost
->alu
.arith
;
10120 if (mode
== DImode
)
10122 *cost
+= COSTS_N_INSNS (1);
10124 *cost
+= 2 * extra_cost
->alu
.arith
;
10129 *cost
= LIBCALL_COST (1);
10133 if (mode
== SImode
)
10136 rtx shift_reg
= NULL
;
10138 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10142 if (shift_reg
!= NULL
)
10145 *cost
+= extra_cost
->alu
.log_shift_reg
;
10146 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10149 *cost
+= extra_cost
->alu
.log_shift
;
10150 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
10155 *cost
+= extra_cost
->alu
.logical
;
10158 if (mode
== DImode
)
10160 *cost
+= COSTS_N_INSNS (1);
10166 *cost
+= LIBCALL_COST (1);
10171 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10173 *cost
+= COSTS_N_INSNS (3);
10176 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
10177 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
10179 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
10180 /* Assume that if one arm of the if_then_else is a register,
10181 that it will be tied with the result and eliminate the
10182 conditional insn. */
10183 if (REG_P (XEXP (x
, 1)))
10185 else if (REG_P (XEXP (x
, 2)))
10191 if (extra_cost
->alu
.non_exec_costs_exec
)
10192 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10194 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10197 *cost
+= op1cost
+ op2cost
;
10203 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10207 machine_mode op0mode
;
10208 /* We'll mostly assume that the cost of a compare is the cost of the
10209 LHS. However, there are some notable exceptions. */
10211 /* Floating point compares are never done as side-effects. */
10212 op0mode
= GET_MODE (XEXP (x
, 0));
10213 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10214 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10217 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10219 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10221 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
10227 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10229 *cost
= LIBCALL_COST (2);
10233 /* DImode compares normally take two insns. */
10234 if (op0mode
== DImode
)
10236 *cost
+= COSTS_N_INSNS (1);
10238 *cost
+= 2 * extra_cost
->alu
.arith
;
10242 if (op0mode
== SImode
)
10247 if (XEXP (x
, 1) == const0_rtx
10248 && !(REG_P (XEXP (x
, 0))
10249 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10250 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10252 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10254 /* Multiply operations that set the flags are often
10255 significantly more expensive. */
10257 && GET_CODE (XEXP (x
, 0)) == MULT
10258 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10259 *cost
+= extra_cost
->mult
[0].flag_setting
;
10262 && GET_CODE (XEXP (x
, 0)) == PLUS
10263 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10264 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10266 *cost
+= extra_cost
->mult
[0].flag_setting
;
10271 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10272 if (shift_op
!= NULL
)
10274 if (shift_reg
!= NULL
)
10276 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
10279 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10282 *cost
+= extra_cost
->alu
.arith_shift
;
10283 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
10284 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
10289 *cost
+= extra_cost
->alu
.arith
;
10290 if (CONST_INT_P (XEXP (x
, 1))
10291 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10293 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10301 *cost
= LIBCALL_COST (2);
10324 if (outer_code
== SET
)
10326 /* Is it a store-flag operation? */
10327 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10328 && XEXP (x
, 1) == const0_rtx
)
10330 /* Thumb also needs an IT insn. */
10331 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
10334 if (XEXP (x
, 1) == const0_rtx
)
10339 /* LSR Rd, Rn, #31. */
10341 *cost
+= extra_cost
->alu
.shift
;
10351 *cost
+= COSTS_N_INSNS (1);
10355 /* RSBS T1, Rn, Rn, LSR #31
10357 *cost
+= COSTS_N_INSNS (1);
10359 *cost
+= extra_cost
->alu
.arith_shift
;
10363 /* RSB Rd, Rn, Rn, ASR #1
10364 LSR Rd, Rd, #31. */
10365 *cost
+= COSTS_N_INSNS (1);
10367 *cost
+= (extra_cost
->alu
.arith_shift
10368 + extra_cost
->alu
.shift
);
10374 *cost
+= COSTS_N_INSNS (1);
10376 *cost
+= extra_cost
->alu
.shift
;
10380 /* Remaining cases are either meaningless or would take
10381 three insns anyway. */
10382 *cost
= COSTS_N_INSNS (3);
10385 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10390 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10391 if (CONST_INT_P (XEXP (x
, 1))
10392 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10394 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10401 /* Not directly inside a set. If it involves the condition code
10402 register it must be the condition for a branch, cond_exec or
10403 I_T_E operation. Since the comparison is performed elsewhere
10404 this is just the control part which has no additional
10406 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10407 && XEXP (x
, 1) == const0_rtx
)
10415 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10416 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10419 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10423 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10425 *cost
= LIBCALL_COST (1);
10429 if (mode
== SImode
)
10432 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10436 *cost
= LIBCALL_COST (1);
10440 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10441 && MEM_P (XEXP (x
, 0)))
10443 if (mode
== DImode
)
10444 *cost
+= COSTS_N_INSNS (1);
10449 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10450 *cost
+= extra_cost
->ldst
.load
;
10452 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10454 if (mode
== DImode
)
10455 *cost
+= extra_cost
->alu
.shift
;
10460 /* Widening from less than 32-bits requires an extend operation. */
10461 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10463 /* We have SXTB/SXTH. */
10464 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10466 *cost
+= extra_cost
->alu
.extend
;
10468 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10470 /* Needs two shifts. */
10471 *cost
+= COSTS_N_INSNS (1);
10472 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10474 *cost
+= 2 * extra_cost
->alu
.shift
;
10477 /* Widening beyond 32-bits requires one more insn. */
10478 if (mode
== DImode
)
10480 *cost
+= COSTS_N_INSNS (1);
10482 *cost
+= extra_cost
->alu
.shift
;
10489 || GET_MODE (XEXP (x
, 0)) == SImode
10490 || GET_MODE (XEXP (x
, 0)) == QImode
)
10491 && MEM_P (XEXP (x
, 0)))
10493 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10495 if (mode
== DImode
)
10496 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10501 /* Widening from less than 32-bits requires an extend operation. */
10502 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10504 /* UXTB can be a shorter instruction in Thumb2, but it might
10505 be slower than the AND Rd, Rn, #255 alternative. When
10506 optimizing for speed it should never be slower to use
10507 AND, and we don't really model 16-bit vs 32-bit insns
10510 *cost
+= extra_cost
->alu
.logical
;
10512 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10514 /* We have UXTB/UXTH. */
10515 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10517 *cost
+= extra_cost
->alu
.extend
;
10519 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10521 /* Needs two shifts. It's marginally preferable to use
10522 shifts rather than two BIC instructions as the second
10523 shift may merge with a subsequent insn as a shifter
10525 *cost
= COSTS_N_INSNS (2);
10526 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10528 *cost
+= 2 * extra_cost
->alu
.shift
;
10531 /* Widening beyond 32-bits requires one more insn. */
10532 if (mode
== DImode
)
10534 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10541 /* CONST_INT has no mode, so we cannot tell for sure how many
10542 insns are really going to be needed. The best we can do is
10543 look at the value passed. If it fits in SImode, then assume
10544 that's the mode it will be used for. Otherwise assume it
10545 will be used in DImode. */
10546 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10551 /* Avoid blowing up in arm_gen_constant (). */
10552 if (!(outer_code
== PLUS
10553 || outer_code
== AND
10554 || outer_code
== IOR
10555 || outer_code
== XOR
10556 || outer_code
== MINUS
))
10560 if (mode
== SImode
)
10562 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10563 INTVAL (x
), NULL
, NULL
,
10569 *cost
+= COSTS_N_INSNS (arm_gen_constant
10570 (outer_code
, SImode
, NULL
,
10571 trunc_int_for_mode (INTVAL (x
), SImode
),
10573 + arm_gen_constant (outer_code
, SImode
, NULL
,
10574 INTVAL (x
) >> 32, NULL
,
10586 if (arm_arch_thumb2
&& !flag_pic
)
10587 *cost
+= COSTS_N_INSNS (1);
10589 *cost
+= extra_cost
->ldst
.load
;
10592 *cost
+= COSTS_N_INSNS (1);
10596 *cost
+= COSTS_N_INSNS (1);
10598 *cost
+= extra_cost
->alu
.arith
;
10604 *cost
= COSTS_N_INSNS (4);
10609 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10610 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10612 if (vfp3_const_double_rtx (x
))
10615 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10621 if (mode
== DFmode
)
10622 *cost
+= extra_cost
->ldst
.loadd
;
10624 *cost
+= extra_cost
->ldst
.loadf
;
10627 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
10631 *cost
= COSTS_N_INSNS (4);
10637 && TARGET_HARD_FLOAT
10638 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10639 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10640 *cost
= COSTS_N_INSNS (1);
10642 *cost
= COSTS_N_INSNS (4);
10647 /* When optimizing for size, we prefer constant pool entries to
10648 MOVW/MOVT pairs, so bump the cost of these slightly. */
10655 *cost
+= extra_cost
->alu
.clz
;
10659 if (XEXP (x
, 1) == const0_rtx
)
10662 *cost
+= extra_cost
->alu
.log_shift
;
10663 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10666 /* Fall through. */
10670 *cost
+= COSTS_N_INSNS (1);
10674 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10675 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10676 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10677 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10678 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10679 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10680 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10681 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10685 *cost
+= extra_cost
->mult
[1].extend
;
10686 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
10687 ZERO_EXTEND
, 0, speed_p
)
10688 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
10689 ZERO_EXTEND
, 0, speed_p
));
10692 *cost
= LIBCALL_COST (1);
10695 case UNSPEC_VOLATILE
:
10697 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10700 /* Reading the PC is like reading any other register. Writing it
10701 is more expensive, but we take that into account elsewhere. */
10706 /* TODO: Simple zero_extract of bottom bits using AND. */
10707 /* Fall through. */
10711 && CONST_INT_P (XEXP (x
, 1))
10712 && CONST_INT_P (XEXP (x
, 2)))
10715 *cost
+= extra_cost
->alu
.bfx
;
10716 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10719 /* Without UBFX/SBFX, need to resort to shift operations. */
10720 *cost
+= COSTS_N_INSNS (1);
10722 *cost
+= 2 * extra_cost
->alu
.shift
;
10723 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
10727 if (TARGET_HARD_FLOAT
)
10730 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10731 if (!TARGET_FPU_ARMV8
10732 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10734 /* Pre v8, widening HF->DF is a two-step process, first
10735 widening to SFmode. */
10736 *cost
+= COSTS_N_INSNS (1);
10738 *cost
+= extra_cost
->fp
[0].widen
;
10740 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10744 *cost
= LIBCALL_COST (1);
10747 case FLOAT_TRUNCATE
:
10748 if (TARGET_HARD_FLOAT
)
10751 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10752 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10754 /* Vector modes? */
10756 *cost
= LIBCALL_COST (1);
10760 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10762 rtx op0
= XEXP (x
, 0);
10763 rtx op1
= XEXP (x
, 1);
10764 rtx op2
= XEXP (x
, 2);
10767 /* vfms or vfnma. */
10768 if (GET_CODE (op0
) == NEG
)
10769 op0
= XEXP (op0
, 0);
10771 /* vfnms or vfnma. */
10772 if (GET_CODE (op2
) == NEG
)
10773 op2
= XEXP (op2
, 0);
10775 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
10776 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
10777 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
10780 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
10785 *cost
= LIBCALL_COST (3);
10790 if (TARGET_HARD_FLOAT
)
10792 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10793 a vcvt fixed-point conversion. */
10794 if (code
== FIX
&& mode
== SImode
10795 && GET_CODE (XEXP (x
, 0)) == FIX
10796 && GET_MODE (XEXP (x
, 0)) == SFmode
10797 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10798 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10802 *cost
+= extra_cost
->fp
[0].toint
;
10804 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10809 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10811 mode
= GET_MODE (XEXP (x
, 0));
10813 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
10814 /* Strip of the 'cost' of rounding towards zero. */
10815 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10816 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
10819 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10820 /* ??? Increase the cost to deal with transferring from
10821 FP -> CORE registers? */
10824 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10825 && TARGET_FPU_ARMV8
)
10828 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10831 /* Vector costs? */
10833 *cost
= LIBCALL_COST (1);
10837 case UNSIGNED_FLOAT
:
10838 if (TARGET_HARD_FLOAT
)
10840 /* ??? Increase the cost to deal with transferring from CORE
10841 -> FP registers? */
10843 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10846 *cost
= LIBCALL_COST (1);
10854 /* Just a guess. Guess number of instructions in the asm
10855 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10856 though (see PR60663). */
10857 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
10858 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
10860 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
10864 if (mode
!= VOIDmode
)
10865 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10867 *cost
= COSTS_N_INSNS (4); /* Who knows? */
10872 #undef HANDLE_NARROW_SHIFT_ARITH
10874 /* RTX costs entry point. */
10877 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
10878 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
10881 int code
= GET_CODE (x
);
10882 gcc_assert (current_tune
->insn_extra_cost
);
10884 result
= arm_rtx_costs_internal (x
, (enum rtx_code
) code
,
10885 (enum rtx_code
) outer_code
,
10886 current_tune
->insn_extra_cost
,
10889 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
10891 print_rtl_single (dump_file
, x
);
10892 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
10893 *total
, result
? "final" : "partial");
10898 /* All address computations that can be done are free, but rtx cost returns
10899 the same for practically all of them. So we weight the different types
10900 of address here in the order (most pref first):
10901 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10903 arm_arm_address_cost (rtx x
)
10905 enum rtx_code c
= GET_CODE (x
);
10907 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
10909 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
10914 if (CONST_INT_P (XEXP (x
, 1)))
10917 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
10927 arm_thumb_address_cost (rtx x
)
10929 enum rtx_code c
= GET_CODE (x
);
10934 && REG_P (XEXP (x
, 0))
10935 && CONST_INT_P (XEXP (x
, 1)))
10942 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
10943 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
10945 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
10948 /* Adjust cost hook for XScale. */
10950 xscale_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
10953 /* Some true dependencies can have a higher cost depending
10954 on precisely how certain input operands are used. */
10956 && recog_memoized (insn
) >= 0
10957 && recog_memoized (dep
) >= 0)
10959 int shift_opnum
= get_attr_shift (insn
);
10960 enum attr_type attr_type
= get_attr_type (dep
);
10962 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10963 operand for INSN. If we have a shifted input operand and the
10964 instruction we depend on is another ALU instruction, then we may
10965 have to account for an additional stall. */
10966 if (shift_opnum
!= 0
10967 && (attr_type
== TYPE_ALU_SHIFT_IMM
10968 || attr_type
== TYPE_ALUS_SHIFT_IMM
10969 || attr_type
== TYPE_LOGIC_SHIFT_IMM
10970 || attr_type
== TYPE_LOGICS_SHIFT_IMM
10971 || attr_type
== TYPE_ALU_SHIFT_REG
10972 || attr_type
== TYPE_ALUS_SHIFT_REG
10973 || attr_type
== TYPE_LOGIC_SHIFT_REG
10974 || attr_type
== TYPE_LOGICS_SHIFT_REG
10975 || attr_type
== TYPE_MOV_SHIFT
10976 || attr_type
== TYPE_MVN_SHIFT
10977 || attr_type
== TYPE_MOV_SHIFT_REG
10978 || attr_type
== TYPE_MVN_SHIFT_REG
))
10980 rtx shifted_operand
;
10983 /* Get the shifted operand. */
10984 extract_insn (insn
);
10985 shifted_operand
= recog_data
.operand
[shift_opnum
];
10987 /* Iterate over all the operands in DEP. If we write an operand
10988 that overlaps with SHIFTED_OPERAND, then we have increase the
10989 cost of this dependency. */
10990 extract_insn (dep
);
10991 preprocess_constraints (dep
);
10992 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
10994 /* We can ignore strict inputs. */
10995 if (recog_data
.operand_type
[opno
] == OP_IN
)
10998 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11010 /* Adjust cost hook for Cortex A9. */
11012 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11022 case REG_DEP_OUTPUT
:
11023 if (recog_memoized (insn
) >= 0
11024 && recog_memoized (dep
) >= 0)
11026 if (GET_CODE (PATTERN (insn
)) == SET
)
11029 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11031 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11033 enum attr_type attr_type_insn
= get_attr_type (insn
);
11034 enum attr_type attr_type_dep
= get_attr_type (dep
);
11036 /* By default all dependencies of the form
11039 have an extra latency of 1 cycle because
11040 of the input and output dependency in this
11041 case. However this gets modeled as an true
11042 dependency and hence all these checks. */
11043 if (REG_P (SET_DEST (PATTERN (insn
)))
11044 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
11046 /* FMACS is a special case where the dependent
11047 instruction can be issued 3 cycles before
11048 the normal latency in case of an output
11050 if ((attr_type_insn
== TYPE_FMACS
11051 || attr_type_insn
== TYPE_FMACD
)
11052 && (attr_type_dep
== TYPE_FMACS
11053 || attr_type_dep
== TYPE_FMACD
))
11055 if (dep_type
== REG_DEP_OUTPUT
)
11056 *cost
= insn_default_latency (dep
) - 3;
11058 *cost
= insn_default_latency (dep
);
11063 if (dep_type
== REG_DEP_OUTPUT
)
11064 *cost
= insn_default_latency (dep
) + 1;
11066 *cost
= insn_default_latency (dep
);
11076 gcc_unreachable ();
11082 /* Adjust cost hook for FA726TE. */
11084 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11087 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11088 have penalty of 3. */
11089 if (dep_type
== REG_DEP_TRUE
11090 && recog_memoized (insn
) >= 0
11091 && recog_memoized (dep
) >= 0
11092 && get_attr_conds (dep
) == CONDS_SET
)
11094 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11095 if (get_attr_conds (insn
) == CONDS_USE
11096 && get_attr_type (insn
) != TYPE_BRANCH
)
11102 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11103 || get_attr_conds (insn
) == CONDS_USE
)
11113 /* Implement TARGET_REGISTER_MOVE_COST.
11115 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11116 it is typically more expensive than a single memory access. We set
11117 the cost to less than two memory accesses so that floating
11118 point to integer conversion does not go through memory. */
11121 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11122 reg_class_t from
, reg_class_t to
)
11126 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11127 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11129 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11130 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11132 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11139 if (from
== HI_REGS
|| to
== HI_REGS
)
11146 /* Implement TARGET_MEMORY_MOVE_COST. */
11149 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11150 bool in ATTRIBUTE_UNUSED
)
11156 if (GET_MODE_SIZE (mode
) < 4)
11159 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11163 /* Vectorizer cost model implementation. */
11165 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11167 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11169 int misalign ATTRIBUTE_UNUSED
)
11173 switch (type_of_cost
)
11176 return current_tune
->vec_costs
->scalar_stmt_cost
;
11179 return current_tune
->vec_costs
->scalar_load_cost
;
11182 return current_tune
->vec_costs
->scalar_store_cost
;
11185 return current_tune
->vec_costs
->vec_stmt_cost
;
11188 return current_tune
->vec_costs
->vec_align_load_cost
;
11191 return current_tune
->vec_costs
->vec_store_cost
;
11193 case vec_to_scalar
:
11194 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11196 case scalar_to_vec
:
11197 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11199 case unaligned_load
:
11200 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11202 case unaligned_store
:
11203 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11205 case cond_branch_taken
:
11206 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11208 case cond_branch_not_taken
:
11209 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11212 case vec_promote_demote
:
11213 return current_tune
->vec_costs
->vec_stmt_cost
;
11215 case vec_construct
:
11216 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11217 return elements
/ 2 + 1;
11220 gcc_unreachable ();
11224 /* Implement targetm.vectorize.add_stmt_cost. */
11227 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11228 struct _stmt_vec_info
*stmt_info
, int misalign
,
11229 enum vect_cost_model_location where
)
11231 unsigned *cost
= (unsigned *) data
;
11232 unsigned retval
= 0;
11234 if (flag_vect_cost_model
)
11236 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11237 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11239 /* Statements in an inner loop relative to the loop being
11240 vectorized are weighted more heavily. The value here is
11241 arbitrary and could potentially be improved with analysis. */
11242 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11243 count
*= 50; /* FIXME. */
11245 retval
= (unsigned) (count
* stmt_cost
);
11246 cost
[where
] += retval
;
11252 /* Return true if and only if this insn can dual-issue only as older. */
11254 cortexa7_older_only (rtx_insn
*insn
)
11256 if (recog_memoized (insn
) < 0)
11259 switch (get_attr_type (insn
))
11261 case TYPE_ALU_DSP_REG
:
11262 case TYPE_ALU_SREG
:
11263 case TYPE_ALUS_SREG
:
11264 case TYPE_LOGIC_REG
:
11265 case TYPE_LOGICS_REG
:
11267 case TYPE_ADCS_REG
:
11272 case TYPE_SHIFT_IMM
:
11273 case TYPE_SHIFT_REG
:
11274 case TYPE_LOAD_BYTE
:
11277 case TYPE_FFARITHS
:
11279 case TYPE_FFARITHD
:
11297 case TYPE_F_STORES
:
11304 /* Return true if and only if this insn can dual-issue as younger. */
11306 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11308 if (recog_memoized (insn
) < 0)
11311 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11315 switch (get_attr_type (insn
))
11318 case TYPE_ALUS_IMM
:
11319 case TYPE_LOGIC_IMM
:
11320 case TYPE_LOGICS_IMM
:
11325 case TYPE_MOV_SHIFT
:
11326 case TYPE_MOV_SHIFT_REG
:
11336 /* Look for an instruction that can dual issue only as an older
11337 instruction, and move it in front of any instructions that can
11338 dual-issue as younger, while preserving the relative order of all
11339 other instructions in the ready list. This is a hueuristic to help
11340 dual-issue in later cycles, by postponing issue of more flexible
11341 instructions. This heuristic may affect dual issue opportunities
11342 in the current cycle. */
11344 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11345 int *n_readyp
, int clock
)
11348 int first_older_only
= -1, first_younger
= -1;
11352 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11356 /* Traverse the ready list from the head (the instruction to issue
11357 first), and looking for the first instruction that can issue as
11358 younger and the first instruction that can dual-issue only as
11360 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11362 rtx_insn
*insn
= ready
[i
];
11363 if (cortexa7_older_only (insn
))
11365 first_older_only
= i
;
11367 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11370 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11374 /* Nothing to reorder because either no younger insn found or insn
11375 that can dual-issue only as older appears before any insn that
11376 can dual-issue as younger. */
11377 if (first_younger
== -1)
11380 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11384 /* Nothing to reorder because no older-only insn in the ready list. */
11385 if (first_older_only
== -1)
11388 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11392 /* Move first_older_only insn before first_younger. */
11394 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11395 INSN_UID(ready
[first_older_only
]),
11396 INSN_UID(ready
[first_younger
]));
11397 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
11398 for (i
= first_older_only
; i
< first_younger
; i
++)
11400 ready
[i
] = ready
[i
+1];
11403 ready
[i
] = first_older_only_insn
;
11407 /* Implement TARGET_SCHED_REORDER. */
11409 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
11414 case TARGET_CPU_cortexa7
:
11415 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11418 /* Do nothing for other cores. */
11422 return arm_issue_rate ();
11425 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11426 It corrects the value of COST based on the relationship between
11427 INSN and DEP through the dependence LINK. It returns the new
11428 value. There is a per-core adjust_cost hook to adjust scheduler costs
11429 and the per-core hook can choose to completely override the generic
11430 adjust_cost function. Only put bits of code into arm_adjust_cost that
11431 are common across all cores. */
11433 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
11438 /* When generating Thumb-1 code, we want to place flag-setting operations
11439 close to a conditional branch which depends on them, so that we can
11440 omit the comparison. */
11443 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11444 && recog_memoized (dep
) >= 0
11445 && get_attr_conds (dep
) == CONDS_SET
)
11448 if (current_tune
->sched_adjust_cost
!= NULL
)
11450 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
11454 /* XXX Is this strictly true? */
11455 if (dep_type
== REG_DEP_ANTI
11456 || dep_type
== REG_DEP_OUTPUT
)
11459 /* Call insns don't incur a stall, even if they follow a load. */
11464 if ((i_pat
= single_set (insn
)) != NULL
11465 && MEM_P (SET_SRC (i_pat
))
11466 && (d_pat
= single_set (dep
)) != NULL
11467 && MEM_P (SET_DEST (d_pat
)))
11469 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11470 /* This is a load after a store, there is no conflict if the load reads
11471 from a cached area. Assume that loads from the stack, and from the
11472 constant pool are cached, and that others will miss. This is a
11475 if ((GET_CODE (src_mem
) == SYMBOL_REF
11476 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11477 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11478 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11479 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11487 arm_max_conditional_execute (void)
11489 return max_insns_skipped
;
11493 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11496 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11498 return (optimize
> 0) ? 2 : 0;
11502 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11504 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11507 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11508 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11509 sequences of non-executed instructions in IT blocks probably take the same
11510 amount of time as executed instructions (and the IT instruction itself takes
11511 space in icache). This function was experimentally determined to give good
11512 results on a popular embedded benchmark. */
11515 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11517 return (TARGET_32BIT
&& speed_p
) ? 1
11518 : arm_default_branch_cost (speed_p
, predictable_p
);
11522 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
11524 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11527 static bool fp_consts_inited
= false;
11529 static REAL_VALUE_TYPE value_fp0
;
11532 init_fp_table (void)
11536 r
= REAL_VALUE_ATOF ("0", DFmode
);
11538 fp_consts_inited
= true;
11541 /* Return TRUE if rtx X is a valid immediate FP constant. */
11543 arm_const_double_rtx (rtx x
)
11545 const REAL_VALUE_TYPE
*r
;
11547 if (!fp_consts_inited
)
11550 r
= CONST_DOUBLE_REAL_VALUE (x
);
11551 if (REAL_VALUE_MINUS_ZERO (*r
))
11554 if (real_equal (r
, &value_fp0
))
11560 /* VFPv3 has a fairly wide range of representable immediates, formed from
11561 "quarter-precision" floating-point values. These can be evaluated using this
11562 formula (with ^ for exponentiation):
11566 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11567 16 <= n <= 31 and 0 <= r <= 7.
11569 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11571 - A (most-significant) is the sign bit.
11572 - BCD are the exponent (encoded as r XOR 3).
11573 - EFGH are the mantissa (encoded as n - 16).
11576 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11577 fconst[sd] instruction, or -1 if X isn't suitable. */
11579 vfp3_const_double_index (rtx x
)
11581 REAL_VALUE_TYPE r
, m
;
11582 int sign
, exponent
;
11583 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11584 unsigned HOST_WIDE_INT mask
;
11585 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11588 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11591 r
= *CONST_DOUBLE_REAL_VALUE (x
);
11593 /* We can't represent these things, so detect them first. */
11594 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11597 /* Extract sign, exponent and mantissa. */
11598 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11599 r
= real_value_abs (&r
);
11600 exponent
= REAL_EXP (&r
);
11601 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11602 highest (sign) bit, with a fixed binary point at bit point_pos.
11603 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11604 bits for the mantissa, this may fail (low bits would be lost). */
11605 real_ldexp (&m
, &r
, point_pos
- exponent
);
11606 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
11607 mantissa
= w
.elt (0);
11608 mant_hi
= w
.elt (1);
11610 /* If there are bits set in the low part of the mantissa, we can't
11611 represent this value. */
11615 /* Now make it so that mantissa contains the most-significant bits, and move
11616 the point_pos to indicate that the least-significant bits have been
11618 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11619 mantissa
= mant_hi
;
11621 /* We can permit four significant bits of mantissa only, plus a high bit
11622 which is always 1. */
11623 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
11624 if ((mantissa
& mask
) != 0)
11627 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11628 mantissa
>>= point_pos
- 5;
11630 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11631 floating-point immediate zero with Neon using an integer-zero load, but
11632 that case is handled elsewhere.) */
11636 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11638 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11639 normalized significands are in the range [1, 2). (Our mantissa is shifted
11640 left 4 places at this point relative to normalized IEEE754 values). GCC
11641 internally uses [0.5, 1) (see real.c), so the exponent returned from
11642 REAL_EXP must be altered. */
11643 exponent
= 5 - exponent
;
11645 if (exponent
< 0 || exponent
> 7)
11648 /* Sign, mantissa and exponent are now in the correct form to plug into the
11649 formula described in the comment above. */
11650 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11653 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11655 vfp3_const_double_rtx (rtx x
)
11660 return vfp3_const_double_index (x
) != -1;
11663 /* Recognize immediates which can be used in various Neon instructions. Legal
11664 immediates are described by the following table (for VMVN variants, the
11665 bitwise inverse of the constant shown is recognized. In either case, VMOV
11666 is output and the correct instruction to use for a given constant is chosen
11667 by the assembler). The constant shown is replicated across all elements of
11668 the destination vector.
11670 insn elems variant constant (binary)
11671 ---- ----- ------- -----------------
11672 vmov i32 0 00000000 00000000 00000000 abcdefgh
11673 vmov i32 1 00000000 00000000 abcdefgh 00000000
11674 vmov i32 2 00000000 abcdefgh 00000000 00000000
11675 vmov i32 3 abcdefgh 00000000 00000000 00000000
11676 vmov i16 4 00000000 abcdefgh
11677 vmov i16 5 abcdefgh 00000000
11678 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11679 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11680 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11681 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11682 vmvn i16 10 00000000 abcdefgh
11683 vmvn i16 11 abcdefgh 00000000
11684 vmov i32 12 00000000 00000000 abcdefgh 11111111
11685 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11686 vmov i32 14 00000000 abcdefgh 11111111 11111111
11687 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11688 vmov i8 16 abcdefgh
11689 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11690 eeeeeeee ffffffff gggggggg hhhhhhhh
11691 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11692 vmov f32 19 00000000 00000000 00000000 00000000
11694 For case 18, B = !b. Representable values are exactly those accepted by
11695 vfp3_const_double_index, but are output as floating-point numbers rather
11698 For case 19, we will change it to vmov.i32 when assembling.
11700 Variants 0-5 (inclusive) may also be used as immediates for the second
11701 operand of VORR/VBIC instructions.
11703 The INVERSE argument causes the bitwise inverse of the given operand to be
11704 recognized instead (used for recognizing legal immediates for the VAND/VORN
11705 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11706 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11707 output, rather than the real insns vbic/vorr).
11709 INVERSE makes no difference to the recognition of float vectors.
11711 The return value is the variant of immediate as shown in the above table, or
11712 -1 if the given value doesn't match any of the listed patterns.
11715 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
11716 rtx
*modconst
, int *elementwidth
)
11718 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11720 for (i = 0; i < idx; i += (STRIDE)) \
11725 immtype = (CLASS); \
11726 elsize = (ELSIZE); \
11730 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
11731 unsigned int innersize
;
11732 unsigned char bytes
[16];
11733 int immtype
= -1, matches
;
11734 unsigned int invmask
= inverse
? 0xff : 0;
11735 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
11738 n_elts
= CONST_VECTOR_NUNITS (op
);
11742 if (mode
== VOIDmode
)
11746 innersize
= GET_MODE_UNIT_SIZE (mode
);
11748 /* Vectors of float constants. */
11749 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11751 rtx el0
= CONST_VECTOR_ELT (op
, 0);
11753 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
11756 /* FP16 vectors cannot be represented. */
11757 if (GET_MODE_INNER (mode
) == HFmode
)
11760 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11761 are distinct in this context. */
11762 if (!const_vec_duplicate_p (op
))
11766 *modconst
= CONST_VECTOR_ELT (op
, 0);
11771 if (el0
== CONST0_RTX (GET_MODE (el0
)))
11777 /* The tricks done in the code below apply for little-endian vector layout.
11778 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11779 FIXME: Implement logic for big-endian vectors. */
11780 if (BYTES_BIG_ENDIAN
&& vector
&& !const_vec_duplicate_p (op
))
11783 /* Splat vector constant out into a byte vector. */
11784 for (i
= 0; i
< n_elts
; i
++)
11786 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
11787 unsigned HOST_WIDE_INT elpart
;
11789 gcc_assert (CONST_INT_P (el
));
11790 elpart
= INTVAL (el
);
11792 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
11794 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11795 elpart
>>= BITS_PER_UNIT
;
11799 /* Sanity check. */
11800 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11804 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11805 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11807 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11808 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11810 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11811 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11813 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11814 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
11816 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
11818 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
11820 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11821 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11823 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11824 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11826 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11827 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11829 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11830 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
11832 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
11834 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
11836 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11837 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11839 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11840 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11842 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11843 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11845 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11846 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11848 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
11850 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11851 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
11859 *elementwidth
= elsize
;
11863 unsigned HOST_WIDE_INT imm
= 0;
11865 /* Un-invert bytes of recognized vector, if necessary. */
11867 for (i
= 0; i
< idx
; i
++)
11868 bytes
[i
] ^= invmask
;
11872 /* FIXME: Broken on 32-bit H_W_I hosts. */
11873 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
11875 for (i
= 0; i
< 8; i
++)
11876 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
11877 << (i
* BITS_PER_UNIT
);
11879 *modconst
= GEN_INT (imm
);
11883 unsigned HOST_WIDE_INT imm
= 0;
11885 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
11886 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
11888 *modconst
= GEN_INT (imm
);
11896 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11897 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11898 float elements), and a modified constant (whatever should be output for a
11899 VMOV) in *MODCONST. */
11902 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
11903 rtx
*modconst
, int *elementwidth
)
11907 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
11913 *modconst
= tmpconst
;
11916 *elementwidth
= tmpwidth
;
11921 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11922 the immediate is valid, write a constant suitable for using as an operand
11923 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11924 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11927 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
11928 rtx
*modconst
, int *elementwidth
)
11932 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
11934 if (retval
< 0 || retval
> 5)
11938 *modconst
= tmpconst
;
11941 *elementwidth
= tmpwidth
;
11946 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11947 the immediate is valid, write a constant suitable for using as an operand
11948 to VSHR/VSHL to *MODCONST and the corresponding element width to
11949 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
11950 because they have different limitations. */
11953 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
11954 rtx
*modconst
, int *elementwidth
,
11957 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
11958 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
11959 unsigned HOST_WIDE_INT last_elt
= 0;
11960 unsigned HOST_WIDE_INT maxshift
;
11962 /* Split vector constant out into a byte vector. */
11963 for (i
= 0; i
< n_elts
; i
++)
11965 rtx el
= CONST_VECTOR_ELT (op
, i
);
11966 unsigned HOST_WIDE_INT elpart
;
11968 if (CONST_INT_P (el
))
11969 elpart
= INTVAL (el
);
11970 else if (CONST_DOUBLE_P (el
))
11973 gcc_unreachable ();
11975 if (i
!= 0 && elpart
!= last_elt
)
11981 /* Shift less than element size. */
11982 maxshift
= innersize
* 8;
11986 /* Left shift immediate value can be from 0 to <size>-1. */
11987 if (last_elt
>= maxshift
)
11992 /* Right shift immediate value can be from 1 to <size>. */
11993 if (last_elt
== 0 || last_elt
> maxshift
)
11998 *elementwidth
= innersize
* 8;
12001 *modconst
= CONST_VECTOR_ELT (op
, 0);
12006 /* Return a string suitable for output of Neon immediate logic operation
12010 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
12011 int inverse
, int quad
)
12013 int width
, is_valid
;
12014 static char templ
[40];
12016 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12018 gcc_assert (is_valid
!= 0);
12021 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12023 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12028 /* Return a string suitable for output of Neon immediate shift operation
12029 (VSHR or VSHL) MNEM. */
12032 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12033 machine_mode mode
, int quad
,
12036 int width
, is_valid
;
12037 static char templ
[40];
12039 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12040 gcc_assert (is_valid
!= 0);
12043 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12045 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12050 /* Output a sequence of pairwise operations to implement a reduction.
12051 NOTE: We do "too much work" here, because pairwise operations work on two
12052 registers-worth of operands in one go. Unfortunately we can't exploit those
12053 extra calculations to do the full operation in fewer steps, I don't think.
12054 Although all vector elements of the result but the first are ignored, we
12055 actually calculate the same result in each of the elements. An alternative
12056 such as initially loading a vector with zero to use as each of the second
12057 operands would use up an additional register and take an extra instruction,
12058 for no particular gain. */
12061 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
12062 rtx (*reduc
) (rtx
, rtx
, rtx
))
12064 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
12067 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12069 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12070 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12075 /* If VALS is a vector constant that can be loaded into a register
12076 using VDUP, generate instructions to do so and return an RTX to
12077 assign to the register. Otherwise return NULL_RTX. */
12080 neon_vdup_constant (rtx vals
)
12082 machine_mode mode
= GET_MODE (vals
);
12083 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12086 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12089 if (!const_vec_duplicate_p (vals
, &x
))
12090 /* The elements are not all the same. We could handle repeating
12091 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12092 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12096 /* We can load this constant by using VDUP and a constant in a
12097 single ARM register. This will be cheaper than a vector
12100 x
= copy_to_mode_reg (inner_mode
, x
);
12101 return gen_rtx_VEC_DUPLICATE (mode
, x
);
12104 /* Generate code to load VALS, which is a PARALLEL containing only
12105 constants (for vec_init) or CONST_VECTOR, efficiently into a
12106 register. Returns an RTX to copy into the register, or NULL_RTX
12107 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12110 neon_make_constant (rtx vals
)
12112 machine_mode mode
= GET_MODE (vals
);
12114 rtx const_vec
= NULL_RTX
;
12115 int n_elts
= GET_MODE_NUNITS (mode
);
12119 if (GET_CODE (vals
) == CONST_VECTOR
)
12121 else if (GET_CODE (vals
) == PARALLEL
)
12123 /* A CONST_VECTOR must contain only CONST_INTs and
12124 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12125 Only store valid constants in a CONST_VECTOR. */
12126 for (i
= 0; i
< n_elts
; ++i
)
12128 rtx x
= XVECEXP (vals
, 0, i
);
12129 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12132 if (n_const
== n_elts
)
12133 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12136 gcc_unreachable ();
12138 if (const_vec
!= NULL
12139 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12140 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12142 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12143 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12144 pipeline cycle; creating the constant takes one or two ARM
12145 pipeline cycles. */
12147 else if (const_vec
!= NULL_RTX
)
12148 /* Load from constant pool. On Cortex-A8 this takes two cycles
12149 (for either double or quad vectors). We can not take advantage
12150 of single-cycle VLD1 because we need a PC-relative addressing
12154 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12155 We can not construct an initializer. */
12159 /* Initialize vector TARGET to VALS. */
12162 neon_expand_vector_init (rtx target
, rtx vals
)
12164 machine_mode mode
= GET_MODE (target
);
12165 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12166 int n_elts
= GET_MODE_NUNITS (mode
);
12167 int n_var
= 0, one_var
= -1;
12168 bool all_same
= true;
12172 for (i
= 0; i
< n_elts
; ++i
)
12174 x
= XVECEXP (vals
, 0, i
);
12175 if (!CONSTANT_P (x
))
12176 ++n_var
, one_var
= i
;
12178 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12184 rtx constant
= neon_make_constant (vals
);
12185 if (constant
!= NULL_RTX
)
12187 emit_move_insn (target
, constant
);
12192 /* Splat a single non-constant element if we can. */
12193 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12195 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12196 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
12200 /* One field is non-constant. Load constant then overwrite varying
12201 field. This is more efficient than using the stack. */
12204 rtx copy
= copy_rtx (vals
);
12205 rtx index
= GEN_INT (one_var
);
12207 /* Load constant part of vector, substitute neighboring value for
12208 varying element. */
12209 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12210 neon_expand_vector_init (target
, copy
);
12212 /* Insert variable. */
12213 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12217 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12220 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12223 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12226 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12229 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12232 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12235 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12238 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12241 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12244 gcc_unreachable ();
12249 /* Construct the vector in memory one field at a time
12250 and load the whole vector. */
12251 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12252 for (i
= 0; i
< n_elts
; i
++)
12253 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12254 i
* GET_MODE_SIZE (inner_mode
)),
12255 XVECEXP (vals
, 0, i
));
12256 emit_move_insn (target
, mem
);
12259 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12260 ERR if it doesn't. EXP indicates the source location, which includes the
12261 inlining history for intrinsics. */
12264 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12265 const_tree exp
, const char *desc
)
12267 HOST_WIDE_INT lane
;
12269 gcc_assert (CONST_INT_P (operand
));
12271 lane
= INTVAL (operand
);
12273 if (lane
< low
|| lane
>= high
)
12276 error ("%K%s %wd out of range %wd - %wd",
12277 exp
, desc
, lane
, low
, high
- 1);
12279 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
12283 /* Bounds-check lanes. */
12286 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12289 bounds_check (operand
, low
, high
, exp
, "lane");
12292 /* Bounds-check constants. */
12295 arm_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12297 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
12301 neon_element_bits (machine_mode mode
)
12303 return GET_MODE_UNIT_BITSIZE (mode
);
12307 /* Predicates for `match_operand' and `match_operator'. */
12309 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12310 WB is true if full writeback address modes are allowed and is false
12311 if limited writeback address modes (POST_INC and PRE_DEC) are
12315 arm_coproc_mem_operand (rtx op
, bool wb
)
12319 /* Reject eliminable registers. */
12320 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12321 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12322 || reg_mentioned_p (arg_pointer_rtx
, op
)
12323 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12324 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12325 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12326 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12329 /* Constants are converted into offsets from labels. */
12333 ind
= XEXP (op
, 0);
12335 if (reload_completed
12336 && (GET_CODE (ind
) == LABEL_REF
12337 || (GET_CODE (ind
) == CONST
12338 && GET_CODE (XEXP (ind
, 0)) == PLUS
12339 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12340 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12343 /* Match: (mem (reg)). */
12345 return arm_address_register_rtx_p (ind
, 0);
12347 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12348 acceptable in any case (subject to verification by
12349 arm_address_register_rtx_p). We need WB to be true to accept
12350 PRE_INC and POST_DEC. */
12351 if (GET_CODE (ind
) == POST_INC
12352 || GET_CODE (ind
) == PRE_DEC
12354 && (GET_CODE (ind
) == PRE_INC
12355 || GET_CODE (ind
) == POST_DEC
)))
12356 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12359 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12360 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12361 && GET_CODE (XEXP (ind
, 1)) == PLUS
12362 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12363 ind
= XEXP (ind
, 1);
12368 if (GET_CODE (ind
) == PLUS
12369 && REG_P (XEXP (ind
, 0))
12370 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12371 && CONST_INT_P (XEXP (ind
, 1))
12372 && INTVAL (XEXP (ind
, 1)) > -1024
12373 && INTVAL (XEXP (ind
, 1)) < 1024
12374 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12380 /* Return TRUE if OP is a memory operand which we can load or store a vector
12381 to/from. TYPE is one of the following values:
12382 0 - Vector load/stor (vldr)
12383 1 - Core registers (ldm)
12384 2 - Element/structure loads (vld1)
12387 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12391 /* Reject eliminable registers. */
12392 if (strict
&& ! (reload_in_progress
|| reload_completed
)
12393 && (reg_mentioned_p (frame_pointer_rtx
, op
)
12394 || reg_mentioned_p (arg_pointer_rtx
, op
)
12395 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12396 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12397 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12398 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12401 /* Constants are converted into offsets from labels. */
12405 ind
= XEXP (op
, 0);
12407 if (reload_completed
12408 && (GET_CODE (ind
) == LABEL_REF
12409 || (GET_CODE (ind
) == CONST
12410 && GET_CODE (XEXP (ind
, 0)) == PLUS
12411 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12412 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12415 /* Match: (mem (reg)). */
12417 return arm_address_register_rtx_p (ind
, 0);
12419 /* Allow post-increment with Neon registers. */
12420 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12421 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12422 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12424 /* Allow post-increment by register for VLDn */
12425 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
12426 && GET_CODE (XEXP (ind
, 1)) == PLUS
12427 && REG_P (XEXP (XEXP (ind
, 1), 1)))
12434 && GET_CODE (ind
) == PLUS
12435 && REG_P (XEXP (ind
, 0))
12436 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12437 && CONST_INT_P (XEXP (ind
, 1))
12438 && INTVAL (XEXP (ind
, 1)) > -1024
12439 /* For quad modes, we restrict the constant offset to be slightly less
12440 than what the instruction format permits. We have no such constraint
12441 on double mode offsets. (This must match arm_legitimate_index_p.) */
12442 && (INTVAL (XEXP (ind
, 1))
12443 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12444 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12450 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12453 neon_struct_mem_operand (rtx op
)
12457 /* Reject eliminable registers. */
12458 if (! (reload_in_progress
|| reload_completed
)
12459 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12460 || reg_mentioned_p (arg_pointer_rtx
, op
)
12461 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12462 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12463 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12464 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12467 /* Constants are converted into offsets from labels. */
12471 ind
= XEXP (op
, 0);
12473 if (reload_completed
12474 && (GET_CODE (ind
) == LABEL_REF
12475 || (GET_CODE (ind
) == CONST
12476 && GET_CODE (XEXP (ind
, 0)) == PLUS
12477 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12478 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12481 /* Match: (mem (reg)). */
12483 return arm_address_register_rtx_p (ind
, 0);
12485 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12486 if (GET_CODE (ind
) == POST_INC
12487 || GET_CODE (ind
) == PRE_DEC
)
12488 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12493 /* Return true if X is a register that will be eliminated later on. */
12495 arm_eliminable_register (rtx x
)
12497 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12498 || REGNO (x
) == ARG_POINTER_REGNUM
12499 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12500 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12503 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12504 coprocessor registers. Otherwise return NO_REGS. */
12507 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
12509 if (mode
== HFmode
)
12511 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
12512 return GENERAL_REGS
;
12513 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12515 return GENERAL_REGS
;
12518 /* The neon move patterns handle all legitimate vector and struct
12521 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12522 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12523 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12524 || VALID_NEON_STRUCT_MODE (mode
)))
12527 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12530 return GENERAL_REGS
;
12533 /* Values which must be returned in the most-significant end of the return
12537 arm_return_in_msb (const_tree valtype
)
12539 return (TARGET_AAPCS_BASED
12540 && BYTES_BIG_ENDIAN
12541 && (AGGREGATE_TYPE_P (valtype
)
12542 || TREE_CODE (valtype
) == COMPLEX_TYPE
12543 || FIXED_POINT_TYPE_P (valtype
)));
12546 /* Return TRUE if X references a SYMBOL_REF. */
12548 symbol_mentioned_p (rtx x
)
12553 if (GET_CODE (x
) == SYMBOL_REF
)
12556 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12557 are constant offsets, not symbols. */
12558 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12561 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12563 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12569 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12570 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12573 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12580 /* Return TRUE if X references a LABEL_REF. */
12582 label_mentioned_p (rtx x
)
12587 if (GET_CODE (x
) == LABEL_REF
)
12590 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12591 instruction, but they are constant offsets, not symbols. */
12592 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12595 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12596 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12602 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12603 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12606 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12614 tls_mentioned_p (rtx x
)
12616 switch (GET_CODE (x
))
12619 return tls_mentioned_p (XEXP (x
, 0));
12622 if (XINT (x
, 1) == UNSPEC_TLS
)
12625 /* Fall through. */
12631 /* Must not copy any rtx that uses a pc-relative address.
12632 Also, disallow copying of load-exclusive instructions that
12633 may appear after splitting of compare-and-swap-style operations
12634 so as to prevent those loops from being transformed away from their
12635 canonical forms (see PR 69904). */
12638 arm_cannot_copy_insn_p (rtx_insn
*insn
)
12640 /* The tls call insn cannot be copied, as it is paired with a data
12642 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
12645 subrtx_iterator::array_type array
;
12646 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
12648 const_rtx x
= *iter
;
12649 if (GET_CODE (x
) == UNSPEC
12650 && (XINT (x
, 1) == UNSPEC_PIC_BASE
12651 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
12655 rtx set
= single_set (insn
);
12658 rtx src
= SET_SRC (set
);
12659 if (GET_CODE (src
) == ZERO_EXTEND
)
12660 src
= XEXP (src
, 0);
12662 /* Catch the load-exclusive and load-acquire operations. */
12663 if (GET_CODE (src
) == UNSPEC_VOLATILE
12664 && (XINT (src
, 1) == VUNSPEC_LL
12665 || XINT (src
, 1) == VUNSPEC_LAX
))
12672 minmax_code (rtx x
)
12674 enum rtx_code code
= GET_CODE (x
);
12687 gcc_unreachable ();
12691 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12694 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
12695 int *mask
, bool *signed_sat
)
12697 /* The high bound must be a power of two minus one. */
12698 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
12702 /* The low bound is either zero (for usat) or one less than the
12703 negation of the high bound (for ssat). */
12704 if (INTVAL (lo_bound
) == 0)
12709 *signed_sat
= false;
12714 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
12719 *signed_sat
= true;
12727 /* Return 1 if memory locations are adjacent. */
12729 adjacent_mem_locations (rtx a
, rtx b
)
12731 /* We don't guarantee to preserve the order of these memory refs. */
12732 if (volatile_refs_p (a
) || volatile_refs_p (b
))
12735 if ((REG_P (XEXP (a
, 0))
12736 || (GET_CODE (XEXP (a
, 0)) == PLUS
12737 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
12738 && (REG_P (XEXP (b
, 0))
12739 || (GET_CODE (XEXP (b
, 0)) == PLUS
12740 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
12742 HOST_WIDE_INT val0
= 0, val1
= 0;
12746 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
12748 reg0
= XEXP (XEXP (a
, 0), 0);
12749 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
12752 reg0
= XEXP (a
, 0);
12754 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
12756 reg1
= XEXP (XEXP (b
, 0), 0);
12757 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
12760 reg1
= XEXP (b
, 0);
12762 /* Don't accept any offset that will require multiple
12763 instructions to handle, since this would cause the
12764 arith_adjacentmem pattern to output an overlong sequence. */
12765 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
12768 /* Don't allow an eliminable register: register elimination can make
12769 the offset too large. */
12770 if (arm_eliminable_register (reg0
))
12773 val_diff
= val1
- val0
;
12777 /* If the target has load delay slots, then there's no benefit
12778 to using an ldm instruction unless the offset is zero and
12779 we are optimizing for size. */
12780 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
12781 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
12782 && (val_diff
== 4 || val_diff
== -4));
12785 return ((REGNO (reg0
) == REGNO (reg1
))
12786 && (val_diff
== 4 || val_diff
== -4));
12792 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12793 for load operations, false for store operations. CONSECUTIVE is true
12794 if the register numbers in the operation must be consecutive in the register
12795 bank. RETURN_PC is true if value is to be loaded in PC.
12796 The pattern we are trying to match for load is:
12797 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12798 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12801 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12804 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12805 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12806 3. If consecutive is TRUE, then for kth register being loaded,
12807 REGNO (R_dk) = REGNO (R_d0) + k.
12808 The pattern for store is similar. */
12810 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
12811 bool consecutive
, bool return_pc
)
12813 HOST_WIDE_INT count
= XVECLEN (op
, 0);
12814 rtx reg
, mem
, addr
;
12816 unsigned first_regno
;
12817 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
12819 bool addr_reg_in_reglist
= false;
12820 bool update
= false;
12825 /* If not in SImode, then registers must be consecutive
12826 (e.g., VLDM instructions for DFmode). */
12827 gcc_assert ((mode
== SImode
) || consecutive
);
12828 /* Setting return_pc for stores is illegal. */
12829 gcc_assert (!return_pc
|| load
);
12831 /* Set up the increments and the regs per val based on the mode. */
12832 reg_increment
= GET_MODE_SIZE (mode
);
12833 regs_per_val
= reg_increment
/ 4;
12834 offset_adj
= return_pc
? 1 : 0;
12837 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
12838 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
12841 /* Check if this is a write-back. */
12842 elt
= XVECEXP (op
, 0, offset_adj
);
12843 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
12849 /* The offset adjustment must be the number of registers being
12850 popped times the size of a single register. */
12851 if (!REG_P (SET_DEST (elt
))
12852 || !REG_P (XEXP (SET_SRC (elt
), 0))
12853 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
12854 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
12855 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
12856 ((count
- 1 - offset_adj
) * reg_increment
))
12860 i
= i
+ offset_adj
;
12861 base
= base
+ offset_adj
;
12862 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12863 success depends on the type: VLDM can do just one reg,
12864 LDM must do at least two. */
12865 if ((count
<= i
) && (mode
== SImode
))
12868 elt
= XVECEXP (op
, 0, i
- 1);
12869 if (GET_CODE (elt
) != SET
)
12874 reg
= SET_DEST (elt
);
12875 mem
= SET_SRC (elt
);
12879 reg
= SET_SRC (elt
);
12880 mem
= SET_DEST (elt
);
12883 if (!REG_P (reg
) || !MEM_P (mem
))
12886 regno
= REGNO (reg
);
12887 first_regno
= regno
;
12888 addr
= XEXP (mem
, 0);
12889 if (GET_CODE (addr
) == PLUS
)
12891 if (!CONST_INT_P (XEXP (addr
, 1)))
12894 offset
= INTVAL (XEXP (addr
, 1));
12895 addr
= XEXP (addr
, 0);
12901 /* Don't allow SP to be loaded unless it is also the base register. It
12902 guarantees that SP is reset correctly when an LDM instruction
12903 is interrupted. Otherwise, we might end up with a corrupt stack. */
12904 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12907 for (; i
< count
; i
++)
12909 elt
= XVECEXP (op
, 0, i
);
12910 if (GET_CODE (elt
) != SET
)
12915 reg
= SET_DEST (elt
);
12916 mem
= SET_SRC (elt
);
12920 reg
= SET_SRC (elt
);
12921 mem
= SET_DEST (elt
);
12925 || GET_MODE (reg
) != mode
12926 || REGNO (reg
) <= regno
12929 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
12930 /* Don't allow SP to be loaded unless it is also the base register. It
12931 guarantees that SP is reset correctly when an LDM instruction
12932 is interrupted. Otherwise, we might end up with a corrupt stack. */
12933 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12935 || GET_MODE (mem
) != mode
12936 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
12937 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
12938 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
12939 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
12940 offset
+ (i
- base
) * reg_increment
))
12941 && (!REG_P (XEXP (mem
, 0))
12942 || offset
+ (i
- base
) * reg_increment
!= 0)))
12945 regno
= REGNO (reg
);
12946 if (regno
== REGNO (addr
))
12947 addr_reg_in_reglist
= true;
12952 if (update
&& addr_reg_in_reglist
)
12955 /* For Thumb-1, address register is always modified - either by write-back
12956 or by explicit load. If the pattern does not describe an update,
12957 then the address register must be in the list of loaded registers. */
12959 return update
|| addr_reg_in_reglist
;
12965 /* Return true iff it would be profitable to turn a sequence of NOPS loads
12966 or stores (depending on IS_STORE) into a load-multiple or store-multiple
12967 instruction. ADD_OFFSET is nonzero if the base address register needs
12968 to be modified with an add instruction before we can use it. */
12971 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
12972 int nops
, HOST_WIDE_INT add_offset
)
12974 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
12975 if the offset isn't small enough. The reason 2 ldrs are faster
12976 is because these ARMs are able to do more than one cache access
12977 in a single cycle. The ARM9 and StrongARM have Harvard caches,
12978 whilst the ARM8 has a double bandwidth cache. This means that
12979 these cores can do both an instruction fetch and a data fetch in
12980 a single cycle, so the trick of calculating the address into a
12981 scratch register (one of the result regs) and then doing a load
12982 multiple actually becomes slower (and no smaller in code size).
12983 That is the transformation
12985 ldr rd1, [rbase + offset]
12986 ldr rd2, [rbase + offset + 4]
12990 add rd1, rbase, offset
12991 ldmia rd1, {rd1, rd2}
12993 produces worse code -- '3 cycles + any stalls on rd2' instead of
12994 '2 cycles + any stalls on rd2'. On ARMs with only one cache
12995 access per cycle, the first sequence could never complete in less
12996 than 6 cycles, whereas the ldm sequence would only take 5 and
12997 would make better use of sequential accesses if not hitting the
13000 We cheat here and test 'arm_ld_sched' which we currently know to
13001 only be true for the ARM8, ARM9 and StrongARM. If this ever
13002 changes, then the test below needs to be reworked. */
13003 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13006 /* XScale has load-store double instructions, but they have stricter
13007 alignment requirements than load-store multiple, so we cannot
13010 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13011 the pipeline until completion.
13019 An ldr instruction takes 1-3 cycles, but does not block the
13028 Best case ldr will always win. However, the more ldr instructions
13029 we issue, the less likely we are to be able to schedule them well.
13030 Using ldr instructions also increases code size.
13032 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13033 for counts of 3 or 4 regs. */
13034 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13039 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13040 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13041 an array ORDER which describes the sequence to use when accessing the
13042 offsets that produces an ascending order. In this sequence, each
13043 offset must be larger by exactly 4 than the previous one. ORDER[0]
13044 must have been filled in with the lowest offset by the caller.
13045 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13046 we use to verify that ORDER produces an ascending order of registers.
13047 Return true if it was possible to construct such an order, false if
13051 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13052 int *unsorted_regs
)
13055 for (i
= 1; i
< nops
; i
++)
13059 order
[i
] = order
[i
- 1];
13060 for (j
= 0; j
< nops
; j
++)
13061 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13063 /* We must find exactly one offset that is higher than the
13064 previous one by 4. */
13065 if (order
[i
] != order
[i
- 1])
13069 if (order
[i
] == order
[i
- 1])
13071 /* The register numbers must be ascending. */
13072 if (unsorted_regs
!= NULL
13073 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13079 /* Used to determine in a peephole whether a sequence of load
13080 instructions can be changed into a load-multiple instruction.
13081 NOPS is the number of separate load instructions we are examining. The
13082 first NOPS entries in OPERANDS are the destination registers, the
13083 next NOPS entries are memory operands. If this function is
13084 successful, *BASE is set to the common base register of the memory
13085 accesses; *LOAD_OFFSET is set to the first memory location's offset
13086 from that base register.
13087 REGS is an array filled in with the destination register numbers.
13088 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13089 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13090 the sequence of registers in REGS matches the loads from ascending memory
13091 locations, and the function verifies that the register numbers are
13092 themselves ascending. If CHECK_REGS is false, the register numbers
13093 are stored in the order they are found in the operands. */
13095 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13096 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13098 int unsorted_regs
[MAX_LDM_STM_OPS
];
13099 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13100 int order
[MAX_LDM_STM_OPS
];
13101 rtx base_reg_rtx
= NULL
;
13105 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13106 easily extended if required. */
13107 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13109 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13111 /* Loop over the operands and check that the memory references are
13112 suitable (i.e. immediate offsets from the same base register). At
13113 the same time, extract the target register, and the memory
13115 for (i
= 0; i
< nops
; i
++)
13120 /* Convert a subreg of a mem into the mem itself. */
13121 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13122 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13124 gcc_assert (MEM_P (operands
[nops
+ i
]));
13126 /* Don't reorder volatile memory references; it doesn't seem worth
13127 looking for the case where the order is ok anyway. */
13128 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13131 offset
= const0_rtx
;
13133 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13134 || (GET_CODE (reg
) == SUBREG
13135 && REG_P (reg
= SUBREG_REG (reg
))))
13136 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13137 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13138 || (GET_CODE (reg
) == SUBREG
13139 && REG_P (reg
= SUBREG_REG (reg
))))
13140 && (CONST_INT_P (offset
13141 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13145 base_reg
= REGNO (reg
);
13146 base_reg_rtx
= reg
;
13147 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13150 else if (base_reg
!= (int) REGNO (reg
))
13151 /* Not addressed from the same base register. */
13154 unsorted_regs
[i
] = (REG_P (operands
[i
])
13155 ? REGNO (operands
[i
])
13156 : REGNO (SUBREG_REG (operands
[i
])));
13158 /* If it isn't an integer register, or if it overwrites the
13159 base register but isn't the last insn in the list, then
13160 we can't do this. */
13161 if (unsorted_regs
[i
] < 0
13162 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13163 || unsorted_regs
[i
] > 14
13164 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13167 /* Don't allow SP to be loaded unless it is also the base
13168 register. It guarantees that SP is reset correctly when
13169 an LDM instruction is interrupted. Otherwise, we might
13170 end up with a corrupt stack. */
13171 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13174 unsorted_offsets
[i
] = INTVAL (offset
);
13175 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13179 /* Not a suitable memory address. */
13183 /* All the useful information has now been extracted from the
13184 operands into unsorted_regs and unsorted_offsets; additionally,
13185 order[0] has been set to the lowest offset in the list. Sort
13186 the offsets into order, verifying that they are adjacent, and
13187 check that the register numbers are ascending. */
13188 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13189 check_regs
? unsorted_regs
: NULL
))
13193 memcpy (saved_order
, order
, sizeof order
);
13199 for (i
= 0; i
< nops
; i
++)
13200 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13202 *load_offset
= unsorted_offsets
[order
[0]];
13206 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13209 if (unsorted_offsets
[order
[0]] == 0)
13210 ldm_case
= 1; /* ldmia */
13211 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13212 ldm_case
= 2; /* ldmib */
13213 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13214 ldm_case
= 3; /* ldmda */
13215 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13216 ldm_case
= 4; /* ldmdb */
13217 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13218 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13223 if (!multiple_operation_profitable_p (false, nops
,
13225 ? unsorted_offsets
[order
[0]] : 0))
13231 /* Used to determine in a peephole whether a sequence of store instructions can
13232 be changed into a store-multiple instruction.
13233 NOPS is the number of separate store instructions we are examining.
13234 NOPS_TOTAL is the total number of instructions recognized by the peephole
13236 The first NOPS entries in OPERANDS are the source registers, the next
13237 NOPS entries are memory operands. If this function is successful, *BASE is
13238 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13239 to the first memory location's offset from that base register. REGS is an
13240 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13241 likewise filled with the corresponding rtx's.
13242 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13243 numbers to an ascending order of stores.
13244 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13245 from ascending memory locations, and the function verifies that the register
13246 numbers are themselves ascending. If CHECK_REGS is false, the register
13247 numbers are stored in the order they are found in the operands. */
13249 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13250 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13251 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13253 int unsorted_regs
[MAX_LDM_STM_OPS
];
13254 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13255 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13256 int order
[MAX_LDM_STM_OPS
];
13258 rtx base_reg_rtx
= NULL
;
13261 /* Write back of base register is currently only supported for Thumb 1. */
13262 int base_writeback
= TARGET_THUMB1
;
13264 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13265 easily extended if required. */
13266 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13268 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13270 /* Loop over the operands and check that the memory references are
13271 suitable (i.e. immediate offsets from the same base register). At
13272 the same time, extract the target register, and the memory
13274 for (i
= 0; i
< nops
; i
++)
13279 /* Convert a subreg of a mem into the mem itself. */
13280 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13281 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13283 gcc_assert (MEM_P (operands
[nops
+ i
]));
13285 /* Don't reorder volatile memory references; it doesn't seem worth
13286 looking for the case where the order is ok anyway. */
13287 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13290 offset
= const0_rtx
;
13292 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13293 || (GET_CODE (reg
) == SUBREG
13294 && REG_P (reg
= SUBREG_REG (reg
))))
13295 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13296 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13297 || (GET_CODE (reg
) == SUBREG
13298 && REG_P (reg
= SUBREG_REG (reg
))))
13299 && (CONST_INT_P (offset
13300 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13302 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13303 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13304 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13308 base_reg
= REGNO (reg
);
13309 base_reg_rtx
= reg
;
13310 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13313 else if (base_reg
!= (int) REGNO (reg
))
13314 /* Not addressed from the same base register. */
13317 /* If it isn't an integer register, then we can't do this. */
13318 if (unsorted_regs
[i
] < 0
13319 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13320 /* The effects are unpredictable if the base register is
13321 both updated and stored. */
13322 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13323 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13324 || unsorted_regs
[i
] > 14)
13327 unsorted_offsets
[i
] = INTVAL (offset
);
13328 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13332 /* Not a suitable memory address. */
13336 /* All the useful information has now been extracted from the
13337 operands into unsorted_regs and unsorted_offsets; additionally,
13338 order[0] has been set to the lowest offset in the list. Sort
13339 the offsets into order, verifying that they are adjacent, and
13340 check that the register numbers are ascending. */
13341 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13342 check_regs
? unsorted_regs
: NULL
))
13346 memcpy (saved_order
, order
, sizeof order
);
13352 for (i
= 0; i
< nops
; i
++)
13354 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13356 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13359 *load_offset
= unsorted_offsets
[order
[0]];
13363 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13366 if (unsorted_offsets
[order
[0]] == 0)
13367 stm_case
= 1; /* stmia */
13368 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13369 stm_case
= 2; /* stmib */
13370 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13371 stm_case
= 3; /* stmda */
13372 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13373 stm_case
= 4; /* stmdb */
13377 if (!multiple_operation_profitable_p (false, nops
, 0))
13383 /* Routines for use in generating RTL. */
13385 /* Generate a load-multiple instruction. COUNT is the number of loads in
13386 the instruction; REGS and MEMS are arrays containing the operands.
13387 BASEREG is the base register to be used in addressing the memory operands.
13388 WBACK_OFFSET is nonzero if the instruction should update the base
13392 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13393 HOST_WIDE_INT wback_offset
)
13398 if (!multiple_operation_profitable_p (false, count
, 0))
13404 for (i
= 0; i
< count
; i
++)
13405 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13407 if (wback_offset
!= 0)
13408 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13410 seq
= get_insns ();
13416 result
= gen_rtx_PARALLEL (VOIDmode
,
13417 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13418 if (wback_offset
!= 0)
13420 XVECEXP (result
, 0, 0)
13421 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13426 for (j
= 0; i
< count
; i
++, j
++)
13427 XVECEXP (result
, 0, i
)
13428 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13433 /* Generate a store-multiple instruction. COUNT is the number of stores in
13434 the instruction; REGS and MEMS are arrays containing the operands.
13435 BASEREG is the base register to be used in addressing the memory operands.
13436 WBACK_OFFSET is nonzero if the instruction should update the base
13440 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13441 HOST_WIDE_INT wback_offset
)
13446 if (GET_CODE (basereg
) == PLUS
)
13447 basereg
= XEXP (basereg
, 0);
13449 if (!multiple_operation_profitable_p (false, count
, 0))
13455 for (i
= 0; i
< count
; i
++)
13456 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13458 if (wback_offset
!= 0)
13459 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13461 seq
= get_insns ();
13467 result
= gen_rtx_PARALLEL (VOIDmode
,
13468 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13469 if (wback_offset
!= 0)
13471 XVECEXP (result
, 0, 0)
13472 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13477 for (j
= 0; i
< count
; i
++, j
++)
13478 XVECEXP (result
, 0, i
)
13479 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13484 /* Generate either a load-multiple or a store-multiple instruction. This
13485 function can be used in situations where we can start with a single MEM
13486 rtx and adjust its address upwards.
13487 COUNT is the number of operations in the instruction, not counting a
13488 possible update of the base register. REGS is an array containing the
13490 BASEREG is the base register to be used in addressing the memory operands,
13491 which are constructed from BASEMEM.
13492 WRITE_BACK specifies whether the generated instruction should include an
13493 update of the base register.
13494 OFFSETP is used to pass an offset to and from this function; this offset
13495 is not used when constructing the address (instead BASEMEM should have an
13496 appropriate offset in its address), it is used only for setting
13497 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13500 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13501 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13503 rtx mems
[MAX_LDM_STM_OPS
];
13504 HOST_WIDE_INT offset
= *offsetp
;
13507 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13509 if (GET_CODE (basereg
) == PLUS
)
13510 basereg
= XEXP (basereg
, 0);
13512 for (i
= 0; i
< count
; i
++)
13514 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13515 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13523 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13524 write_back
? 4 * count
: 0);
13526 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13527 write_back
? 4 * count
: 0);
/* Thin convenience wrapper: emit RTL for a load-multiple (LDM) of COUNT
   registers given in REGS, addressed from BASEREG inside BASEMEM, with an
   optional base-register write-back.  Delegates to arm_gen_multiple_op
   with is_load == TRUE, forwarding every argument unchanged.
   NOTE(review): the extraction this chunk came from dropped original
   lines 13533 and 13535-13536 (opening brace, trailing OFFSETP argument
   and closing brace) — the text below is incomplete; restore from the
   upstream file before compiling.  */
13531 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13532 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13534 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
/* Thin convenience wrapper: emit RTL for a store-multiple (STM) of COUNT
   registers given in REGS, addressed from BASEREG inside BASEMEM, with an
   optional base-register write-back.  Mirror image of
   arm_gen_load_multiple: delegates to arm_gen_multiple_op with
   is_load == FALSE, forwarding every argument unchanged.
   NOTE(review): the extraction this chunk came from dropped original
   lines 13541 and 13543-13544 (opening brace, trailing OFFSETP argument
   and closing brace) — the text below is incomplete; restore from the
   upstream file before compiling.  */
13539 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13540 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13542 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13546 /* Called from a peephole2 expander to turn a sequence of loads into an
13547 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13548 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13549 is true if we can reorder the registers because they are used commutatively
13551 Returns true iff we could generate a new instruction. */
13554 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13556 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13557 rtx mems
[MAX_LDM_STM_OPS
];
13558 int i
, j
, base_reg
;
13560 HOST_WIDE_INT offset
;
13561 int write_back
= FALSE
;
13565 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13566 &base_reg
, &offset
, !sort_regs
);
13572 for (i
= 0; i
< nops
- 1; i
++)
13573 for (j
= i
+ 1; j
< nops
; j
++)
13574 if (regs
[i
] > regs
[j
])
13580 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13584 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13585 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13591 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13592 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13594 if (!TARGET_THUMB1
)
13596 base_reg
= regs
[0];
13597 base_reg_rtx
= newbase
;
13601 for (i
= 0; i
< nops
; i
++)
13603 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13604 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13607 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13608 write_back
? offset
+ i
* 4 : 0));
13612 /* Called from a peephole2 expander to turn a sequence of stores into an
13613 STM instruction. OPERANDS are the operands found by the peephole matcher;
13614 NOPS indicates how many separate stores we are trying to combine.
13615 Returns true iff we could generate a new instruction. */
13618 gen_stm_seq (rtx
*operands
, int nops
)
13621 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13622 rtx mems
[MAX_LDM_STM_OPS
];
13625 HOST_WIDE_INT offset
;
13626 int write_back
= FALSE
;
13629 bool base_reg_dies
;
13631 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13632 mem_order
, &base_reg
, &offset
, true);
13637 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13639 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13642 gcc_assert (base_reg_dies
);
13648 gcc_assert (base_reg_dies
);
13649 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13653 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13655 for (i
= 0; i
< nops
; i
++)
13657 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13658 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13661 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13662 write_back
? offset
+ i
* 4 : 0));
13666 /* Called from a peephole2 expander to turn a sequence of stores that are
13667 preceded by constant loads into an STM instruction. OPERANDS are the
13668 operands found by the peephole matcher; NOPS indicates how many
13669 separate stores we are trying to combine; there are 2 * NOPS
13670 instructions in the peephole.
13671 Returns true iff we could generate a new instruction. */
13674 gen_const_stm_seq (rtx
*operands
, int nops
)
13676 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
13677 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13678 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
13679 rtx mems
[MAX_LDM_STM_OPS
];
13682 HOST_WIDE_INT offset
;
13683 int write_back
= FALSE
;
13686 bool base_reg_dies
;
13688 HARD_REG_SET allocated
;
13690 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
13691 mem_order
, &base_reg
, &offset
, false);
13696 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
13698 /* If the same register is used more than once, try to find a free
13700 CLEAR_HARD_REG_SET (allocated
);
13701 for (i
= 0; i
< nops
; i
++)
13703 for (j
= i
+ 1; j
< nops
; j
++)
13704 if (regs
[i
] == regs
[j
])
13706 rtx t
= peep2_find_free_register (0, nops
* 2,
13707 TARGET_THUMB1
? "l" : "r",
13708 SImode
, &allocated
);
13712 regs
[i
] = REGNO (t
);
13716 /* Compute an ordering that maps the register numbers to an ascending
13719 for (i
= 0; i
< nops
; i
++)
13720 if (regs
[i
] < regs
[reg_order
[0]])
13723 for (i
= 1; i
< nops
; i
++)
13725 int this_order
= reg_order
[i
- 1];
13726 for (j
= 0; j
< nops
; j
++)
13727 if (regs
[j
] > regs
[reg_order
[i
- 1]]
13728 && (this_order
== reg_order
[i
- 1]
13729 || regs
[j
] < regs
[this_order
]))
13731 reg_order
[i
] = this_order
;
13734 /* Ensure that registers that must be live after the instruction end
13735 up with the correct value. */
13736 for (i
= 0; i
< nops
; i
++)
13738 int this_order
= reg_order
[i
];
13739 if ((this_order
!= mem_order
[i
]
13740 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
13741 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
13745 /* Load the constants. */
13746 for (i
= 0; i
< nops
; i
++)
13748 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
13749 sorted_regs
[i
] = regs
[reg_order
[i
]];
13750 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
13753 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13755 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
13758 gcc_assert (base_reg_dies
);
13764 gcc_assert (base_reg_dies
);
13765 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13769 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13771 for (i
= 0; i
< nops
; i
++)
13773 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13774 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13777 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
13778 write_back
? offset
+ i
* 4 : 0));
13782 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13783 unaligned copies on processors which support unaligned semantics for those
13784 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13785 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13786 An interleave factor of 1 (the minimum) will perform no interleaving.
13787 Load/store multiple are used for aligned addresses where possible. */
13790 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
13791 HOST_WIDE_INT length
,
13792 unsigned int interleave_factor
)
13794 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
13795 int *regnos
= XALLOCAVEC (int, interleave_factor
);
13796 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
13797 HOST_WIDE_INT i
, j
;
13798 HOST_WIDE_INT remaining
= length
, words
;
13799 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
13801 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
13802 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
13803 HOST_WIDE_INT srcoffset
, dstoffset
;
13804 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
13807 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
13809 /* Use hard registers if we have aligned source or destination so we can use
13810 load/store multiple with contiguous registers. */
13811 if (dst_aligned
|| src_aligned
)
13812 for (i
= 0; i
< interleave_factor
; i
++)
13813 regs
[i
] = gen_rtx_REG (SImode
, i
);
13815 for (i
= 0; i
< interleave_factor
; i
++)
13816 regs
[i
] = gen_reg_rtx (SImode
);
13818 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
13819 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
13821 srcoffset
= dstoffset
= 0;
13823 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13824 For copying the last bytes we want to subtract this offset again. */
13825 src_autoinc
= dst_autoinc
= 0;
13827 for (i
= 0; i
< interleave_factor
; i
++)
13830 /* Copy BLOCK_SIZE_BYTES chunks. */
13832 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
13835 if (src_aligned
&& interleave_factor
> 1)
13837 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
13838 TRUE
, srcbase
, &srcoffset
));
13839 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13843 for (j
= 0; j
< interleave_factor
; j
++)
13845 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
13847 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13848 srcoffset
+ j
* UNITS_PER_WORD
);
13849 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13851 srcoffset
+= block_size_bytes
;
13855 if (dst_aligned
&& interleave_factor
> 1)
13857 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
13858 TRUE
, dstbase
, &dstoffset
));
13859 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13863 for (j
= 0; j
< interleave_factor
; j
++)
13865 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
13867 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13868 dstoffset
+ j
* UNITS_PER_WORD
);
13869 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13871 dstoffset
+= block_size_bytes
;
13874 remaining
-= block_size_bytes
;
13877 /* Copy any whole words left (note these aren't interleaved with any
13878 subsequent halfword/byte load/stores in the interests of simplicity). */
13880 words
= remaining
/ UNITS_PER_WORD
;
13882 gcc_assert (words
< interleave_factor
);
13884 if (src_aligned
&& words
> 1)
13886 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
13888 src_autoinc
+= UNITS_PER_WORD
* words
;
13892 for (j
= 0; j
< words
; j
++)
13894 addr
= plus_constant (Pmode
, src
,
13895 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
13896 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13897 srcoffset
+ j
* UNITS_PER_WORD
);
13899 emit_move_insn (regs
[j
], mem
);
13901 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13903 srcoffset
+= words
* UNITS_PER_WORD
;
13906 if (dst_aligned
&& words
> 1)
13908 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
13910 dst_autoinc
+= words
* UNITS_PER_WORD
;
13914 for (j
= 0; j
< words
; j
++)
13916 addr
= plus_constant (Pmode
, dst
,
13917 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
13918 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13919 dstoffset
+ j
* UNITS_PER_WORD
);
13921 emit_move_insn (mem
, regs
[j
]);
13923 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13925 dstoffset
+= words
* UNITS_PER_WORD
;
13928 remaining
-= words
* UNITS_PER_WORD
;
13930 gcc_assert (remaining
< 4);
13932 /* Copy a halfword if necessary. */
13934 if (remaining
>= 2)
13936 halfword_tmp
= gen_reg_rtx (SImode
);
13938 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13939 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
13940 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
13942 /* Either write out immediately, or delay until we've loaded the last
13943 byte, depending on interleave factor. */
13944 if (interleave_factor
== 1)
13946 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13947 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13948 emit_insn (gen_unaligned_storehi (mem
,
13949 gen_lowpart (HImode
, halfword_tmp
)));
13950 halfword_tmp
= NULL
;
13958 gcc_assert (remaining
< 2);
13960 /* Copy last byte. */
13962 if ((remaining
& 1) != 0)
13964 byte_tmp
= gen_reg_rtx (SImode
);
13966 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13967 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
13968 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
13970 if (interleave_factor
== 1)
13972 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13973 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
13974 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
13983 /* Store last halfword if we haven't done so already. */
13987 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13988 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
13989 emit_insn (gen_unaligned_storehi (mem
,
13990 gen_lowpart (HImode
, halfword_tmp
)));
13994 /* Likewise for last byte. */
13998 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
13999 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14000 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14004 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14007 /* From mips_adjust_block_mem:
14009 Helper function for doing a loop-based block operation on memory
14010 reference MEM. Each iteration of the loop will operate on LENGTH
14013 Create a new base register for use within the loop and point it to
14014 the start of MEM. Create a new memory reference that uses this
14015 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
/* NOTE(review): original lines 14019-14020 (the *LOOP_MEM parameter line
   and the opening brace) were dropped by the extraction — the signature
   below is incomplete; restore from the upstream file.  */
14018 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
/* Copy MEM's address into a fresh pseudo so the loop can increment it.  */
14021 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14023 /* Although the new mem does not refer to a known location,
14024 it does keep up to LENGTH bytes of alignment. */
/* BLKmode reference through the new base register; alignment is capped
   at LENGTH bytes since that is all one loop iteration touches.  */
14025 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14026 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14029 /* From mips_block_move_loop:
14031 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14032 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14033 the memory regions do not overlap. */
14036 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14037 unsigned int interleave_factor
,
14038 HOST_WIDE_INT bytes_per_iter
)
14040 rtx src_reg
, dest_reg
, final_src
, test
;
14041 HOST_WIDE_INT leftover
;
14043 leftover
= length
% bytes_per_iter
;
14044 length
-= leftover
;
14046 /* Create registers and memory references for use within the loop. */
14047 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14048 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14050 /* Calculate the value that SRC_REG should have after the last iteration of
14052 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14053 0, 0, OPTAB_WIDEN
);
14055 /* Emit the start of the loop. */
14056 rtx_code_label
*label
= gen_label_rtx ();
14057 emit_label (label
);
14059 /* Emit the loop body. */
14060 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14061 interleave_factor
);
14063 /* Move on to the next block. */
14064 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14065 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14067 /* Emit the loop condition. */
14068 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14069 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14071 /* Mop up any left-over bytes. */
14073 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14076 /* Emit a block move when either the source or destination is unaligned (not
14077 aligned to a four-byte boundary). This may need further tuning depending on
14078 core type, optimize_size setting, etc. */
14081 arm_movmemqi_unaligned (rtx
*operands
)
14083 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14087 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14088 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14089 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14090 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14091 or dst_aligned though: allow more interleaving in those cases since the
14092 resulting code can be smaller. */
14093 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14094 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14097 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14098 interleave_factor
, bytes_per_iter
);
14100 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14101 interleave_factor
);
14105 /* Note that the loop created by arm_block_move_unaligned_loop may be
14106 subject to loop unrolling, which makes tuning this condition a little
14109 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14111 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14118 arm_gen_movmemqi (rtx
*operands
)
14120 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14121 HOST_WIDE_INT srcoffset
, dstoffset
;
14123 rtx src
, dst
, srcbase
, dstbase
;
14124 rtx part_bytes_reg
= NULL
;
14127 if (!CONST_INT_P (operands
[2])
14128 || !CONST_INT_P (operands
[3])
14129 || INTVAL (operands
[2]) > 64)
14132 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14133 return arm_movmemqi_unaligned (operands
);
14135 if (INTVAL (operands
[3]) & 3)
14138 dstbase
= operands
[0];
14139 srcbase
= operands
[1];
14141 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14142 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14144 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14145 out_words_to_go
= INTVAL (operands
[2]) / 4;
14146 last_bytes
= INTVAL (operands
[2]) & 3;
14147 dstoffset
= srcoffset
= 0;
14149 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14150 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14152 for (i
= 0; in_words_to_go
>= 2; i
+=4)
14154 if (in_words_to_go
> 4)
14155 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14156 TRUE
, srcbase
, &srcoffset
));
14158 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14159 src
, FALSE
, srcbase
,
14162 if (out_words_to_go
)
14164 if (out_words_to_go
> 4)
14165 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14166 TRUE
, dstbase
, &dstoffset
));
14167 else if (out_words_to_go
!= 1)
14168 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14169 out_words_to_go
, dst
,
14172 dstbase
, &dstoffset
));
14175 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14176 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
14177 if (last_bytes
!= 0)
14179 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14185 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14186 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14189 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14190 if (out_words_to_go
)
14194 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14195 sreg
= copy_to_reg (mem
);
14197 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14198 emit_move_insn (mem
, sreg
);
14201 gcc_assert (!in_words_to_go
); /* Sanity check */
14204 if (in_words_to_go
)
14206 gcc_assert (in_words_to_go
> 0);
14208 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14209 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14212 gcc_assert (!last_bytes
|| part_bytes_reg
);
14214 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14216 rtx tmp
= gen_reg_rtx (SImode
);
14218 /* The bytes we want are in the top end of the word. */
14219 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14220 GEN_INT (8 * (4 - last_bytes
))));
14221 part_bytes_reg
= tmp
;
14225 mem
= adjust_automodify_address (dstbase
, QImode
,
14226 plus_constant (Pmode
, dst
,
14228 dstoffset
+ last_bytes
- 1);
14229 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14233 tmp
= gen_reg_rtx (SImode
);
14234 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14235 part_bytes_reg
= tmp
;
14242 if (last_bytes
> 1)
14244 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14245 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14249 rtx tmp
= gen_reg_rtx (SImode
);
14250 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14251 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14252 part_bytes_reg
= tmp
;
14259 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14260 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14267 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14270 next_consecutive_mem (rtx mem
)
14272 machine_mode mode
= GET_MODE (mem
);
14273 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14274 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14276 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14279 /* Copy using LDRD/STRD instructions whenever possible.
14280 Returns true upon success. */
14282 gen_movmem_ldrd_strd (rtx
*operands
)
14284 unsigned HOST_WIDE_INT len
;
14285 HOST_WIDE_INT align
;
14286 rtx src
, dst
, base
;
14288 bool src_aligned
, dst_aligned
;
14289 bool src_volatile
, dst_volatile
;
14291 gcc_assert (CONST_INT_P (operands
[2]));
14292 gcc_assert (CONST_INT_P (operands
[3]));
14294 len
= UINTVAL (operands
[2]);
14298 /* Maximum alignment we can assume for both src and dst buffers. */
14299 align
= INTVAL (operands
[3]);
14301 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14304 /* Place src and dst addresses in registers
14305 and update the corresponding mem rtx. */
14307 dst_volatile
= MEM_VOLATILE_P (dst
);
14308 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14309 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14310 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14313 src_volatile
= MEM_VOLATILE_P (src
);
14314 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14315 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14316 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14318 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14321 if (src_volatile
|| dst_volatile
)
14324 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14325 if (!(dst_aligned
|| src_aligned
))
14326 return arm_gen_movmemqi (operands
);
14328 /* If the either src or dst is unaligned we'll be accessing it as pairs
14329 of unaligned SImode accesses. Otherwise we can generate DImode
14330 ldrd/strd instructions. */
14331 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
14332 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
14337 reg0
= gen_reg_rtx (DImode
);
14338 rtx low_reg
= NULL_RTX
;
14339 rtx hi_reg
= NULL_RTX
;
14341 if (!src_aligned
|| !dst_aligned
)
14343 low_reg
= gen_lowpart (SImode
, reg0
);
14344 hi_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
14347 emit_move_insn (reg0
, src
);
14350 emit_insn (gen_unaligned_loadsi (low_reg
, src
));
14351 src
= next_consecutive_mem (src
);
14352 emit_insn (gen_unaligned_loadsi (hi_reg
, src
));
14356 emit_move_insn (dst
, reg0
);
14359 emit_insn (gen_unaligned_storesi (dst
, low_reg
));
14360 dst
= next_consecutive_mem (dst
);
14361 emit_insn (gen_unaligned_storesi (dst
, hi_reg
));
14364 src
= next_consecutive_mem (src
);
14365 dst
= next_consecutive_mem (dst
);
14368 gcc_assert (len
< 8);
14371 /* More than a word but less than a double-word to copy. Copy a word. */
14372 reg0
= gen_reg_rtx (SImode
);
14373 src
= adjust_address (src
, SImode
, 0);
14374 dst
= adjust_address (dst
, SImode
, 0);
14376 emit_move_insn (reg0
, src
);
14378 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14381 emit_move_insn (dst
, reg0
);
14383 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14385 src
= next_consecutive_mem (src
);
14386 dst
= next_consecutive_mem (dst
);
14393 /* Copy the remaining bytes. */
14396 dst
= adjust_address (dst
, HImode
, 0);
14397 src
= adjust_address (src
, HImode
, 0);
14398 reg0
= gen_reg_rtx (SImode
);
14400 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14402 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14405 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14407 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14409 src
= next_consecutive_mem (src
);
14410 dst
= next_consecutive_mem (dst
);
14415 dst
= adjust_address (dst
, QImode
, 0);
14416 src
= adjust_address (src
, QImode
, 0);
14417 reg0
= gen_reg_rtx (QImode
);
14418 emit_move_insn (reg0
, src
);
14419 emit_move_insn (dst
, reg0
);
14423 /* Select a dominance comparison mode if possible for a test of the general
14424 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14425 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14426 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14427 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14428 In all cases OP will be either EQ or NE, but we don't need to know which
14429 here. If we are unable to support a dominance comparison we return
14430 CC mode. This will then fail to match for the RTL expressions that
14431 generate this call. */
14433 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14435 enum rtx_code cond1
, cond2
;
14438 /* Currently we will probably get the wrong result if the individual
14439 comparisons are not simple. This also ensures that it is safe to
14440 reverse a comparison if necessary. */
14441 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14443 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14447 /* The if_then_else variant of this tests the second condition if the
14448 first passes, but is true if the first fails. Reverse the first
14449 condition to get a true "inclusive-or" expression. */
14450 if (cond_or
== DOM_CC_NX_OR_Y
)
14451 cond1
= reverse_condition (cond1
);
14453 /* If the comparisons are not equal, and one doesn't dominate the other,
14454 then we can't do this. */
14456 && !comparison_dominates_p (cond1
, cond2
)
14457 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14461 std::swap (cond1
, cond2
);
14466 if (cond_or
== DOM_CC_X_AND_Y
)
14471 case EQ
: return CC_DEQmode
;
14472 case LE
: return CC_DLEmode
;
14473 case LEU
: return CC_DLEUmode
;
14474 case GE
: return CC_DGEmode
;
14475 case GEU
: return CC_DGEUmode
;
14476 default: gcc_unreachable ();
14480 if (cond_or
== DOM_CC_X_AND_Y
)
14492 gcc_unreachable ();
14496 if (cond_or
== DOM_CC_X_AND_Y
)
14508 gcc_unreachable ();
14512 if (cond_or
== DOM_CC_X_AND_Y
)
14513 return CC_DLTUmode
;
14518 return CC_DLTUmode
;
14520 return CC_DLEUmode
;
14524 gcc_unreachable ();
14528 if (cond_or
== DOM_CC_X_AND_Y
)
14529 return CC_DGTUmode
;
14534 return CC_DGTUmode
;
14536 return CC_DGEUmode
;
14540 gcc_unreachable ();
14543 /* The remaining cases only occur when both comparisons are the
14546 gcc_assert (cond1
== cond2
);
14550 gcc_assert (cond1
== cond2
);
14554 gcc_assert (cond1
== cond2
);
14558 gcc_assert (cond1
== cond2
);
14559 return CC_DLEUmode
;
14562 gcc_assert (cond1
== cond2
);
14563 return CC_DGEUmode
;
14566 gcc_unreachable ();
14571 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14573 /* All floating point compares return CCFP if it is an equality
14574 comparison, and CCFPE otherwise. */
14575 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14598 gcc_unreachable ();
14602 /* A compare with a shifted operand. Because of canonicalization, the
14603 comparison will have to be swapped when we emit the assembler. */
14604 if (GET_MODE (y
) == SImode
14605 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14606 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14607 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14608 || GET_CODE (x
) == ROTATERT
))
14611 /* This operation is performed swapped, but since we only rely on the Z
14612 flag we don't need an additional mode. */
14613 if (GET_MODE (y
) == SImode
14614 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14615 && GET_CODE (x
) == NEG
14616 && (op
== EQ
|| op
== NE
))
14619 /* This is a special case that is used by combine to allow a
14620 comparison of a shifted byte load to be split into a zero-extend
14621 followed by a comparison of the shifted integer (only valid for
14622 equalities and unsigned inequalities). */
14623 if (GET_MODE (x
) == SImode
14624 && GET_CODE (x
) == ASHIFT
14625 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14626 && GET_CODE (XEXP (x
, 0)) == SUBREG
14627 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14628 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14629 && (op
== EQ
|| op
== NE
14630 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14631 && CONST_INT_P (y
))
14634 /* A construct for a conditional compare, if the false arm contains
14635 0, then both conditions must be true, otherwise either condition
14636 must be true. Not all conditions are possible, so CCmode is
14637 returned if it can't be done. */
14638 if (GET_CODE (x
) == IF_THEN_ELSE
14639 && (XEXP (x
, 2) == const0_rtx
14640 || XEXP (x
, 2) == const1_rtx
)
14641 && COMPARISON_P (XEXP (x
, 0))
14642 && COMPARISON_P (XEXP (x
, 1)))
14643 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14644 INTVAL (XEXP (x
, 2)));
14646 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14647 if (GET_CODE (x
) == AND
14648 && (op
== EQ
|| op
== NE
)
14649 && COMPARISON_P (XEXP (x
, 0))
14650 && COMPARISON_P (XEXP (x
, 1)))
14651 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14654 if (GET_CODE (x
) == IOR
14655 && (op
== EQ
|| op
== NE
)
14656 && COMPARISON_P (XEXP (x
, 0))
14657 && COMPARISON_P (XEXP (x
, 1)))
14658 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14661 /* An operation (on Thumb) where we want to test for a single bit.
14662 This is done by shifting that bit up into the top bit of a
14663 scratch register; we can then branch on the sign bit. */
14665 && GET_MODE (x
) == SImode
14666 && (op
== EQ
|| op
== NE
)
14667 && GET_CODE (x
) == ZERO_EXTRACT
14668 && XEXP (x
, 1) == const1_rtx
)
14671 /* An operation that sets the condition codes as a side-effect, the
14672 V flag is not set correctly, so we can only use comparisons where
14673 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14675 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14676 if (GET_MODE (x
) == SImode
14678 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
14679 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
14680 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
14681 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
14682 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
14683 || GET_CODE (x
) == LSHIFTRT
14684 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14685 || GET_CODE (x
) == ROTATERT
14686 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
14687 return CC_NOOVmode
;
14689 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
14692 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
14693 && GET_CODE (x
) == PLUS
14694 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
14697 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
14703 /* A DImode comparison against zero can be implemented by
14704 or'ing the two halves together. */
14705 if (y
== const0_rtx
)
14708 /* We can do an equality test in three Thumb instructions. */
14718 /* DImode unsigned comparisons can be implemented by cmp +
14719 cmpeq without a scratch register. Not worth doing in
14730 /* DImode signed and unsigned comparisons can be implemented
14731 by cmp + sbcs with a scratch register, but that does not
14732 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14733 gcc_assert (op
!= EQ
&& op
!= NE
);
14737 gcc_unreachable ();
14741 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
14742 return GET_MODE (x
);
14747 /* X and Y are two things to compare using CODE. Emit the compare insn and
14748 return the rtx for register 0 in the proper mode. FP means this is a
14749 floating point compare: I don't think that it is needed on the arm. */
14751 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
14755 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
14757 /* We might have X as a constant, Y as a register because of the predicates
14758 used for cmpdi. If so, force X to a register here. */
14759 if (dimode_comparison
&& !REG_P (x
))
14760 x
= force_reg (DImode
, x
);
14762 mode
= SELECT_CC_MODE (code
, x
, y
);
14763 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
14765 if (dimode_comparison
14766 && mode
!= CC_CZmode
)
14770 /* To compare two non-zero values for equality, XOR them and
14771 then compare against zero. Not used for ARM mode; there
14772 CC_CZmode is cheaper. */
14773 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
14775 gcc_assert (!reload_completed
);
14776 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
14780 /* A scratch register is required. */
14781 if (reload_completed
)
14782 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
14784 scratch
= gen_rtx_SCRATCH (SImode
);
14786 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
14787 set
= gen_rtx_SET (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14788 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
14791 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14796 /* Generate a sequence of insns that will generate the correct return
14797 address mask depending on the physical architecture that the program
14800 arm_gen_return_addr_mask (void)
14802 rtx reg
= gen_reg_rtx (Pmode
);
14804 emit_insn (gen_return_addr_mask (reg
));
14809 arm_reload_in_hi (rtx
*operands
)
14811 rtx ref
= operands
[1];
14813 HOST_WIDE_INT offset
= 0;
14815 if (GET_CODE (ref
) == SUBREG
)
14817 offset
= SUBREG_BYTE (ref
);
14818 ref
= SUBREG_REG (ref
);
14823 /* We have a pseudo which has been spilt onto the stack; there
14824 are two cases here: the first where there is a simple
14825 stack-slot replacement and a second where the stack-slot is
14826 out of range, or is used as a subreg. */
14827 if (reg_equiv_mem (REGNO (ref
)))
14829 ref
= reg_equiv_mem (REGNO (ref
));
14830 base
= find_replacement (&XEXP (ref
, 0));
14833 /* The slot is out of range, or was dressed up in a SUBREG. */
14834 base
= reg_equiv_address (REGNO (ref
));
14836 /* PR 62554: If there is no equivalent memory location then just move
14837 the value as an SImode register move. This happens when the target
14838 architecture variant does not have an HImode register move. */
14841 gcc_assert (REG_P (operands
[0]));
14842 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14843 gen_rtx_SUBREG (SImode
, ref
, 0)));
14848 base
= find_replacement (&XEXP (ref
, 0));
14850 /* Handle the case where the address is too complex to be offset by 1. */
14851 if (GET_CODE (base
) == MINUS
14852 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14854 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14856 emit_set_insn (base_plus
, base
);
14859 else if (GET_CODE (base
) == PLUS
)
14861 /* The addend must be CONST_INT, or we would have dealt with it above. */
14862 HOST_WIDE_INT hi
, lo
;
14864 offset
+= INTVAL (XEXP (base
, 1));
14865 base
= XEXP (base
, 0);
14867 /* Rework the address into a legal sequence of insns. */
14868 /* Valid range for lo is -4095 -> 4095 */
14871 : -((-offset
) & 0xfff));
14873 /* Corner case, if lo is the max offset then we would be out of range
14874 once we have added the additional 1 below, so bump the msb into the
14875 pre-loading insn(s). */
14879 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14880 ^ (HOST_WIDE_INT
) 0x80000000)
14881 - (HOST_WIDE_INT
) 0x80000000);
14883 gcc_assert (hi
+ lo
== offset
);
14887 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14889 /* Get the base address; addsi3 knows how to handle constants
14890 that require more than one insn. */
14891 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14897 /* Operands[2] may overlap operands[0] (though it won't overlap
14898 operands[1]), that's why we asked for a DImode reg -- so we can
14899 use the bit that does not overlap. */
14900 if (REGNO (operands
[2]) == REGNO (operands
[0]))
14901 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14903 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14905 emit_insn (gen_zero_extendqisi2 (scratch
,
14906 gen_rtx_MEM (QImode
,
14907 plus_constant (Pmode
, base
,
14909 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14910 gen_rtx_MEM (QImode
,
14911 plus_constant (Pmode
, base
,
14913 if (!BYTES_BIG_ENDIAN
)
14914 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14915 gen_rtx_IOR (SImode
,
14918 gen_rtx_SUBREG (SImode
, operands
[0], 0),
14922 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14923 gen_rtx_IOR (SImode
,
14924 gen_rtx_ASHIFT (SImode
, scratch
,
14926 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
14929 /* Handle storing a half-word to memory during reload by synthesizing as two
14930 byte stores. Take care not to clobber the input values until after we
14931 have moved them somewhere safe. This code assumes that if the DImode
14932 scratch in operands[2] overlaps either the input value or output address
14933 in some way, then that value must die in this insn (we absolutely need
14934 two scratch registers for some corner cases). */
14936 arm_reload_out_hi (rtx
*operands
)
14938 rtx ref
= operands
[0];
14939 rtx outval
= operands
[1];
14941 HOST_WIDE_INT offset
= 0;
14943 if (GET_CODE (ref
) == SUBREG
)
14945 offset
= SUBREG_BYTE (ref
);
14946 ref
= SUBREG_REG (ref
);
14951 /* We have a pseudo which has been spilt onto the stack; there
14952 are two cases here: the first where there is a simple
14953 stack-slot replacement and a second where the stack-slot is
14954 out of range, or is used as a subreg. */
14955 if (reg_equiv_mem (REGNO (ref
)))
14957 ref
= reg_equiv_mem (REGNO (ref
));
14958 base
= find_replacement (&XEXP (ref
, 0));
14961 /* The slot is out of range, or was dressed up in a SUBREG. */
14962 base
= reg_equiv_address (REGNO (ref
));
14964 /* PR 62254: If there is no equivalent memory location then just move
14965 the value as an SImode register move. This happens when the target
14966 architecture variant does not have an HImode register move. */
14969 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
14971 if (REG_P (outval
))
14973 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
14974 gen_rtx_SUBREG (SImode
, outval
, 0)));
14976 else /* SUBREG_P (outval) */
14978 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
14979 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
14980 SUBREG_REG (outval
)));
14982 /* FIXME: Handle other cases ? */
14983 gcc_unreachable ();
14989 base
= find_replacement (&XEXP (ref
, 0));
14991 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14993 /* Handle the case where the address is too complex to be offset by 1. */
14994 if (GET_CODE (base
) == MINUS
14995 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14997 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14999 /* Be careful not to destroy OUTVAL. */
15000 if (reg_overlap_mentioned_p (base_plus
, outval
))
15002 /* Updating base_plus might destroy outval, see if we can
15003 swap the scratch and base_plus. */
15004 if (!reg_overlap_mentioned_p (scratch
, outval
))
15005 std::swap (scratch
, base_plus
);
15008 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15010 /* Be conservative and copy OUTVAL into the scratch now,
15011 this should only be necessary if outval is a subreg
15012 of something larger than a word. */
15013 /* XXX Might this clobber base? I can't see how it can,
15014 since scratch is known to overlap with OUTVAL, and
15015 must be wider than a word. */
15016 emit_insn (gen_movhi (scratch_hi
, outval
));
15017 outval
= scratch_hi
;
15021 emit_set_insn (base_plus
, base
);
15024 else if (GET_CODE (base
) == PLUS
)
15026 /* The addend must be CONST_INT, or we would have dealt with it above. */
15027 HOST_WIDE_INT hi
, lo
;
15029 offset
+= INTVAL (XEXP (base
, 1));
15030 base
= XEXP (base
, 0);
15032 /* Rework the address into a legal sequence of insns. */
15033 /* Valid range for lo is -4095 -> 4095 */
15036 : -((-offset
) & 0xfff));
15038 /* Corner case, if lo is the max offset then we would be out of range
15039 once we have added the additional 1 below, so bump the msb into the
15040 pre-loading insn(s). */
15044 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15045 ^ (HOST_WIDE_INT
) 0x80000000)
15046 - (HOST_WIDE_INT
) 0x80000000);
15048 gcc_assert (hi
+ lo
== offset
);
15052 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15054 /* Be careful not to destroy OUTVAL. */
15055 if (reg_overlap_mentioned_p (base_plus
, outval
))
15057 /* Updating base_plus might destroy outval, see if we
15058 can swap the scratch and base_plus. */
15059 if (!reg_overlap_mentioned_p (scratch
, outval
))
15060 std::swap (scratch
, base_plus
);
15063 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15065 /* Be conservative and copy outval into scratch now,
15066 this should only be necessary if outval is a
15067 subreg of something larger than a word. */
15068 /* XXX Might this clobber base? I can't see how it
15069 can, since scratch is known to overlap with
15071 emit_insn (gen_movhi (scratch_hi
, outval
));
15072 outval
= scratch_hi
;
15076 /* Get the base address; addsi3 knows how to handle constants
15077 that require more than one insn. */
15078 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15084 if (BYTES_BIG_ENDIAN
)
15086 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15087 plus_constant (Pmode
, base
,
15089 gen_lowpart (QImode
, outval
)));
15090 emit_insn (gen_lshrsi3 (scratch
,
15091 gen_rtx_SUBREG (SImode
, outval
, 0),
15093 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15095 gen_lowpart (QImode
, scratch
)));
15099 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15101 gen_lowpart (QImode
, outval
)));
15102 emit_insn (gen_lshrsi3 (scratch
,
15103 gen_rtx_SUBREG (SImode
, outval
, 0),
15105 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15106 plus_constant (Pmode
, base
,
15108 gen_lowpart (QImode
, scratch
)));
15112 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15113 (padded to the size of a word) should be passed in a register. */
15116 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
15118 if (TARGET_AAPCS_BASED
)
15119 return must_pass_in_stack_var_size (mode
, type
);
15121 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15125 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15126 Return true if an argument passed on the stack should be padded upwards,
15127 i.e. if the least-significant byte has useful data.
15128 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15129 aggregate types are placed in the lowest memory address. */
15132 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED
, const_tree type
)
15134 if (!TARGET_AAPCS_BASED
)
15135 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
15137 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15144 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15145 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15146 register has useful data, and return the opposite if the most
15147 significant byte does. */
15150 arm_pad_reg_upward (machine_mode mode
,
15151 tree type
, int first ATTRIBUTE_UNUSED
)
15153 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15155 /* For AAPCS, small aggregates, small fixed-point types,
15156 and small complex types are always padded upwards. */
15159 if ((AGGREGATE_TYPE_P (type
)
15160 || TREE_CODE (type
) == COMPLEX_TYPE
15161 || FIXED_POINT_TYPE_P (type
))
15162 && int_size_in_bytes (type
) <= 4)
15167 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15168 && GET_MODE_SIZE (mode
) <= 4)
15173 /* Otherwise, use default padding. */
15174 return !BYTES_BIG_ENDIAN
;
15177 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15178 assuming that the address in the base register is word aligned. */
15180 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15182 HOST_WIDE_INT max_offset
;
15184 /* Offset must be a multiple of 4 in Thumb mode. */
15185 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15190 else if (TARGET_ARM
)
15195 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15198 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15199 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15200 Assumes that the address in the base register RN is word aligned. Pattern
15201 guarantees that both memory accesses use the same base register,
15202 the offsets are constants within the range, and the gap between the offsets is 4.
15203 If preload complete then check that registers are legal. WBACK indicates whether
15204 address is updated. LOAD indicates whether memory access is load or store. */
15206 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15207 bool wback
, bool load
)
15209 unsigned int t
, t2
, n
;
15211 if (!reload_completed
)
15214 if (!offset_ok_for_ldrd_strd (offset
))
15221 if ((TARGET_THUMB2
)
15222 && ((wback
&& (n
== t
|| n
== t2
))
15223 || (t
== SP_REGNUM
)
15224 || (t
== PC_REGNUM
)
15225 || (t2
== SP_REGNUM
)
15226 || (t2
== PC_REGNUM
)
15227 || (!load
&& (n
== PC_REGNUM
))
15228 || (load
&& (t
== t2
))
15229 /* Triggers Cortex-M3 LDRD errata. */
15230 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15234 && ((wback
&& (n
== t
|| n
== t2
))
15235 || (t2
== PC_REGNUM
)
15236 || (t
% 2 != 0) /* First destination register is not even. */
15238 /* PC can be used as base register (for offset addressing only),
15239 but it is depricated. */
15240 || (n
== PC_REGNUM
)))
15246 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15247 operand MEM's address contains an immediate offset from the base
15248 register and has no side effects, in which case it sets BASE and
15249 OFFSET accordingly. */
15251 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
)
15255 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15257 /* TODO: Handle more general memory operand patterns, such as
15258 PRE_DEC and PRE_INC. */
15260 if (side_effects_p (mem
))
15263 /* Can't deal with subregs. */
15264 if (GET_CODE (mem
) == SUBREG
)
15267 gcc_assert (MEM_P (mem
));
15269 *offset
= const0_rtx
;
15271 addr
= XEXP (mem
, 0);
15273 /* If addr isn't valid for DImode, then we can't handle it. */
15274 if (!arm_legitimate_address_p (DImode
, addr
,
15275 reload_in_progress
|| reload_completed
))
15283 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15285 *base
= XEXP (addr
, 0);
15286 *offset
= XEXP (addr
, 1);
15287 return (REG_P (*base
) && CONST_INT_P (*offset
));
15293 /* Called from a peephole2 to replace two word-size accesses with a
15294 single LDRD/STRD instruction. Returns true iff we can generate a
15295 new instruction sequence. That is, both accesses use the same base
15296 register and the gap between constant offsets is 4. This function
15297 may reorder its operands to match ldrd/strd RTL templates.
15298 OPERANDS are the operands found by the peephole matcher;
15299 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15300 corresponding memory operands. LOAD indicaates whether the access
15301 is load or store. CONST_STORE indicates a store of constant
15302 integer values held in OPERANDS[4,5] and assumes that the pattern
15303 is of length 4 insn, for the purpose of checking dead registers.
15304 COMMUTE indicates that register operands may be reordered. */
15306 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15307 bool const_store
, bool commute
)
15310 HOST_WIDE_INT offsets
[2], offset
;
15311 rtx base
= NULL_RTX
;
15312 rtx cur_base
, cur_offset
, tmp
;
15314 HARD_REG_SET regset
;
15316 gcc_assert (!const_store
|| !load
);
15317 /* Check that the memory references are immediate offsets from the
15318 same base register. Extract the base register, the destination
15319 registers, and the corresponding memory offsets. */
15320 for (i
= 0; i
< nops
; i
++)
15322 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
))
15327 else if (REGNO (base
) != REGNO (cur_base
))
15330 offsets
[i
] = INTVAL (cur_offset
);
15331 if (GET_CODE (operands
[i
]) == SUBREG
)
15333 tmp
= SUBREG_REG (operands
[i
]);
15334 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15339 /* Make sure there is no dependency between the individual loads. */
15340 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15341 return false; /* RAW */
15343 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15344 return false; /* WAW */
15346 /* If the same input register is used in both stores
15347 when storing different constants, try to find a free register.
15348 For example, the code
15353 can be transformed into
15357 in Thumb mode assuming that r1 is free.
15358 For ARM mode do the same but only if the starting register
15359 can be made to be even. */
15361 && REGNO (operands
[0]) == REGNO (operands
[1])
15362 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15366 CLEAR_HARD_REG_SET (regset
);
15367 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15368 if (tmp
== NULL_RTX
)
15371 /* Use the new register in the first load to ensure that
15372 if the original input register is not dead after peephole,
15373 then it will have the correct constant value. */
15376 else if (TARGET_ARM
)
15378 int regno
= REGNO (operands
[0]);
15379 if (!peep2_reg_dead_p (4, operands
[0]))
15381 /* When the input register is even and is not dead after the
15382 pattern, it has to hold the second constant but we cannot
15383 form a legal STRD in ARM mode with this register as the second
15385 if (regno
% 2 == 0)
15388 /* Is regno-1 free? */
15389 SET_HARD_REG_SET (regset
);
15390 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15391 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15392 if (tmp
== NULL_RTX
)
15399 /* Find a DImode register. */
15400 CLEAR_HARD_REG_SET (regset
);
15401 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15402 if (tmp
!= NULL_RTX
)
15404 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15405 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15409 /* Can we use the input register to form a DI register? */
15410 SET_HARD_REG_SET (regset
);
15411 CLEAR_HARD_REG_BIT(regset
,
15412 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15413 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15414 if (tmp
== NULL_RTX
)
15416 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15420 gcc_assert (operands
[0] != NULL_RTX
);
15421 gcc_assert (operands
[1] != NULL_RTX
);
15422 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15423 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15427 /* Make sure the instructions are ordered with lower memory access first. */
15428 if (offsets
[0] > offsets
[1])
15430 gap
= offsets
[0] - offsets
[1];
15431 offset
= offsets
[1];
15433 /* Swap the instructions such that lower memory is accessed first. */
15434 std::swap (operands
[0], operands
[1]);
15435 std::swap (operands
[2], operands
[3]);
15437 std::swap (operands
[4], operands
[5]);
15441 gap
= offsets
[1] - offsets
[0];
15442 offset
= offsets
[0];
15445 /* Make sure accesses are to consecutive memory locations. */
15449 /* Make sure we generate legal instructions. */
15450 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15454 /* In Thumb state, where registers are almost unconstrained, there
15455 is little hope to fix it. */
15459 if (load
&& commute
)
15461 /* Try reordering registers. */
15462 std::swap (operands
[0], operands
[1]);
15463 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15470 /* If input registers are dead after this pattern, they can be
15471 reordered or replaced by other registers that are free in the
15472 current pattern. */
15473 if (!peep2_reg_dead_p (4, operands
[0])
15474 || !peep2_reg_dead_p (4, operands
[1]))
15477 /* Try to reorder the input registers. */
15478 /* For example, the code
15483 can be transformed into
15488 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15491 std::swap (operands
[0], operands
[1]);
15495 /* Try to find a free DI register. */
15496 CLEAR_HARD_REG_SET (regset
);
15497 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15498 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15501 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15502 if (tmp
== NULL_RTX
)
15505 /* DREG must be an even-numbered register in DImode.
15506 Split it into SI registers. */
15507 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15508 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15509 gcc_assert (operands
[0] != NULL_RTX
);
15510 gcc_assert (operands
[1] != NULL_RTX
);
15511 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15512 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15514 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15526 /* Print a symbolic form of X to the debug file, F. */
15528 arm_print_value (FILE *f
, rtx x
)
15530 switch (GET_CODE (x
))
15533 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15537 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15545 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15547 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15548 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15556 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15560 fprintf (f
, "`%s'", XSTR (x
, 0));
15564 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15568 arm_print_value (f
, XEXP (x
, 0));
15572 arm_print_value (f
, XEXP (x
, 0));
15574 arm_print_value (f
, XEXP (x
, 1));
15582 fprintf (f
, "????");
15587 /* Routines for manipulation of the constant pool. */
15589 /* Arm instructions cannot load a large constant directly into a
15590 register; they have to come from a pc relative load. The constant
15591 must therefore be placed in the addressable range of the pc
15592 relative load. Depending on the precise pc relative load
15593 instruction the range is somewhere between 256 bytes and 4k. This
15594 means that we often have to dump a constant inside a function, and
15595 generate code to branch around it.
15597 It is important to minimize this, since the branches will slow
15598 things down and make the code larger.
15600 Normally we can hide the table after an existing unconditional
15601 branch so that there is no interruption of the flow, but in the
15602 worst case the code looks like this:
15620 We fix this by performing a scan after scheduling, which notices
15621 which instructions need to have their operands fetched from the
15622 constant table and builds the table.
15624 The algorithm starts by building a table of all the constants that
15625 need fixing up and all the natural barriers in the function (places
15626 where a constant table can be dropped without breaking the flow).
15627 For each fixup we note how far the pc-relative replacement will be
15628 able to reach and the offset of the instruction into the function.
15630 Having built the table we then group the fixes together to form
15631 tables that are as large as possible (subject to addressing
15632 constraints) and emit each table of constants after the last
15633 barrier that is within range of all the instructions in the group.
15634 If a group does not contain a barrier, then we forcibly create one
15635 by inserting a jump instruction into the flow. Once the table has
15636 been inserted, the insns are then modified to reference the
15637 relevant entry in the pool.
15639 Possible enhancements to the algorithm (not implemented) are:
15641 1) For some processors and object formats, there may be benefit in
15642 aligning the pools to the start of cache lines; this alignment
15643 would need to be taken into account when calculating addressability
15646 /* These typedefs are located at the start of this file, so that
15647 they can be used in the prototypes there. This comment is to
15648 remind readers of that fact so that the following structures
15649 can be understood more easily.
15651 typedef struct minipool_node Mnode;
15652 typedef struct minipool_fixup Mfix; */
15654 struct minipool_node
15656 /* Doubly linked chain of entries. */
15659 /* The maximum offset into the code that this entry can be placed. While
15660 pushing fixes for forward references, all entries are sorted in order
15661 of increasing max_address. */
15662 HOST_WIDE_INT max_address
;
15663 /* Similarly for an entry inserted for a backwards ref. */
15664 HOST_WIDE_INT min_address
;
15665 /* The number of fixes referencing this entry. This can become zero
15666 if we "unpush" an entry. In this case we ignore the entry when we
15667 come to emit the code. */
15669 /* The offset from the start of the minipool. */
15670 HOST_WIDE_INT offset
;
15671 /* The value in table. */
15673 /* The mode of value. */
15675 /* The size of the value. With iWMMXt enabled
15676 sizes > 4 also imply an alignment of 8-bytes. */
15680 struct minipool_fixup
15684 HOST_WIDE_INT address
;
15690 HOST_WIDE_INT forwards
;
15691 HOST_WIDE_INT backwards
;
15694 /* Fixes less than a word need padding out to a word boundary. */
15695 #define MINIPOOL_FIX_SIZE(mode) \
15696 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15698 static Mnode
* minipool_vector_head
;
15699 static Mnode
* minipool_vector_tail
;
15700 static rtx_code_label
*minipool_vector_label
;
15701 static int minipool_pad
;
15703 /* The linked list of all minipool fixes required for this function. */
15704 Mfix
* minipool_fix_head
;
15705 Mfix
* minipool_fix_tail
;
15706 /* The fix entry for the current minipool, once it has been placed. */
15707 Mfix
* minipool_barrier
;
15709 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15710 #define JUMP_TABLES_IN_TEXT_SECTION 0
15713 static HOST_WIDE_INT
15714 get_jump_table_size (rtx_jump_table_data
*insn
)
15716 /* ADDR_VECs only take room if read-only data does into the text
15718 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
15720 rtx body
= PATTERN (insn
);
15721 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
15722 HOST_WIDE_INT size
;
15723 HOST_WIDE_INT modesize
;
15725 modesize
= GET_MODE_SIZE (GET_MODE (body
));
15726 size
= modesize
* XVECLEN (body
, elt
);
15730 /* Round up size of TBB table to a halfword boundary. */
15731 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
15734 /* No padding necessary for TBH. */
15737 /* Add two bytes for alignment on Thumb. */
15742 gcc_unreachable ();
15750 /* Return the maximum amount of padding that will be inserted before
15753 static HOST_WIDE_INT
15754 get_label_padding (rtx label
)
15756 HOST_WIDE_INT align
, min_insn_size
;
15758 align
= 1 << label_to_alignment (label
);
15759 min_insn_size
= TARGET_THUMB
? 2 : 4;
15760 return align
> min_insn_size
? align
- min_insn_size
: 0;
15763 /* Move a minipool fix MP from its current location to before MAX_MP.
15764 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15765 constraints may need updating. */
15767 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
15768 HOST_WIDE_INT max_address
)
15770 /* The code below assumes these are different. */
15771 gcc_assert (mp
!= max_mp
);
15773 if (max_mp
== NULL
)
15775 if (max_address
< mp
->max_address
)
15776 mp
->max_address
= max_address
;
15780 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15781 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15783 mp
->max_address
= max_address
;
15785 /* Unlink MP from its current position. Since max_mp is non-null,
15786 mp->prev must be non-null. */
15787 mp
->prev
->next
= mp
->next
;
15788 if (mp
->next
!= NULL
)
15789 mp
->next
->prev
= mp
->prev
;
15791 minipool_vector_tail
= mp
->prev
;
15793 /* Re-insert it before MAX_MP. */
15795 mp
->prev
= max_mp
->prev
;
15798 if (mp
->prev
!= NULL
)
15799 mp
->prev
->next
= mp
;
15801 minipool_vector_head
= mp
;
15804 /* Save the new entry. */
15807 /* Scan over the preceding entries and adjust their addresses as
15809 while (mp
->prev
!= NULL
15810 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15812 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15819 /* Add a constant to the minipool for a forward reference. Returns the
15820 node added or NULL if the constant will not fit in this pool. */
15822 add_minipool_forward_ref (Mfix
*fix
)
15824 /* If set, max_mp is the first pool_entry that has a lower
15825 constraint than the one we are trying to add. */
15826 Mnode
* max_mp
= NULL
;
15827 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
15830 /* If the minipool starts before the end of FIX->INSN then this FIX
15831 can not be placed into the current pool. Furthermore, adding the
15832 new constant pool entry may cause the pool to start FIX_SIZE bytes
15834 if (minipool_vector_head
&&
15835 (fix
->address
+ get_attr_length (fix
->insn
)
15836 >= minipool_vector_head
->max_address
- fix
->fix_size
))
15839 /* Scan the pool to see if a constant with the same value has
15840 already been added. While we are doing this, also note the
15841 location where we must insert the constant if it doesn't already
15843 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15845 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15846 && fix
->mode
== mp
->mode
15847 && (!LABEL_P (fix
->value
)
15848 || (CODE_LABEL_NUMBER (fix
->value
)
15849 == CODE_LABEL_NUMBER (mp
->value
)))
15850 && rtx_equal_p (fix
->value
, mp
->value
))
15852 /* More than one fix references this entry. */
15854 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
15857 /* Note the insertion point if necessary. */
15859 && mp
->max_address
> max_address
)
15862 /* If we are inserting an 8-bytes aligned quantity and
15863 we have not already found an insertion point, then
15864 make sure that all such 8-byte aligned quantities are
15865 placed at the start of the pool. */
15866 if (ARM_DOUBLEWORD_ALIGN
15868 && fix
->fix_size
>= 8
15869 && mp
->fix_size
< 8)
15872 max_address
= mp
->max_address
;
15876 /* The value is not currently in the minipool, so we need to create
15877 a new entry for it. If MAX_MP is NULL, the entry will be put on
15878 the end of the list since the placement is less constrained than
15879 any existing entry. Otherwise, we insert the new fix before
15880 MAX_MP and, if necessary, adjust the constraints on the other
15883 mp
->fix_size
= fix
->fix_size
;
15884 mp
->mode
= fix
->mode
;
15885 mp
->value
= fix
->value
;
15887 /* Not yet required for a backwards ref. */
15888 mp
->min_address
= -65536;
15890 if (max_mp
== NULL
)
15892 mp
->max_address
= max_address
;
15894 mp
->prev
= minipool_vector_tail
;
15896 if (mp
->prev
== NULL
)
15898 minipool_vector_head
= mp
;
15899 minipool_vector_label
= gen_label_rtx ();
15902 mp
->prev
->next
= mp
;
15904 minipool_vector_tail
= mp
;
15908 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15909 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15911 mp
->max_address
= max_address
;
15914 mp
->prev
= max_mp
->prev
;
15916 if (mp
->prev
!= NULL
)
15917 mp
->prev
->next
= mp
;
15919 minipool_vector_head
= mp
;
15922 /* Save the new entry. */
15925 /* Scan over the preceding entries and adjust their addresses as
15927 while (mp
->prev
!= NULL
15928 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15930 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15938 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
15939 HOST_WIDE_INT min_address
)
15941 HOST_WIDE_INT offset
;
15943 /* The code below assumes these are different. */
15944 gcc_assert (mp
!= min_mp
);
15946 if (min_mp
== NULL
)
15948 if (min_address
> mp
->min_address
)
15949 mp
->min_address
= min_address
;
15953 /* We will adjust this below if it is too loose. */
15954 mp
->min_address
= min_address
;
15956 /* Unlink MP from its current position. Since min_mp is non-null,
15957 mp->next must be non-null. */
15958 mp
->next
->prev
= mp
->prev
;
15959 if (mp
->prev
!= NULL
)
15960 mp
->prev
->next
= mp
->next
;
15962 minipool_vector_head
= mp
->next
;
15964 /* Reinsert it after MIN_MP. */
15966 mp
->next
= min_mp
->next
;
15968 if (mp
->next
!= NULL
)
15969 mp
->next
->prev
= mp
;
15971 minipool_vector_tail
= mp
;
15977 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15979 mp
->offset
= offset
;
15980 if (mp
->refcount
> 0)
15981 offset
+= mp
->fix_size
;
15983 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
15984 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
15990 /* Add a constant to the minipool for a backward reference. Returns the
15991 node added or NULL if the constant will not fit in this pool.
15993 Note that the code for insertion for a backwards reference can be
15994 somewhat confusing because the calculated offsets for each fix do
15995 not take into account the size of the pool (which is still under
15998 add_minipool_backward_ref (Mfix
*fix
)
16000 /* If set, min_mp is the last pool_entry that has a lower constraint
16001 than the one we are trying to add. */
16002 Mnode
*min_mp
= NULL
;
16003 /* This can be negative, since it is only a constraint. */
16004 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16007 /* If we can't reach the current pool from this insn, or if we can't
16008 insert this entry at the end of the pool without pushing other
16009 fixes out of range, then we don't try. This ensures that we
16010 can't fail later on. */
16011 if (min_address
>= minipool_barrier
->address
16012 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16013 >= minipool_barrier
->address
))
16016 /* Scan the pool to see if a constant with the same value has
16017 already been added. While we are doing this, also note the
16018 location where we must insert the constant if it doesn't already
16020 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16022 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16023 && fix
->mode
== mp
->mode
16024 && (!LABEL_P (fix
->value
)
16025 || (CODE_LABEL_NUMBER (fix
->value
)
16026 == CODE_LABEL_NUMBER (mp
->value
)))
16027 && rtx_equal_p (fix
->value
, mp
->value
)
16028 /* Check that there is enough slack to move this entry to the
16029 end of the table (this is conservative). */
16030 && (mp
->max_address
16031 > (minipool_barrier
->address
16032 + minipool_vector_tail
->offset
16033 + minipool_vector_tail
->fix_size
)))
16036 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16039 if (min_mp
!= NULL
)
16040 mp
->min_address
+= fix
->fix_size
;
16043 /* Note the insertion point if necessary. */
16044 if (mp
->min_address
< min_address
)
16046 /* For now, we do not allow the insertion of 8-byte alignment
16047 requiring nodes anywhere but at the start of the pool. */
16048 if (ARM_DOUBLEWORD_ALIGN
16049 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16054 else if (mp
->max_address
16055 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16057 /* Inserting before this entry would push the fix beyond
16058 its maximum address (which can happen if we have
16059 re-located a forwards fix); force the new fix to come
16061 if (ARM_DOUBLEWORD_ALIGN
16062 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16067 min_address
= mp
->min_address
+ fix
->fix_size
;
16070 /* Do not insert a non-8-byte aligned quantity before 8-byte
16071 aligned quantities. */
16072 else if (ARM_DOUBLEWORD_ALIGN
16073 && fix
->fix_size
< 8
16074 && mp
->fix_size
>= 8)
16077 min_address
= mp
->min_address
+ fix
->fix_size
;
16082 /* We need to create a new entry. */
16084 mp
->fix_size
= fix
->fix_size
;
16085 mp
->mode
= fix
->mode
;
16086 mp
->value
= fix
->value
;
16088 mp
->max_address
= minipool_barrier
->address
+ 65536;
16090 mp
->min_address
= min_address
;
16092 if (min_mp
== NULL
)
16095 mp
->next
= minipool_vector_head
;
16097 if (mp
->next
== NULL
)
16099 minipool_vector_tail
= mp
;
16100 minipool_vector_label
= gen_label_rtx ();
16103 mp
->next
->prev
= mp
;
16105 minipool_vector_head
= mp
;
16109 mp
->next
= min_mp
->next
;
16113 if (mp
->next
!= NULL
)
16114 mp
->next
->prev
= mp
;
16116 minipool_vector_tail
= mp
;
16119 /* Save the new entry. */
16127 /* Scan over the following entries and adjust their offsets. */
16128 while (mp
->next
!= NULL
)
16130 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16131 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16134 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16136 mp
->next
->offset
= mp
->offset
;
16145 assign_minipool_offsets (Mfix
*barrier
)
16147 HOST_WIDE_INT offset
= 0;
16150 minipool_barrier
= barrier
;
16152 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16154 mp
->offset
= offset
;
16156 if (mp
->refcount
> 0)
16157 offset
+= mp
->fix_size
;
16161 /* Output the literal table */
16163 dump_minipool (rtx_insn
*scan
)
16169 if (ARM_DOUBLEWORD_ALIGN
)
16170 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16171 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16178 fprintf (dump_file
,
16179 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16180 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16182 scan
= emit_label_after (gen_label_rtx (), scan
);
16183 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16184 scan
= emit_label_after (minipool_vector_label
, scan
);
16186 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16188 if (mp
->refcount
> 0)
16192 fprintf (dump_file
,
16193 ";; Offset %u, min %ld, max %ld ",
16194 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16195 (unsigned long) mp
->max_address
);
16196 arm_print_value (dump_file
, mp
->value
);
16197 fputc ('\n', dump_file
);
16200 rtx val
= copy_rtx (mp
->value
);
16202 switch (GET_MODE_SIZE (mp
->mode
))
16204 #ifdef HAVE_consttable_1
16206 scan
= emit_insn_after (gen_consttable_1 (val
), scan
);
16210 #ifdef HAVE_consttable_2
16212 scan
= emit_insn_after (gen_consttable_2 (val
), scan
);
16216 #ifdef HAVE_consttable_4
16218 scan
= emit_insn_after (gen_consttable_4 (val
), scan
);
16222 #ifdef HAVE_consttable_8
16224 scan
= emit_insn_after (gen_consttable_8 (val
), scan
);
16228 #ifdef HAVE_consttable_16
16230 scan
= emit_insn_after (gen_consttable_16 (val
), scan
);
16235 gcc_unreachable ();
16243 minipool_vector_head
= minipool_vector_tail
= NULL
;
16244 scan
= emit_insn_after (gen_consttable_end (), scan
);
16245 scan
= emit_barrier_after (scan
);
16248 /* Return the cost of forcibly inserting a barrier after INSN. */
16250 arm_barrier_cost (rtx_insn
*insn
)
16252 /* Basing the location of the pool on the loop depth is preferable,
16253 but at the moment, the basic block information seems to be
16254 corrupt by this stage of the compilation. */
16255 int base_cost
= 50;
16256 rtx_insn
*next
= next_nonnote_insn (insn
);
16258 if (next
!= NULL
&& LABEL_P (next
))
16261 switch (GET_CODE (insn
))
16264 /* It will always be better to place the table before the label, rather
16273 return base_cost
- 10;
16276 return base_cost
+ 10;
16280 /* Find the best place in the insn stream in the range
16281 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16282 Create the barrier by inserting a jump and add a new fix entry for
16285 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16287 HOST_WIDE_INT count
= 0;
16288 rtx_barrier
*barrier
;
16289 rtx_insn
*from
= fix
->insn
;
16290 /* The instruction after which we will insert the jump. */
16291 rtx_insn
*selected
= NULL
;
16293 /* The address at which the jump instruction will be placed. */
16294 HOST_WIDE_INT selected_address
;
16296 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16297 rtx_code_label
*label
= gen_label_rtx ();
16299 selected_cost
= arm_barrier_cost (from
);
16300 selected_address
= fix
->address
;
16302 while (from
&& count
< max_count
)
16304 rtx_jump_table_data
*tmp
;
16307 /* This code shouldn't have been called if there was a natural barrier
16309 gcc_assert (!BARRIER_P (from
));
16311 /* Count the length of this insn. This must stay in sync with the
16312 code that pushes minipool fixes. */
16313 if (LABEL_P (from
))
16314 count
+= get_label_padding (from
);
16316 count
+= get_attr_length (from
);
16318 /* If there is a jump table, add its length. */
16319 if (tablejump_p (from
, NULL
, &tmp
))
16321 count
+= get_jump_table_size (tmp
);
16323 /* Jump tables aren't in a basic block, so base the cost on
16324 the dispatch insn. If we select this location, we will
16325 still put the pool after the table. */
16326 new_cost
= arm_barrier_cost (from
);
16328 if (count
< max_count
16329 && (!selected
|| new_cost
<= selected_cost
))
16332 selected_cost
= new_cost
;
16333 selected_address
= fix
->address
+ count
;
16336 /* Continue after the dispatch table. */
16337 from
= NEXT_INSN (tmp
);
16341 new_cost
= arm_barrier_cost (from
);
16343 if (count
< max_count
16344 && (!selected
|| new_cost
<= selected_cost
))
16347 selected_cost
= new_cost
;
16348 selected_address
= fix
->address
+ count
;
16351 from
= NEXT_INSN (from
);
16354 /* Make sure that we found a place to insert the jump. */
16355 gcc_assert (selected
);
16357 /* Make sure we do not split a call and its corresponding
16358 CALL_ARG_LOCATION note. */
16359 if (CALL_P (selected
))
16361 rtx_insn
*next
= NEXT_INSN (selected
);
16362 if (next
&& NOTE_P (next
)
16363 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16367 /* Create a new JUMP_INSN that branches around a barrier. */
16368 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16369 JUMP_LABEL (from
) = label
;
16370 barrier
= emit_barrier_after (from
);
16371 emit_label_after (label
, barrier
);
16373 /* Create a minipool barrier entry for the new barrier. */
16374 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16375 new_fix
->insn
= barrier
;
16376 new_fix
->address
= selected_address
;
16377 new_fix
->next
= fix
->next
;
16378 fix
->next
= new_fix
;
16383 /* Record that there is a natural barrier in the insn stream at
16386 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
16388 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16391 fix
->address
= address
;
16394 if (minipool_fix_head
!= NULL
)
16395 minipool_fix_tail
->next
= fix
;
16397 minipool_fix_head
= fix
;
16399 minipool_fix_tail
= fix
;
16402 /* Record INSN, which will need fixing up to load a value from the
16403 minipool. ADDRESS is the offset of the insn since the start of the
16404 function; LOC is a pointer to the part of the insn which requires
16405 fixing; VALUE is the constant that must be loaded, which is of type
16408 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
16409 machine_mode mode
, rtx value
)
16411 gcc_assert (!arm_disable_literal_pool
);
16412 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16415 fix
->address
= address
;
16418 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16419 fix
->value
= value
;
16420 fix
->forwards
= get_attr_pool_range (insn
);
16421 fix
->backwards
= get_attr_neg_pool_range (insn
);
16422 fix
->minipool
= NULL
;
16424 /* If an insn doesn't have a range defined for it, then it isn't
16425 expecting to be reworked by this code. Better to stop now than
16426 to generate duff assembly code. */
16427 gcc_assert (fix
->forwards
|| fix
->backwards
);
16429 /* If an entry requires 8-byte alignment then assume all constant pools
16430 require 4 bytes of padding. Trying to do this later on a per-pool
16431 basis is awkward because existing pool entries have to be modified. */
16432 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16437 fprintf (dump_file
,
16438 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16439 GET_MODE_NAME (mode
),
16440 INSN_UID (insn
), (unsigned long) address
,
16441 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16442 arm_print_value (dump_file
, fix
->value
);
16443 fprintf (dump_file
, "\n");
16446 /* Add it to the chain of fixes. */
16449 if (minipool_fix_head
!= NULL
)
16450 minipool_fix_tail
->next
= fix
;
16452 minipool_fix_head
= fix
;
16454 minipool_fix_tail
= fix
;
16457 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16458 Returns the number of insns needed, or 99 if we always want to synthesize
16461 arm_max_const_double_inline_cost ()
16463 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16466 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16467 Returns the number of insns needed, or 99 if we don't know how to
16470 arm_const_double_inline_cost (rtx val
)
16472 rtx lowpart
, highpart
;
16475 mode
= GET_MODE (val
);
16477 if (mode
== VOIDmode
)
16480 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16482 lowpart
= gen_lowpart (SImode
, val
);
16483 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16485 gcc_assert (CONST_INT_P (lowpart
));
16486 gcc_assert (CONST_INT_P (highpart
));
16488 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16489 NULL_RTX
, NULL_RTX
, 0, 0)
16490 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16491 NULL_RTX
, NULL_RTX
, 0, 0));
16494 /* Cost of loading a SImode constant. */
16496 arm_const_inline_cost (enum rtx_code code
, rtx val
)
16498 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
16499 NULL_RTX
, NULL_RTX
, 1, 0);
16502 /* Return true if it is worthwhile to split a 64-bit constant into two
16503 32-bit operations. This is the case if optimizing for size, or
16504 if we have load delay slots, or if one 32-bit part can be done with
16505 a single data operation. */
16507 arm_const_double_by_parts (rtx val
)
16509 machine_mode mode
= GET_MODE (val
);
16512 if (optimize_size
|| arm_ld_sched
)
16515 if (mode
== VOIDmode
)
16518 part
= gen_highpart_mode (SImode
, mode
, val
);
16520 gcc_assert (CONST_INT_P (part
));
16522 if (const_ok_for_arm (INTVAL (part
))
16523 || const_ok_for_arm (~INTVAL (part
)))
16526 part
= gen_lowpart (SImode
, val
);
16528 gcc_assert (CONST_INT_P (part
));
16530 if (const_ok_for_arm (INTVAL (part
))
16531 || const_ok_for_arm (~INTVAL (part
)))
16537 /* Return true if it is possible to inline both the high and low parts
16538 of a 64-bit constant into 32-bit data processing instructions. */
16540 arm_const_double_by_immediates (rtx val
)
16542 machine_mode mode
= GET_MODE (val
);
16545 if (mode
== VOIDmode
)
16548 part
= gen_highpart_mode (SImode
, mode
, val
);
16550 gcc_assert (CONST_INT_P (part
));
16552 if (!const_ok_for_arm (INTVAL (part
)))
16555 part
= gen_lowpart (SImode
, val
);
16557 gcc_assert (CONST_INT_P (part
));
16559 if (!const_ok_for_arm (INTVAL (part
)))
16565 /* Scan INSN and note any of its operands that need fixing.
16566 If DO_PUSHES is false we do not actually push any of the fixups
16569 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
16573 extract_constrain_insn (insn
);
16575 if (recog_data
.n_alternatives
== 0)
16578 /* Fill in recog_op_alt with information about the constraints of
16580 preprocess_constraints (insn
);
16582 const operand_alternative
*op_alt
= which_op_alt ();
16583 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16585 /* Things we need to fix can only occur in inputs. */
16586 if (recog_data
.operand_type
[opno
] != OP_IN
)
16589 /* If this alternative is a memory reference, then any mention
16590 of constants in this alternative is really to fool reload
16591 into allowing us to accept one there. We need to fix them up
16592 now so that we output the right code. */
16593 if (op_alt
[opno
].memory_ok
)
16595 rtx op
= recog_data
.operand
[opno
];
16597 if (CONSTANT_P (op
))
16600 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16601 recog_data
.operand_mode
[opno
], op
);
16603 else if (MEM_P (op
)
16604 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16605 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16609 rtx cop
= avoid_constant_pool_reference (op
);
16611 /* Casting the address of something to a mode narrower
16612 than a word can cause avoid_constant_pool_reference()
16613 to return the pool reference itself. That's no good to
16614 us here. Lets just hope that we can use the
16615 constant pool value directly. */
16617 cop
= get_pool_constant (XEXP (op
, 0));
16619 push_minipool_fix (insn
, address
,
16620 recog_data
.operand_loc
[opno
],
16621 recog_data
.operand_mode
[opno
], cop
);
/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
   and unions in the context of ARMv8-M Security Extensions.  It is used as a
   helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
   or four masks, depending on whether it is being computed for a
   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
   respectively.  The tree for the type of the argument or a field within an
   argument is passed in ARG_TYPE, the current register this argument or field
   starts in is kept in the pointer REGNO and updated accordingly, the bit this
   argument or field starts at is passed in STARTING_BIT and the last used bit
   is kept in LAST_USED_BIT which is also updated accordingly.

   Returns a mask of the core/VFP registers that hold live argument data and
   must therefore NOT be cleared before the non-secure transition.  */

static unsigned HOST_WIDE_INT
comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
			       uint32_t * padding_bits_to_clear,
			       unsigned starting_bit, int * last_used_bit)

{
  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;

  if (TREE_CODE (arg_type) == RECORD_TYPE)
    {
      /* Walk the fields in declaration order, tracking the bit position
	 within the current 32-bit argument register.  */
      unsigned current_bit = starting_bit;
      tree field;
      long int offset, size;

      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  /* The offset within a structure is always an offset from
	     the start of that structure.  Make sure we take that into the
	     calculation of the register based offset that we use here.  */
	  offset = starting_bit;
	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
	  offset %= 32;

	  /* This is the actual size of the field, for bitfields this is the
	     bitfield width and not the container size.  */
	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);

	  if (*last_used_bit != offset)
	    {
	      if (offset < *last_used_bit)
		{
		  /* This field's offset is before the 'last_used_bit', that
		     means this field goes on the next register.  So we need to
		     pad the rest of the current register and increase the
		     register number.  */
		  uint32_t mask;
		  mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
		  mask++;

		  padding_bits_to_clear[*regno] |= mask;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      else
		{
		  /* Otherwise we pad the bits between the last field's end and
		     the start of the new field.  */
		  uint32_t mask;

		  mask = ((uint32_t)-1) >> (32 - offset);
		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
		  padding_bits_to_clear[*regno] |= mask;
		}
	      current_bit = offset;
	    }

	  /* Calculate further padding bits for inner structs/unions too.  */
	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
	    {
	      *last_used_bit = current_bit;
	      not_to_clear_reg_mask
		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
						  padding_bits_to_clear, offset,
						  last_used_bit);
	    }
	  else
	    {
	      /* Update 'current_bit' with this field's size.  If the
		 'current_bit' lies in a subsequent register, update 'regno' and
		 reset 'current_bit' to point to the current bit in that new
		 register.  */
	      current_bit += size;
	      while (current_bit >= 32)
		{
		  current_bit -= 32;
		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
		  (*regno)++;
		}
	      *last_used_bit = current_bit;
	    }

	  field = TREE_CHAIN (field);
	}
      /* The register holding the final (possibly partial) field is live.  */
      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
    }
  else if (TREE_CODE (arg_type) == UNION_TYPE)
    {
      tree field, field_t;
      int i, regno_t, field_size;
      int max_reg = -1;
      int max_bit = -1;
      uint32_t mask;
      /* Start from "everything is padding" and intersect with each field.  */
      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
	= {-1, -1, -1, -1};

      /* To compute the padding bits in a union we only consider bits as
	 padding bits if they are always either a padding bit or fall outside a
	 fields size for all fields in the union.  */
      field = TYPE_FIELDS (arg_type);
      while (field)
	{
	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
	    = {0U, 0U, 0U, 0U};
	  int last_used_bit_t = *last_used_bit;
	  regno_t = *regno;
	  field_t = TREE_TYPE (field);

	  /* If the field's type is either a record or a union make sure to
	     compute their padding bits too.  */
	  if (RECORD_OR_UNION_TYPE_P (field_t))
	    not_to_clear_reg_mask
	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
						&padding_bits_to_clear_t[0],
						starting_bit, &last_used_bit_t);
	  else
	    {
	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
	      regno_t = (field_size / 32) + *regno;
	      last_used_bit_t = (starting_bit + field_size) % 32;
	    }

	  for (i = *regno; i < regno_t; i++)
	    {
	      /* For all but the last register used by this field only keep the
		 padding bits that were padding bits in this field.  */
	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
	    }

	  /* For the last register, keep all padding bits that were padding
	     bits in this field and any padding bits that are still valid
	     as padding bits but fall outside of this field's size.  */
	  mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
	  padding_bits_to_clear_res[regno_t]
	    &= padding_bits_to_clear_t[regno_t] | mask;

	  /* Update the maximum size of the fields in terms of registers used
	     ('max_reg') and the 'last_used_bit' in said register.  */
	  if (max_reg < regno_t)
	    {
	      max_reg = regno_t;
	      max_bit = last_used_bit_t;
	    }
	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
	    max_bit = last_used_bit_t;

	  field = TREE_CHAIN (field);
	}

      /* Update the current padding_bits_to_clear using the intersection of the
	 padding bits of all the fields.  */
      for (i = *regno; i < max_reg; i++)
	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];

      /* Do not keep trailing padding bits, we do not know yet whether this
	 is the end of the argument.  */
      mask = ((uint32_t) 1 << max_bit) - 1;
      padding_bits_to_clear[max_reg]
	|= padding_bits_to_clear_res[max_reg] & mask;

      *regno = max_reg;
      *last_used_bit = max_bit;
    }
  else
    /* This function should only be used for structs and unions.  */
    gcc_unreachable ();

  return not_to_clear_reg_mask;
}
/* In the context of ARMv8-M Security Extensions, this function is used for both
   'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
   registers are used when returning or passing arguments, which is then
   returned as a mask.  It will also compute a mask to indicate padding/unused
   bits for each of these registers, and passes this through the
   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
   the starting register used to pass this argument or return value is passed
   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
   for struct and union types.  */

static unsigned HOST_WIDE_INT
compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
			   uint32_t * padding_bits_to_clear)

{
  int last_used_bit = 0;
  unsigned HOST_WIDE_INT not_to_clear_mask;

  if (RECORD_OR_UNION_TYPE_P (arg_type))
    {
      /* Aggregates: walk the fields to find live registers and padding.  */
      not_to_clear_mask
	= comp_not_to_clear_mask_str_un (arg_type, &regno,
					 padding_bits_to_clear, 0,
					 &last_used_bit);

      /* If the 'last_used_bit' is not zero, that means we are still using a
	 part of the last 'regno'.  In such cases we must clear the trailing
	 bits.  Otherwise we are not using regno and we should mark it as to
	 clear.  */
      if (last_used_bit != 0)
	padding_bits_to_clear[regno]
	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
      else
	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
    }
  else
    {
      not_to_clear_mask = 0;
      /* We are not dealing with structs nor unions.  So these arguments may be
	 passed in floating point registers too.  In some cases a BLKmode is
	 used when returning or passing arguments in multiple VFP registers.  */
      if (GET_MODE (arg_rtx) == BLKmode)
	{
	  int i, arg_regs;
	  rtx reg;

	  /* This should really only occur when dealing with the hard-float
	     ABI.  */
	  gcc_assert (TARGET_HARD_FLOAT_ABI);

	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
	    {
	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
	      gcc_assert (REG_P (reg));

	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);

	      /* If we are dealing with DF mode, make sure we don't
		 clear either of the registers it addresses.  */
	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
	      if (arg_regs > 1)
		{
		  unsigned HOST_WIDE_INT mask;
		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
		  not_to_clear_mask |= mask;
		}
	    }
	}
      else
	{
	  /* Otherwise we can rely on the MODE to determine how many registers
	     are being used by this argument.  */
	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	  if (arg_regs > 1)
	    {
	      unsigned HOST_WIDE_INT
	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
	      not_to_clear_mask |= mask;
	    }
	}
    }

  return not_to_clear_mask;
}
/* Saves callee saved registers, clears callee saved registers and caller saved
   registers not used to pass arguments before a cmse_nonsecure_call.  And
   restores the callee saved registers after.

   This runs during machine-dependent reorg: for every call marked with
   UNSPEC_NONSECURE_MEM, insns are emitted before the call to (1) clear
   padding bits inside argument registers and (2) overwrite every
   caller-saved register that does not carry an argument, so no secure
   state leaks across the security boundary.  */
static void
cmse_nonsecure_call_clear_caller_saved (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  uint64_t to_clear_mask, float_mask;
	  rtx_insn *seq;
	  rtx pat, call, unspec, reg, cleared_reg, tmp;
	  unsigned int regno, maxregno;
	  rtx address;
	  CUMULATIVE_ARGS args_so_far_v;
	  cumulative_args_t args_so_far;
	  tree arg_type, fntype;
	  bool using_r4, first_param = true;
	  function_args_iterator args_iter;
	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
	  uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];

	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  if (!CALL_P (insn))
	    continue;

	  pat = PATTERN (insn);
	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
	  call = XVECEXP (pat, 0, 0);

	  /* Get the real call RTX if the insn sets a value, ie. returns.  */
	  if (GET_CODE (call) == SET)
	    call = SET_SRC (call);

	  /* Check if it is a cmse_nonsecure_call.  */
	  unspec = XEXP (call, 0);
	  if (GET_CODE (unspec) != UNSPEC
	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
	    continue;

	  /* Determine the caller-saved registers we need to clear.  */
	  to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
	  maxregno = NUM_ARG_REGS - 1;
	  /* Only look at the caller-saved floating point registers in case of
	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
	     lazy store and loads which clear both caller- and callee-saved
	     registers.  */
	  if (TARGET_HARD_FLOAT_ABI)
	    {
	      float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
	      float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
	      to_clear_mask |= float_mask;
	      maxregno = D7_VFP_REGNUM;
	    }

	  /* Make sure the register used to hold the function address is not
	     cleared.  */
	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
	  gcc_assert (MEM_P (address));
	  gcc_assert (REG_P (XEXP (address, 0)));
	  to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));

	  /* Set basic block of call insn so that df rescan is performed on
	     insns inserted here.  */
	  set_block_for_insn (insn, bb);
	  df_set_flags (DF_DEFER_INSN_RESCAN);
	  start_sequence ();

	  /* Make sure the scheduler doesn't schedule other insns beyond
	     here.  */
	  emit_insn (gen_blockage ());

	  /* Walk through all arguments and clear registers appropriately.
	  */
	  fntype = TREE_TYPE (MEM_EXPR (address));
	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
				    NULL_TREE);
	  args_so_far = pack_cumulative_args (&args_so_far_v);
	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
	    {
	      rtx arg_rtx;
	      machine_mode arg_mode = TYPE_MODE (arg_type);

	      if (VOID_TYPE_P (arg_type))
		continue;

	      if (!first_param)
		arm_function_arg_advance (args_so_far, arg_mode, arg_type,
					  true);

	      arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
					  true);
	      gcc_assert (REG_P (arg_rtx));
	      /* Registers carrying this argument stay live; remember the
		 padding bits inside them that must still be scrubbed.  */
	      to_clear_mask
		&= ~compute_not_to_clear_mask (arg_type, arg_rtx,
					       REGNO (arg_rtx),
					       padding_bits_to_clear_ptr);

	      first_param = false;
	    }

	  /* Clear padding bits where needed.  */
	  cleared_reg = XEXP (address, 0);
	  reg = gen_rtx_REG (SImode, IP_REGNUM);
	  using_r4 = false;
	  for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
	    {
	      if (padding_bits_to_clear[regno] == 0)
		continue;

	      /* If this is a Thumb-1 target copy the address of the function
		 we are calling from 'r4' into 'ip' such that we can use r4 to
		 clear the unused bits in the arguments.  */
	      if (TARGET_THUMB1 && !using_r4)
		{
		  using_r4 = true;
		  reg = cleared_reg;
		  emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
				  reg);
		}

	      /* Load the inverted padding mask into the scratch register,
		 bottom half first.  */
	      tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
	      emit_move_insn (reg, tmp);
	      /* Also fill the top half of the negated
		 padding_bits_to_clear.  */
	      if (((~padding_bits_to_clear[regno]) >> 16) > 0)
		{
		  tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
		  emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
								GEN_INT (16),
								GEN_INT (16)),
					  tmp));
		}

	      /* AND the argument register with the inverted mask to zero
		 its padding bits.  */
	      emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
				     gen_rtx_REG (SImode, regno),
				     reg));

	    }
	  /* Restore the call address into r4 if we borrowed it.  */
	  if (using_r4)
	    emit_move_insn (cleared_reg,
			    gen_rtx_REG (SImode, IP_REGNUM));

	  /* We use right shift and left shift to clear the LSB of the address
	     we jump to instead of using bic, to avoid having to use an extra
	     register on Thumb-1.  */
	  tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (cleared_reg, tmp));
	  tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
	  emit_insn (gen_rtx_SET (cleared_reg, tmp));

	  /* Clearing all registers that leak before doing a non-secure
	     call.  */
	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
	    {
	      if (!(to_clear_mask & (1LL << regno)))
		continue;

	      /* If regno is an even vfp register and its successor is also to
		 be cleared, use vmov.  */
	      if (IS_VFP_REGNUM (regno))
		{
		  if (TARGET_VFP_DOUBLE
		      && VFP_REGNO_OK_FOR_DOUBLE (regno)
		      && to_clear_mask & (1LL << (regno + 1)))
		    emit_move_insn (gen_rtx_REG (DFmode, regno++),
				    CONST0_RTX (DFmode));
		  else
		    emit_move_insn (gen_rtx_REG (SFmode, regno),
				    CONST0_RTX (SFmode));
		}
	      else
		/* Core registers are cleared by copying the (sanitized)
		   call-address register, which avoids needing a zeroed
		   scratch register.  */
		emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
	    }

	  seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);

	}
    }
}
/* Rewrite move insn into subtract of 0 if the condition codes will
   be useful in next conditional jump insn.  On Thumb-1 a SUBS sets the
   flags for free, which lets the later compare-against-zero be elided.  */

static void
thumb1_reorg (void)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx dest, src;
      rtx cmp, op0, op1, set = NULL;
      rtx_insn *prev, *insn = BB_END (bb);
      bool insn_clobbered = false;

      /* Skip trailing notes/debug insns to reach the last real insn.  */
      while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
	insn = PREV_INSN (insn);

      /* Find the last cbranchsi4_insn in basic block BB.  */
      if (insn == BB_HEAD (bb)
	  || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
	continue;

      /* Get the register with which we are comparing.  */
      cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
      op0 = XEXP (cmp, 0);
      op1 = XEXP (cmp, 1);

      /* Check that comparison is against ZERO.  */
      if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
	continue;

      /* Find the first flag setting insn before INSN in basic block BB.  */
      gcc_assert (insn != BB_HEAD (bb));
      for (prev = PREV_INSN (insn);
	   (!insn_clobbered
	    && prev != BB_HEAD (bb)
	    && (NOTE_P (prev)
		|| DEBUG_INSN_P (prev)
		|| ((set = single_set (prev)) != NULL
		    && get_attr_conds (prev) == CONDS_NOCOND)));
	   prev = PREV_INSN (prev))
	{
	  if (reg_set_p (op0, prev))
	    insn_clobbered = true;
	}

      /* Skip if op0 is clobbered by insn other than prev.  */
      if (insn_clobbered)
	continue;

      if (!set)
	continue;

      dest = SET_DEST (set);
      src = SET_SRC (set);
      if (!low_register_operand (dest, SImode)
	  || !low_register_operand (src, SImode))
	continue;

      /* Rewrite move into subtract of 0 if its operand is compared with ZERO
	 in INSN.  Both src and dest of the move insn are checked.  */
      if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
	{
	  dest = copy_rtx (dest);
	  src = copy_rtx (src);
	  src = gen_rtx_MINUS (SImode, src, const0_rtx);
	  PATTERN (prev) = gen_rtx_SET (dest, src);
	  INSN_CODE (prev) = -1;
	  /* Set test register in INSN to dest.  */
	  XEXP (cmp, 0) = copy_rtx (dest);
	  INSN_CODE (insn) = -1;
	}
    }
}
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  A backwards scan with df
   liveness tracking finds SETs where the condition codes are dead, so a
   flag-setting 16-bit Thumb-2 encoding can be substituted.  */

static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  enum Convert_Action {SKIP, CONV, SWAP_CONV};

  FOR_EACH_BB_FN (bb, cfun)
    {
      if ((current_tune->disparage_flag_setting_t16_encodings
	   == tune_params::DISPARAGE_FLAGS_ALL)
	  && optimize_bb_for_speed_p (bb))
	continue;

      rtx_insn *insn;
      Convert_Action action = SKIP;
      /* Partial flag setters (shifts, logicals, moves) are only converted
	 when the tuning allows it or the block is cold.  */
      Convert_Action action_for_partial_flag_setting
	= ((current_tune->disparage_flag_setting_t16_encodings
	    != tune_params::DISPARAGE_FLAGS_NEITHER)
	   && optimize_bb_for_speed_p (bb))
	  ? SKIP : CONV;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (NONJUMP_INSN_P (insn)
	      && !REGNO_REG_SET_P (&live, CC_REGNUM)
	      && GET_CODE (PATTERN (insn)) == SET)
	    {
	      action = SKIP;
	      rtx pat = PATTERN (insn);
	      rtx dst = XEXP (pat, 0);
	      rtx src = XEXP (pat, 1);
	      rtx op0 = NULL_RTX, op1 = NULL_RTX;

	      if (UNARY_P (src) || BINARY_P (src))
		op0 = XEXP (src, 0);

	      if (BINARY_P (src))
		op1 = XEXP (src, 1);

	      if (low_register_operand (dst, SImode))
		{
		  switch (GET_CODE (src))
		    {
		    case PLUS:
		      /* Adding two registers and storing the result
			 in the first source is already a 16-bit
			 operation.  */
		      if (rtx_equal_p (dst, op0)
			  && register_operand (op1, SImode))
			break;

		      if (low_register_operand (op0, SImode))
			{
			  /* ADDS <Rd>,<Rn>,<Rm>  */
			  if (low_register_operand (op1, SImode))
			    action = CONV;
			  /* ADDS <Rdn>,#<imm8>  */
			  /* SUBS <Rdn>,#<imm8>  */
			  else if (rtx_equal_p (dst, op0)
				   && CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -255, 255))
			    action = CONV;
			  /* ADDS <Rd>,<Rn>,#<imm3>  */
			  /* SUBS <Rd>,<Rn>,#<imm3>  */
			  else if (CONST_INT_P (op1)
				   && IN_RANGE (INTVAL (op1), -7, 7))
			    action = CONV;
			}
		      /* ADCS <Rd>, <Rn>  */
		      else if (GET_CODE (XEXP (src, 0)) == PLUS
			       && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
			       && low_register_operand (XEXP (XEXP (src, 0), 1),
							SImode)
			       && COMPARISON_P (op1)
			       && cc_register (XEXP (op1, 0), VOIDmode)
			       && maybe_get_arm_condition_code (op1) == ARM_CS
			       && XEXP (op1, 1) == const0_rtx)
			action = CONV;
		      break;

		    case MINUS:
		      /* RSBS <Rd>,<Rn>,#0
			 Not handled here: see NEG below.  */
		      /* SUBS <Rd>,<Rn>,#<imm3>
			 SUBS <Rdn>,#<imm8>
			 Not handled here: see PLUS above.  */
		      /* SUBS <Rd>,<Rn>,<Rm>  */
		      if (low_register_operand (op0, SImode)
			  && low_register_operand (op1, SImode))
			action = CONV;
		      break;

		    case MULT:
		      /* MULS <Rdm>,<Rn>,<Rdm>
			 As an exception to the rule, this is only used
			 when optimizing for size since MULS is slow on all
			 known implementations.  We do not even want to use
			 MULS in cold code, if optimizing for speed, so we
			 test the global flag here.  */
		      if (!optimize_size)
			break;
		      /* Fall through.  */
		    case AND:
		    case IOR:
		    case XOR:
		      /* ANDS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      /* Commutative: swapping the operands also yields a
			 16-bit encoding.  */
		      else if (rtx_equal_p (dst, op1)
			       && low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting == SKIP
				 ? SKIP : SWAP_CONV;
		      break;

		    case ASHIFTRT:
		    case ASHIFT:
		    case LSHIFTRT:
		      /* ASRS <Rdn>,<Rm> */
		      /* LSRS <Rdn>,<Rm> */
		      /* LSLS <Rdn>,<Rm> */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      /* ASRS <Rd>,<Rm>,#<imm5> */
		      /* LSRS <Rd>,<Rm>,#<imm5> */
		      /* LSLS <Rd>,<Rm>,#<imm5> */
		      else if (low_register_operand (op0, SImode)
			       && CONST_INT_P (op1)
			       && IN_RANGE (INTVAL (op1), 0, 31))
			action = action_for_partial_flag_setting;
		      break;

		    case ROTATERT:
		      /* RORS <Rdn>,<Rm>  */
		      if (rtx_equal_p (dst, op0)
			  && low_register_operand (op1, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NOT:
		      /* MVNS <Rd>,<Rm>  */
		      if (low_register_operand (op0, SImode))
			action = action_for_partial_flag_setting;
		      break;

		    case NEG:
		      /* NEGS <Rd>,<Rm>  (a.k.a RSBS)  */
		      if (low_register_operand (op0, SImode))
			action = CONV;
		      break;

		    case CONST_INT:
		      /* MOVS <Rd>,#<imm8>  */
		      if (CONST_INT_P (src)
			  && IN_RANGE (INTVAL (src), 0, 255))
			action = action_for_partial_flag_setting;
		      break;

		    case REG:
		      /* MOVS and MOV<c> with registers have different
			 encodings, so are not relevant here.  */
		      break;

		    default:
		      break;
		    }
		}

	      if (action != SKIP)
		{
		  rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
		  rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
		  rtvec vec;

		  if (action == SWAP_CONV)
		    {
		      src = copy_rtx (src);
		      XEXP (src, 0) = op1;
		      XEXP (src, 1) = op0;
		      pat = gen_rtx_SET (dst, src);
		      vec = gen_rtvec (2, pat, clobber);
		    }
		  else /* action == CONV */
		    vec = gen_rtvec (2, pat, clobber);

		  /* Attach the CC clobber so recog picks the flag-setting
		     (16-bit) alternative.  */
		  PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
		  INSN_CODE (insn) = -1;
		}
	    }

	  if (NONDEBUG_INSN_P (insn))
	    df_simulate_one_insn_backwards (bb, insn, &live);
	}
    }

  CLEAR_REG_SET (&live);
}
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  This is the TARGET_MACHINE_DEPENDENT_REORG hook:
   it first runs the CMSE/Thumb sub-passes, then scans the function,
   records every constant that must live in a minipool, and dumps
   minipools at barriers within addressing range of their users.  */
static void
arm_reorg (void)
{
  rtx_insn *insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (use_cmse)
    cmse_nonsecure_call_clear_caller_saved ();
  if (TARGET_THUMB1)
    thumb1_reorg ();
  else if (TARGET_THUMB2)
    thumb2_reorg ();

  /* Ensure all insns that must be split have been split at this point.
     Otherwise, the pool placement code below may compute incorrect
     insn lengths.  Note that when optimizing, all insns have already
     been split at this point.  */
  if (!optimize)
    split_all_insns_noflow ();

  /* Make sure we do not attempt to create a literal pool even though it should
     no longer be necessary to create any.  */
  if (arm_disable_literal_pool)
    return ;

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (NOTE_P (insn));
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (BARRIER_P (insn))
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx_jump_table_data *table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if (tablejump_p (insn, NULL, &table))
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
      else if (LABEL_P (insn))
	/* Add the worst-case padding due to alignment.  We don't add
	   the _current_ padding because the minipool insertions
	   themselves might change it.  */
	address += get_label_padding (insn);
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && BARRIER_P (fix->insn))
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      /* Gather as many fixes as will fit into one pool, remembering the
	 last natural barrier seen along the way.  */
      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (BARRIER_P (ftmp->insn))
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is first fix that we can't fit into this pool and
	     there no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      /* Try to pull later in-range fixes back into this pool too.  */
      while (ftmp)
	{
	  if (!BARRIER_P (ftmp->insn)
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (!BARRIER_P (this_fix->insn))
	  {
	    rtx addr
	      = plus_constant (Pmode,
			       gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  cfun->machine->after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
17567 /* Routines to output assembly language. */
/* Return string representation of passed in real value.  Only 0.0 is
   expected here (the only FP constant the backend keeps in this table);
   any other value trips the assert.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  /* Lazily initialise the table of known FP constants.  */
  if (!fp_consts_inited)
    init_fp_table ();

  gcc_assert (real_equal (r, &value_fp0));
  return "0";
}
/* OPERANDS[0] is the entire list of insns that constitute pop,
   OPERANDS[1] is the base register, RETURN_PC is true iff return insn
   is in the list, UPDATE is true iff the list contains explicit
   update of base register.  COND is the predication condition rtx and
   REVERSE selects the inverted condition.  Emits a POP/LDM assembly
   string via output_asm_insn.  */
void
arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
			 bool update)
{
  int i;
  char pattern[100];
  int offset;
  const char *conditional;
  int num_saves = XVECLEN (operands[0], 0);
  unsigned int regno;
  unsigned int regno_base = REGNO (operands[1]);
  bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());

  /* The first register entries in the PARALLEL are the optional
     base-update SET and the return SET; skip them.  */
  offset = 0;
  offset += update ? 1 : 0;
  offset += return_pc ? 1 : 0;

  /* Is the base register in the list?  */
  for (i = offset; i < num_saves; i++)
    {
      regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
      /* If SP is in the list, then the base register must be SP.  */
      gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
      /* If base register is in the list, there must be no explicit update.  */
      if (regno == regno_base)
	gcc_assert (!update);
    }

  conditional = reverse ? "%?%D0" : "%?%d0";
  /* Can't use POP if returning from an interrupt.  */
  if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
    sprintf (pattern, "pop%s\t{", conditional);
  else
    {
      /* Output ldmfd when the base register is SP, otherwise output ldmia.
	 It's just a convention, their semantics are identical.  */
      if (regno_base == SP_REGNUM)
	sprintf (pattern, "ldmfd%s\t", conditional);
      else if (update)
	sprintf (pattern, "ldmia%s\t", conditional);
      else
	sprintf (pattern, "ldm%s\t", conditional);

      strcat (pattern, reg_names[regno_base]);
      if (update)
	strcat (pattern, "!, {");
      else
	strcat (pattern, ", {");
    }

  /* Output the first destination register.  */
  strcat (pattern,
	  reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);

  /* Output the rest of the destination registers.  */
  for (i = offset + 1; i < num_saves; i++)
    {
      strcat (pattern, ", ");
      strcat (pattern,
	      reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
    }

  strcat (pattern, "}");

  /* "^" requests the exception-return form when returning from an
     interrupt handler.  */
  if (interrupt_p && return_pc)
    strcat (pattern, "^");

  output_asm_insn (pattern, &cond);
}
/* Output the assembly for a store multiple of VFP double registers.
   OPERANDS[0] is the memory destination, OPERANDS[1] the first D
   register, OPERANDS[2] the PARALLEL listing all stores.  Uses VPUSH
   when the address base is SP, VSTMDB otherwise.  */

const char *
vfp_output_vstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;
  /* The address is either a plain register or a pre-modify of one.  */
  rtx addr_reg = REG_P (XEXP (operands[0], 0))
		   ? XEXP (operands[0], 0)
		   : XEXP (XEXP (operands[0], 0), 0);
  bool push_p =  REGNO (addr_reg) == SP_REGNUM;

  if (push_p)
    strcpy (pattern, "vpush%?.64\t{%P1");
  else
    strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");

  p = strlen (pattern);

  gcc_assert (REG_P (operands[1]));

  /* VFP register numbers are internal regnos; each D register spans two.  */
  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  BASE_REG is the internal register number of the
   first D register (each D register counts as 2); COUNT is the number of
   D registers to store.  Also builds the parallel dwarf CFI note.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  /* Element 0 of the PARALLEL is the store-multiple itself, expressed as
     a pre-modify of SP storing an UNSPEC_PUSH_MULT of the first reg.  */
  XVECEXP (par, 0, 0)
    = gen_rtx_SET (gen_frame_mem
		   (BLKmode,
		    gen_rtx_PRE_MODIFY (Pmode,
					stack_pointer_rtx,
					plus_constant
					(Pmode, stack_pointer_rtx,
					 - (count * 8)))
		    ),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  /* Dwarf entry 0: the SP adjustment.  */
  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  /* Dwarf entry 1: the first register store at the new SP.  */
  tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (gen_frame_mem (DFmode,
					plus_constant (Pmode,
						       stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
17774 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17775 has the cmse_nonsecure_call attribute and returns false otherwise. */
17778 detect_cmse_nonsecure_call (tree addr
)
17783 tree fntype
= TREE_TYPE (addr
);
17784 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_call",
17785 TYPE_ATTRIBUTES (fntype
)))
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  SIBCALL is true for a sibling (tail) call.  Adds
   the extra register uses/clobbers the ARM ABIs require on the emitted
   CALL_INSN_FUNCTION_USAGE list.  */

void
arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && !sibcall
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }

  if (TARGET_AAPCS_BASED)
    {
      /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
	 linker.  We need to add an IP clobber to allow setting
	 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true.  A CC clobber
	 is not needed since it's a fixed register.  */
      rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
      clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
    }
}
17827 /* Output a 'call' insn. */
17829 output_call (rtx
*operands
)
17831 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17833 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17834 if (REGNO (operands
[0]) == LR_REGNUM
)
17836 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17837 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17840 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17842 if (TARGET_INTERWORK
|| arm_arch4t
)
17843 output_asm_insn ("bx%?\t%0", operands
);
17845 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17850 /* Output a move from arm registers to arm registers of a long double
17851 OPERANDS[0] is the destination.
17852 OPERANDS[1] is the source. */
17854 output_mov_long_double_arm_from_arm (rtx
*operands
)
17856 /* We have to be careful here because the two might overlap. */
17857 int dest_start
= REGNO (operands
[0]);
17858 int src_start
= REGNO (operands
[1]);
17862 if (dest_start
< src_start
)
17864 for (i
= 0; i
< 3; i
++)
17866 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17867 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17868 output_asm_insn ("mov%?\t%0, %1", ops
);
17873 for (i
= 2; i
>= 0; i
--)
17875 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17876 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17877 output_asm_insn ("mov%?\t%0, %1", ops
);
17885 arm_emit_movpair (rtx dest
, rtx src
)
17887 /* If the src is an immediate, simplify it. */
17888 if (CONST_INT_P (src
))
17890 HOST_WIDE_INT val
= INTVAL (src
);
17891 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17892 if ((val
>> 16) & 0x0000ffff)
17894 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17896 GEN_INT ((val
>> 16) & 0x0000ffff));
17897 rtx_insn
*insn
= get_last_insn ();
17898 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17902 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17903 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17904 rtx_insn
*insn
= get_last_insn ();
17905 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17908 /* Output a move between double words. It must be REG<-MEM
17911 output_move_double (rtx
*operands
, bool emit
, int *count
)
17913 enum rtx_code code0
= GET_CODE (operands
[0]);
17914 enum rtx_code code1
= GET_CODE (operands
[1]);
17919 /* The only case when this might happen is when
17920 you are looking at the length of a DImode instruction
17921 that has an invalid constant in it. */
17922 if (code0
== REG
&& code1
!= MEM
)
17924 gcc_assert (!emit
);
17931 unsigned int reg0
= REGNO (operands
[0]);
17933 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
17935 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
17937 switch (GET_CODE (XEXP (operands
[1], 0)))
17944 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
17945 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
17947 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
17952 gcc_assert (TARGET_LDRD
);
17954 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
17961 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
17963 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
17971 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
17973 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
17978 gcc_assert (TARGET_LDRD
);
17980 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
17985 /* Autoicrement addressing modes should never have overlapping
17986 base and destination registers, and overlapping index registers
17987 are already prohibited, so this doesn't need to worry about
17989 otherops
[0] = operands
[0];
17990 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
17991 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
17993 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
17995 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
17997 /* Registers overlap so split out the increment. */
18000 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18001 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
18008 /* Use a single insn if we can.
18009 FIXME: IWMMXT allows offsets larger than ldrd can
18010 handle, fix these up with a pair of ldr. */
18012 || !CONST_INT_P (otherops
[2])
18013 || (INTVAL (otherops
[2]) > -256
18014 && INTVAL (otherops
[2]) < 256))
18017 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
18023 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18024 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18034 /* Use a single insn if we can.
18035 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18036 fix these up with a pair of ldr. */
18038 || !CONST_INT_P (otherops
[2])
18039 || (INTVAL (otherops
[2]) > -256
18040 && INTVAL (otherops
[2]) < 256))
18043 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
18049 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18050 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18060 /* We might be able to use ldrd %0, %1 here. However the range is
18061 different to ldr/adr, and it is broken on some ARMv7-M
18062 implementations. */
18063 /* Use the second register of the pair to avoid problematic
18065 otherops
[1] = operands
[1];
18067 output_asm_insn ("adr%?\t%0, %1", otherops
);
18068 operands
[1] = otherops
[0];
18072 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18074 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
18081 /* ??? This needs checking for thumb2. */
18083 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18084 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18086 otherops
[0] = operands
[0];
18087 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18088 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18090 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18092 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18094 switch ((int) INTVAL (otherops
[2]))
18098 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
18104 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
18110 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
18114 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18115 operands
[1] = otherops
[0];
18117 && (REG_P (otherops
[2])
18119 || (CONST_INT_P (otherops
[2])
18120 && INTVAL (otherops
[2]) > -256
18121 && INTVAL (otherops
[2]) < 256)))
18123 if (reg_overlap_mentioned_p (operands
[0],
18126 /* Swap base and index registers over to
18127 avoid a conflict. */
18128 std::swap (otherops
[1], otherops
[2]);
18130 /* If both registers conflict, it will usually
18131 have been fixed by a splitter. */
18132 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18133 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18137 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18138 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18145 otherops
[0] = operands
[0];
18147 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
18152 if (CONST_INT_P (otherops
[2]))
18156 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18157 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18159 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18165 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18171 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18178 return "ldrd%?\t%0, [%1]";
18180 return "ldmia%?\t%1, %M0";
18184 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18185 /* Take care of overlapping base/data reg. */
18186 if (reg_mentioned_p (operands
[0], operands
[1]))
18190 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18191 output_asm_insn ("ldr%?\t%0, %1", operands
);
18201 output_asm_insn ("ldr%?\t%0, %1", operands
);
18202 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18212 /* Constraints should ensure this. */
18213 gcc_assert (code0
== MEM
&& code1
== REG
);
18214 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18215 || (TARGET_ARM
&& TARGET_LDRD
));
18217 switch (GET_CODE (XEXP (operands
[0], 0)))
18223 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
18225 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18230 gcc_assert (TARGET_LDRD
);
18232 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
18239 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
18241 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
18249 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
18251 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
18256 gcc_assert (TARGET_LDRD
);
18258 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
18263 otherops
[0] = operands
[1];
18264 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18265 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18267 /* IWMMXT allows offsets larger than ldrd can handle,
18268 fix these up with a pair of ldr. */
18270 && CONST_INT_P (otherops
[2])
18271 && (INTVAL(otherops
[2]) <= -256
18272 || INTVAL(otherops
[2]) >= 256))
18274 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18278 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18279 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18288 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18289 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18295 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18298 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
18303 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
18308 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18309 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18311 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18315 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
18322 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
18329 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
18334 && (REG_P (otherops
[2])
18336 || (CONST_INT_P (otherops
[2])
18337 && INTVAL (otherops
[2]) > -256
18338 && INTVAL (otherops
[2]) < 256)))
18340 otherops
[0] = operands
[1];
18341 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18343 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
18349 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18350 otherops
[1] = operands
[1];
18353 output_asm_insn ("str%?\t%1, %0", operands
);
18354 output_asm_insn ("str%?\t%H1, %0", otherops
);
18364 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18365 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18368 output_move_quad (rtx
*operands
)
18370 if (REG_P (operands
[0]))
18372 /* Load, or reg->reg move. */
18374 if (MEM_P (operands
[1]))
18376 switch (GET_CODE (XEXP (operands
[1], 0)))
18379 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18384 output_asm_insn ("adr%?\t%0, %1", operands
);
18385 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
18389 gcc_unreachable ();
18397 gcc_assert (REG_P (operands
[1]));
18399 dest
= REGNO (operands
[0]);
18400 src
= REGNO (operands
[1]);
18402 /* This seems pretty dumb, but hopefully GCC won't try to do it
18405 for (i
= 0; i
< 4; i
++)
18407 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18408 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18409 output_asm_insn ("mov%?\t%0, %1", ops
);
18412 for (i
= 3; i
>= 0; i
--)
18414 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18415 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18416 output_asm_insn ("mov%?\t%0, %1", ops
);
18422 gcc_assert (MEM_P (operands
[0]));
18423 gcc_assert (REG_P (operands
[1]));
18424 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18426 switch (GET_CODE (XEXP (operands
[0], 0)))
18429 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18433 gcc_unreachable ();
18440 /* Output a VFP load or store instruction. */
18443 output_move_vfp (rtx
*operands
)
18445 rtx reg
, mem
, addr
, ops
[2];
18446 int load
= REG_P (operands
[0]);
18447 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18448 int sp
= (!TARGET_VFP_FP16INST
18449 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
18450 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18455 reg
= operands
[!load
];
18456 mem
= operands
[load
];
18458 mode
= GET_MODE (reg
);
18460 gcc_assert (REG_P (reg
));
18461 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18462 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
18468 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18469 gcc_assert (MEM_P (mem
));
18471 addr
= XEXP (mem
, 0);
18473 switch (GET_CODE (addr
))
18476 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18477 ops
[0] = XEXP (addr
, 0);
18482 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18483 ops
[0] = XEXP (addr
, 0);
18488 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18494 sprintf (buff
, templ
,
18495 load
? "ld" : "st",
18496 dp
? "64" : sp
? "32" : "16",
18498 integer_p
? "\t%@ int" : "");
18499 output_asm_insn (buff
, ops
);
18504 /* Output a Neon double-word or quad-word load or store, or a load
18505 or store for larger structure modes.
18507 WARNING: The ordering of elements is weird in big-endian mode,
18508 because the EABI requires that vectors stored in memory appear
18509 as though they were stored by a VSTM, as required by the EABI.
18510 GCC RTL defines element ordering based on in-memory order.
18511 This can be different from the architectural ordering of elements
18512 within a NEON register. The intrinsics defined in arm_neon.h use the
18513 NEON register element ordering, not the GCC RTL element ordering.
18515 For example, the in-memory ordering of a big-endian a quadword
18516 vector with 16-bit elements when stored from register pair {d0,d1}
18517 will be (lowest address first, d0[N] is NEON register element N):
18519 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18521 When necessary, quadword registers (dN, dN+1) are moved to ARM
18522 registers from rN in the order:
18524 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18526 So that STM/LDM can be used on vectors in ARM registers, and the
18527 same memory layout will result as if VSTM/VLDM were used.
18529 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18530 possible, which allows use of appropriate alignment tags.
18531 Note that the choice of "64" is independent of the actual vector
18532 element size; this size simply ensures that the behavior is
18533 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18535 Due to limitations of those instructions, use of VST1.64/VLD1.64
18536 is not possible if:
18537 - the address contains PRE_DEC, or
18538 - the mode refers to more than 4 double-word registers
18540 In those cases, it would be possible to replace VSTM/VLDM by a
18541 sequence of instructions; this is not currently implemented since
18542 this is not certain to actually improve performance. */
18545 output_move_neon (rtx
*operands
)
18547 rtx reg
, mem
, addr
, ops
[2];
18548 int regno
, nregs
, load
= REG_P (operands
[0]);
18553 reg
= operands
[!load
];
18554 mem
= operands
[load
];
18556 mode
= GET_MODE (reg
);
18558 gcc_assert (REG_P (reg
));
18559 regno
= REGNO (reg
);
18560 nregs
= HARD_REGNO_NREGS (regno
, mode
) / 2;
18561 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18562 || NEON_REGNO_OK_FOR_QUAD (regno
));
18563 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18564 || VALID_NEON_QREG_MODE (mode
)
18565 || VALID_NEON_STRUCT_MODE (mode
));
18566 gcc_assert (MEM_P (mem
));
18568 addr
= XEXP (mem
, 0);
18570 /* Strip off const from addresses like (const (plus (...))). */
18571 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18572 addr
= XEXP (addr
, 0);
18574 switch (GET_CODE (addr
))
18577 /* We have to use vldm / vstm for too-large modes. */
18580 templ
= "v%smia%%?\t%%0!, %%h1";
18581 ops
[0] = XEXP (addr
, 0);
18585 templ
= "v%s1.64\t%%h1, %%A0";
18592 /* We have to use vldm / vstm in this case, since there is no
18593 pre-decrement form of the vld1 / vst1 instructions. */
18594 templ
= "v%smdb%%?\t%%0!, %%h1";
18595 ops
[0] = XEXP (addr
, 0);
18600 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18601 gcc_unreachable ();
18604 /* We have to use vldm / vstm for too-large modes. */
18608 templ
= "v%smia%%?\t%%m0, %%h1";
18610 templ
= "v%s1.64\t%%h1, %%A0";
18616 /* Fall through. */
18622 for (i
= 0; i
< nregs
; i
++)
18624 /* We're only using DImode here because it's a convenient size. */
18625 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18626 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18627 if (reg_overlap_mentioned_p (ops
[0], mem
))
18629 gcc_assert (overlap
== -1);
18634 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18635 output_asm_insn (buff
, ops
);
18640 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18641 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18642 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18643 output_asm_insn (buff
, ops
);
18650 gcc_unreachable ();
18653 sprintf (buff
, templ
, load
? "ld" : "st");
18654 output_asm_insn (buff
, ops
);
18659 /* Compute and return the length of neon_mov<mode>, where <mode> is
18660 one of VSTRUCT modes: EI, OI, CI or XI. */
18662 arm_attr_length_move_neon (rtx_insn
*insn
)
18664 rtx reg
, mem
, addr
;
18668 extract_insn_cached (insn
);
18670 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18672 mode
= GET_MODE (recog_data
.operand
[0]);
18683 gcc_unreachable ();
18687 load
= REG_P (recog_data
.operand
[0]);
18688 reg
= recog_data
.operand
[!load
];
18689 mem
= recog_data
.operand
[load
];
18691 gcc_assert (MEM_P (mem
));
18693 mode
= GET_MODE (reg
);
18694 addr
= XEXP (mem
, 0);
18696 /* Strip off const from addresses like (const (plus (...))). */
18697 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18698 addr
= XEXP (addr
, 0);
18700 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18702 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
18709 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18713 arm_address_offset_is_imm (rtx_insn
*insn
)
18717 extract_insn_cached (insn
);
18719 if (REG_P (recog_data
.operand
[0]))
18722 mem
= recog_data
.operand
[0];
18724 gcc_assert (MEM_P (mem
));
18726 addr
= XEXP (mem
, 0);
18729 || (GET_CODE (addr
) == PLUS
18730 && REG_P (XEXP (addr
, 0))
18731 && CONST_INT_P (XEXP (addr
, 1))))
18737 /* Output an ADD r, s, #n where n may be too big for one instruction.
18738 If adding zero to one register, output nothing. */
18740 output_add_immediate (rtx
*operands
)
18742 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18744 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18747 output_multi_immediate (operands
,
18748 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18751 output_multi_immediate (operands
,
18752 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18759 /* Output a multiple immediate operation.
18760 OPERANDS is the vector of operands referred to in the output patterns.
18761 INSTR1 is the output pattern to use for the first constant.
18762 INSTR2 is the output pattern to use for subsequent constants.
18763 IMMED_OP is the index of the constant slot in OPERANDS.
18764 N is the constant value. */
18765 static const char *
18766 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18767 int immed_op
, HOST_WIDE_INT n
)
18769 #if HOST_BITS_PER_WIDE_INT > 32
18775 /* Quick and easy output. */
18776 operands
[immed_op
] = const0_rtx
;
18777 output_asm_insn (instr1
, operands
);
18782 const char * instr
= instr1
;
18784 /* Note that n is never zero here (which would give no output). */
18785 for (i
= 0; i
< 32; i
+= 2)
18789 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18790 output_asm_insn (instr
, operands
);
18800 /* Return the name of a shifter operation. */
18801 static const char *
18802 arm_shift_nmem(enum rtx_code code
)
18807 return ARM_LSL_NAME
;
18823 /* Return the appropriate ARM instruction for the operation code.
18824 The returned result should not be overwritten. OP is the rtx of the
18825 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18828 arithmetic_instr (rtx op
, int shift_first_arg
)
18830 switch (GET_CODE (op
))
18836 return shift_first_arg
? "rsb" : "sub";
18851 return arm_shift_nmem(GET_CODE(op
));
18854 gcc_unreachable ();
18858 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18859 for the operation code. The returned result should not be overwritten.
18860 OP is the rtx code of the shift.
18861 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18863 static const char *
18864 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18867 enum rtx_code code
= GET_CODE (op
);
18872 if (!CONST_INT_P (XEXP (op
, 1)))
18874 output_operand_lossage ("invalid shift operand");
18879 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18887 mnem
= arm_shift_nmem(code
);
18888 if (CONST_INT_P (XEXP (op
, 1)))
18890 *amountp
= INTVAL (XEXP (op
, 1));
18892 else if (REG_P (XEXP (op
, 1)))
18899 output_operand_lossage ("invalid shift operand");
18905 /* We never have to worry about the amount being other than a
18906 power of 2, since this case can never be reloaded from a reg. */
18907 if (!CONST_INT_P (XEXP (op
, 1)))
18909 output_operand_lossage ("invalid shift operand");
18913 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18915 /* Amount must be a power of two. */
18916 if (*amountp
& (*amountp
- 1))
18918 output_operand_lossage ("invalid shift operand");
18922 *amountp
= exact_log2 (*amountp
);
18923 gcc_assert (IN_RANGE (*amountp
, 0, 31));
18924 return ARM_LSL_NAME
;
18927 output_operand_lossage ("invalid shift operand");
18931 /* This is not 100% correct, but follows from the desire to merge
18932 multiplication by a power of 2 with the recognizer for a
18933 shift. >=32 is not a valid shift for "lsl", so we must try and
18934 output a shift that produces the correct arithmetical result.
18935 Using lsr #32 is identical except for the fact that the carry bit
18936 is not set correctly if we set the flags; but we never use the
18937 carry bit from such an operation, so we can ignore that. */
18938 if (code
== ROTATERT
)
18939 /* Rotate is just modulo 32. */
18941 else if (*amountp
!= (*amountp
& 31))
18943 if (code
== ASHIFT
)
18948 /* Shifts of 0 are no-ops. */
18955 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18956 because /bin/as is horribly restrictive. The judgement about
18957 whether or not each character is 'printable' (and can be output as
18958 is) or not (and must be printed with an octal escape) must be made
18959 with reference to the *host* character set -- the situation is
18960 similar to that discussed in the comments above pp_c_char in
18961 c-pretty-print.c. */
18963 #define MAX_ASCII_LEN 51
18966 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
18969 int len_so_far
= 0;
18971 fputs ("\t.ascii\t\"", stream
);
18973 for (i
= 0; i
< len
; i
++)
18977 if (len_so_far
>= MAX_ASCII_LEN
)
18979 fputs ("\"\n\t.ascii\t\"", stream
);
18985 if (c
== '\\' || c
== '\"')
18987 putc ('\\', stream
);
18995 fprintf (stream
, "\\%03o", c
);
19000 fputs ("\"\n", stream
);
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_regs[reg] \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19011 /* Compute the register save mask for registers 0 through 12
19012 inclusive. This code is used by arm_compute_save_reg_mask. */
19014 static unsigned long
19015 arm_compute_save_reg0_reg12_mask (void)
19017 unsigned long func_type
= arm_current_func_type ();
19018 unsigned long save_reg_mask
= 0;
19021 if (IS_INTERRUPT (func_type
))
19023 unsigned int max_reg
;
19024 /* Interrupt functions must not corrupt any registers,
19025 even call clobbered ones. If this is a leaf function
19026 we can just examine the registers used by the RTL, but
19027 otherwise we have to assume that whatever function is
19028 called might clobber anything, and so we have to save
19029 all the call-clobbered registers as well. */
19030 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
19031 /* FIQ handlers have registers r8 - r12 banked, so
19032 we only need to check r0 - r7, Normal ISRs only
19033 bank r14 and r15, so we must check up to r12.
19034 r13 is the stack pointer which is always preserved,
19035 so we do not need to consider it here. */
19040 for (reg
= 0; reg
<= max_reg
; reg
++)
19041 if (df_regs_ever_live_p (reg
)
19042 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19043 save_reg_mask
|= (1 << reg
);
19045 /* Also save the pic base register if necessary. */
19047 && !TARGET_SINGLE_PIC_BASE
19048 && arm_pic_register
!= INVALID_REGNUM
19049 && crtl
->uses_pic_offset_table
)
19050 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19052 else if (IS_VOLATILE(func_type
))
19054 /* For noreturn functions we historically omitted register saves
19055 altogether. However this really messes up debugging. As a
19056 compromise save just the frame pointers. Combined with the link
19057 register saved elsewhere this should be sufficient to get
19059 if (frame_pointer_needed
)
19060 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19061 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19062 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19063 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19064 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19068 /* In the normal case we only need to save those registers
19069 which are call saved and which are used by this function. */
19070 for (reg
= 0; reg
<= 11; reg
++)
19071 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19072 save_reg_mask
|= (1 << reg
);
19074 /* Handle the frame pointer as a special case. */
19075 if (frame_pointer_needed
)
19076 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19078 /* If we aren't loading the PIC register,
19079 don't stack it even though it may be live. */
19081 && !TARGET_SINGLE_PIC_BASE
19082 && arm_pic_register
!= INVALID_REGNUM
19083 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19084 || crtl
->uses_pic_offset_table
))
19085 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19087 /* The prologue will copy SP into R0, so save it. */
19088 if (IS_STACKALIGN (func_type
))
19089 save_reg_mask
|= 1;
19092 /* Save registers so the exception handler can modify them. */
19093 if (crtl
->calls_eh_return
)
19099 reg
= EH_RETURN_DATA_REGNO (i
);
19100 if (reg
== INVALID_REGNUM
)
19102 save_reg_mask
|= 1 << reg
;
19106 return save_reg_mask
;
19109 /* Return true if r3 is live at the start of the function. */
19112 arm_r3_live_at_start_p (void)
19114 /* Just look at cfg info, which is still close enough to correct at this
19115 point. This gives false positives for broken functions that might use
19116 uninitialized data that happens to be allocated in r3, but who cares? */
19117 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19120 /* Compute the number of bytes used to store the static chain register on the
19121 stack, above the stack frame. We need to know this accurately to get the
19122 alignment of the rest of the stack frame correct. */
19125 arm_compute_static_chain_stack_bytes (void)
19127 /* See the defining assertion in arm_expand_prologue. */
19128 if (IS_NESTED (arm_current_func_type ())
19129 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19130 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
19131 && !df_regs_ever_live_p (LR_REGNUM
)))
19132 && arm_r3_live_at_start_p ()
19133 && crtl
->args
.pretend_args_size
== 0)
19139 /* Compute a bit mask of which registers need to be
19140 saved on the stack for the current function.
19141 This is used by arm_get_frame_offsets, which may add extra registers. */
19143 static unsigned long
19144 arm_compute_save_reg_mask (void)
19146 unsigned int save_reg_mask
= 0;
19147 unsigned long func_type
= arm_current_func_type ();
19150 if (IS_NAKED (func_type
))
19151 /* This should never really happen. */
19154 /* If we are creating a stack frame, then we must save the frame pointer,
19155 IP (which will hold the old stack pointer), LR and the PC. */
19156 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19158 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19161 | (1 << PC_REGNUM
);
19163 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19165 /* Decide if we need to save the link register.
19166 Interrupt routines have their own banked link register,
19167 so they never need to save it.
19168 Otherwise if we do not use the link register we do not need to save
19169 it. If we are pushing other registers onto the stack however, we
19170 can save an instruction in the epilogue by pushing the link register
19171 now and then popping it back into the PC. This incurs extra memory
19172 accesses though, so we only do it when optimizing for size, and only
19173 if we know that we will not need a fancy return sequence. */
19174 if (df_regs_ever_live_p (LR_REGNUM
)
19177 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19178 && !crtl
->tail_call_emit
19179 && !crtl
->calls_eh_return
))
19180 save_reg_mask
|= 1 << LR_REGNUM
;
19182 if (cfun
->machine
->lr_save_eliminated
)
19183 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19185 if (TARGET_REALLY_IWMMXT
19186 && ((bit_count (save_reg_mask
)
19187 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19188 arm_compute_static_chain_stack_bytes())
19191 /* The total number of registers that are going to be pushed
19192 onto the stack is odd. We need to ensure that the stack
19193 is 64-bit aligned before we start to save iWMMXt registers,
19194 and also before we start to create locals. (A local variable
19195 might be a double or long long which we will load/store using
19196 an iWMMXt instruction). Therefore we need to push another
19197 ARM register, so that the stack will be 64-bit aligned. We
19198 try to avoid using the arg registers (r0 -r3) as they might be
19199 used to pass values in a tail call. */
19200 for (reg
= 4; reg
<= 12; reg
++)
19201 if ((save_reg_mask
& (1 << reg
)) == 0)
19205 save_reg_mask
|= (1 << reg
);
19208 cfun
->machine
->sibcall_blocked
= 1;
19209 save_reg_mask
|= (1 << 3);
19213 /* We may need to push an additional register for use initializing the
19214 PIC base register. */
19215 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19216 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19218 reg
= thumb_find_work_register (1 << 4);
19219 if (!call_used_regs
[reg
])
19220 save_reg_mask
|= (1 << reg
);
19223 return save_reg_mask
;
19226 /* Compute a bit mask of which registers need to be
19227 saved on the stack for the current function. */
19228 static unsigned long
19229 thumb1_compute_save_reg_mask (void)
19231 unsigned long mask
;
19235 for (reg
= 0; reg
< 12; reg
++)
19236 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19239 /* Handle the frame pointer as a special case. */
19240 if (frame_pointer_needed
)
19241 mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19244 && !TARGET_SINGLE_PIC_BASE
19245 && arm_pic_register
!= INVALID_REGNUM
19246 && crtl
->uses_pic_offset_table
)
19247 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19249 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19250 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19251 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19253 /* LR will also be pushed if any lo regs are pushed. */
19254 if (mask
& 0xff || thumb_force_lr_save ())
19255 mask
|= (1 << LR_REGNUM
);
19257 /* Make sure we have a low work register if we need one.
19258 We will need one if we are going to push a high register,
19259 but we are not currently intending to push a low register. */
19260 if ((mask
& 0xff) == 0
19261 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19263 /* Use thumb_find_work_register to choose which register
19264 we will use. If the register is live then we will
19265 have to push it. Use LAST_LO_REGNUM as our fallback
19266 choice for the register to select. */
19267 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19268 /* Make sure the register returned by thumb_find_work_register is
19269 not part of the return value. */
19270 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19271 reg
= LAST_LO_REGNUM
;
19273 if (callee_saved_reg_p (reg
))
19277 /* The 504 below is 8 bytes less than 512 because there are two possible
19278 alignment words. We can't tell here if they will be present or not so we
19279 have to play it safe and assume that they are. */
19280 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19281 ROUND_UP_WORD (get_frame_size ()) +
19282 crtl
->outgoing_args_size
) >= 504)
19284 /* This is the same as the code in thumb1_expand_prologue() which
19285 determines which register to use for stack decrement. */
19286 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19287 if (mask
& (1 << reg
))
19290 if (reg
> LAST_LO_REGNUM
)
19292 /* Make sure we have a register available for stack decrement. */
19293 mask
|= 1 << LAST_LO_REGNUM
;
19301 /* Return the number of bytes required to save VFP registers. */
19303 arm_get_vfp_saved_size (void)
19305 unsigned int regno
;
19310 /* Space for saved VFP registers. */
19311 if (TARGET_HARD_FLOAT
)
19314 for (regno
= FIRST_VFP_REGNUM
;
19315 regno
< LAST_VFP_REGNUM
;
19318 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19319 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19323 /* Workaround ARM10 VFPr1 bug. */
19324 if (count
== 2 && !arm_arch6
)
19326 saved
+= count
* 8;
19335 if (count
== 2 && !arm_arch6
)
19337 saved
+= count
* 8;
19344 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19345 everything bar the final return instruction. If simple_return is true,
19346 then do not output epilogue, because it has already been emitted in RTL. */
19348 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19349 bool simple_return
)
19351 char conditional
[10];
19354 unsigned long live_regs_mask
;
19355 unsigned long func_type
;
19356 arm_stack_offsets
*offsets
;
19358 func_type
= arm_current_func_type ();
19360 if (IS_NAKED (func_type
))
19363 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19365 /* If this function was declared non-returning, and we have
19366 found a tail call, then we have to trust that the called
19367 function won't return. */
19372 /* Otherwise, trap an attempted return by aborting. */
19374 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19376 assemble_external_libcall (ops
[1]);
19377 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19383 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19385 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19387 cfun
->machine
->return_used_this_function
= 1;
19389 offsets
= arm_get_frame_offsets ();
19390 live_regs_mask
= offsets
->saved_regs_mask
;
19392 if (!simple_return
&& live_regs_mask
)
19394 const char * return_reg
;
19396 /* If we do not have any special requirements for function exit
19397 (e.g. interworking) then we can load the return address
19398 directly into the PC. Otherwise we must load it into LR. */
19400 && !IS_CMSE_ENTRY (func_type
)
19401 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19402 return_reg
= reg_names
[PC_REGNUM
];
19404 return_reg
= reg_names
[LR_REGNUM
];
19406 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19408 /* There are three possible reasons for the IP register
19409 being saved. 1) a stack frame was created, in which case
19410 IP contains the old stack pointer, or 2) an ISR routine
19411 corrupted it, or 3) it was saved to align the stack on
19412 iWMMXt. In case 1, restore IP into SP, otherwise just
19414 if (frame_pointer_needed
)
19416 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19417 live_regs_mask
|= (1 << SP_REGNUM
);
19420 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19423 /* On some ARM architectures it is faster to use LDR rather than
19424 LDM to load a single register. On other architectures, the
19425 cost is the same. In 26 bit mode, or for exception handlers,
19426 we have to use LDM to load the PC so that the CPSR is also
19428 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19429 if (live_regs_mask
== (1U << reg
))
19432 if (reg
<= LAST_ARM_REGNUM
19433 && (reg
!= LR_REGNUM
19435 || ! IS_INTERRUPT (func_type
)))
19437 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19438 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19445 /* Generate the load multiple instruction to restore the
19446 registers. Note we can get here, even if
19447 frame_pointer_needed is true, but only if sp already
19448 points to the base of the saved core registers. */
19449 if (live_regs_mask
& (1 << SP_REGNUM
))
19451 unsigned HOST_WIDE_INT stack_adjust
;
19453 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19454 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19456 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19457 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19460 /* If we can't use ldmib (SA110 bug),
19461 then try to pop r3 instead. */
19463 live_regs_mask
|= 1 << 3;
19465 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19468 /* For interrupt returns we have to use an LDM rather than
19469 a POP so that we can use the exception return variant. */
19470 else if (IS_INTERRUPT (func_type
))
19471 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
19473 sprintf (instr
, "pop%s\t{", conditional
);
19475 p
= instr
+ strlen (instr
);
19477 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19478 if (live_regs_mask
& (1 << reg
))
19480 int l
= strlen (reg_names
[reg
]);
19486 memcpy (p
, ", ", 2);
19490 memcpy (p
, "%|", 2);
19491 memcpy (p
+ 2, reg_names
[reg
], l
);
19495 if (live_regs_mask
& (1 << LR_REGNUM
))
19497 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19498 /* If returning from an interrupt, restore the CPSR. */
19499 if (IS_INTERRUPT (func_type
))
19506 output_asm_insn (instr
, & operand
);
19508 /* See if we need to generate an extra instruction to
19509 perform the actual function return. */
19511 && func_type
!= ARM_FT_INTERWORKED
19512 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19514 /* The return has already been handled
19515 by loading the LR into the PC. */
19522 switch ((int) ARM_FUNC_TYPE (func_type
))
19526 /* ??? This is wrong for unified assembly syntax. */
19527 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19530 case ARM_FT_INTERWORKED
:
19531 gcc_assert (arm_arch5
|| arm_arch4t
);
19532 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19535 case ARM_FT_EXCEPTION
:
19536 /* ??? This is wrong for unified assembly syntax. */
19537 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19541 if (IS_CMSE_ENTRY (func_type
))
19543 /* Check if we have to clear the 'GE bits' which is only used if
19544 parallel add and subtraction instructions are available. */
19545 if (TARGET_INT_SIMD
)
19546 snprintf (instr
, sizeof (instr
),
19547 "msr%s\tAPSR_nzcvqg, %%|lr", conditional
);
19549 snprintf (instr
, sizeof (instr
),
19550 "msr%s\tAPSR_nzcvq, %%|lr", conditional
);
19552 output_asm_insn (instr
, & operand
);
19553 if (TARGET_HARD_FLOAT
&& !TARGET_THUMB1
)
19555 /* Clear the cumulative exception-status bits (0-4,7) and the
19556 condition code bits (28-31) of the FPSCR. We need to
19557 remember to clear the first scratch register used (IP) and
19558 save and restore the second (r4). */
19559 snprintf (instr
, sizeof (instr
), "push\t{%%|r4}");
19560 output_asm_insn (instr
, & operand
);
19561 snprintf (instr
, sizeof (instr
), "vmrs\t%%|ip, fpscr");
19562 output_asm_insn (instr
, & operand
);
19563 snprintf (instr
, sizeof (instr
), "movw\t%%|r4, #65376");
19564 output_asm_insn (instr
, & operand
);
19565 snprintf (instr
, sizeof (instr
), "movt\t%%|r4, #4095");
19566 output_asm_insn (instr
, & operand
);
19567 snprintf (instr
, sizeof (instr
), "and\t%%|ip, %%|r4");
19568 output_asm_insn (instr
, & operand
);
19569 snprintf (instr
, sizeof (instr
), "vmsr\tfpscr, %%|ip");
19570 output_asm_insn (instr
, & operand
);
19571 snprintf (instr
, sizeof (instr
), "pop\t{%%|r4}");
19572 output_asm_insn (instr
, & operand
);
19573 snprintf (instr
, sizeof (instr
), "mov\t%%|ip, %%|lr");
19574 output_asm_insn (instr
, & operand
);
19576 snprintf (instr
, sizeof (instr
), "bxns\t%%|lr");
19578 /* Use bx if it's available. */
19579 else if (arm_arch5
|| arm_arch4t
)
19580 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19582 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19586 output_asm_insn (instr
, & operand
);
19592 /* Output in FILE asm statements needed to declare the NAME of the function
19593 defined by its DECL node. */
19596 arm_asm_declare_function_name (FILE *file
, const char *name
, tree decl
)
19598 size_t cmse_name_len
;
19599 char *cmse_name
= 0;
19600 char cmse_prefix
[] = "__acle_se_";
19602 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19603 extra function label for each function with the 'cmse_nonsecure_entry'
19604 attribute. This extra function label should be prepended with
19605 '__acle_se_', telling the linker that it needs to create secure gateway
19606 veneers for this function. */
19607 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_entry",
19608 DECL_ATTRIBUTES (decl
)))
19610 cmse_name_len
= sizeof (cmse_prefix
) + strlen (name
);
19611 cmse_name
= XALLOCAVEC (char, cmse_name_len
);
19612 snprintf (cmse_name
, cmse_name_len
, "%s%s", cmse_prefix
, name
);
19613 targetm
.asm_out
.globalize_label (file
, cmse_name
);
19615 ARM_DECLARE_FUNCTION_NAME (file
, cmse_name
, decl
);
19616 ASM_OUTPUT_TYPE_DIRECTIVE (file
, cmse_name
, "function");
19619 ARM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
19620 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
19621 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
19622 ASM_OUTPUT_LABEL (file
, name
);
19625 ASM_OUTPUT_LABEL (file
, cmse_name
);
19627 ARM_OUTPUT_FN_UNWIND (file
, TRUE
);
19630 /* Write the function name into the code section, directly preceding
19631 the function prologue.
19633 Code will be output similar to this:
19635 .ascii "arm_poke_function_name", 0
19638 .word 0xff000000 + (t1 - t0)
19639 arm_poke_function_name
19641 stmfd sp!, {fp, ip, lr, pc}
19644 When performing a stack backtrace, code can inspect the value
19645 of 'pc' stored at 'fp' + 0. If the trace function then looks
19646 at location pc - 12 and the top 8 bits are set, then we know
19647 that there is a function name embedded immediately preceding this
19648 location and has length ((pc[-3]) & 0xff000000).
19650 We assume that pc is declared as a pointer to an unsigned long.
19652 It is of no benefit to output the function name if we are assembling
19653 a leaf function. These function types will not contain a stack
19654 backtrace structure, therefore it is not possible to determine the
19657 arm_poke_function_name (FILE *stream
, const char *name
)
19659 unsigned long alignlength
;
19660 unsigned long length
;
19663 length
= strlen (name
) + 1;
19664 alignlength
= ROUND_UP_WORD (length
);
19666 ASM_OUTPUT_ASCII (stream
, name
, length
);
19667 ASM_OUTPUT_ALIGN (stream
, 2);
19668 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19669 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19672 /* Place some comments into the assembler stream
19673 describing the current function. */
19675 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
19677 unsigned long func_type
;
19679 /* Sanity check. */
19680 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19682 func_type
= arm_current_func_type ();
19684 switch ((int) ARM_FUNC_TYPE (func_type
))
19687 case ARM_FT_NORMAL
:
19689 case ARM_FT_INTERWORKED
:
19690 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19693 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19696 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19698 case ARM_FT_EXCEPTION
:
19699 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19703 if (IS_NAKED (func_type
))
19704 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19706 if (IS_VOLATILE (func_type
))
19707 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19709 if (IS_NESTED (func_type
))
19710 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19711 if (IS_STACKALIGN (func_type
))
19712 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19713 if (IS_CMSE_ENTRY (func_type
))
19714 asm_fprintf (f
, "\t%@ Non-secure entry function: called from non-secure code.\n");
19716 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19718 crtl
->args
.pretend_args_size
, frame_size
);
19720 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19721 frame_pointer_needed
,
19722 cfun
->machine
->uses_anonymous_args
);
19724 if (cfun
->machine
->lr_save_eliminated
)
19725 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19727 if (crtl
->calls_eh_return
)
19728 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19733 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
19734 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
19736 arm_stack_offsets
*offsets
;
19742 /* Emit any call-via-reg trampolines that are needed for v4t support
19743 of call_reg and call_value_reg type insns. */
19744 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19746 rtx label
= cfun
->machine
->call_via
[regno
];
19750 switch_to_section (function_section (current_function_decl
));
19751 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19752 CODE_LABEL_NUMBER (label
));
19753 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19757 /* ??? Probably not safe to set this here, since it assumes that a
19758 function will be emitted as assembly immediately after we generate
19759 RTL for it. This does not happen for inline functions. */
19760 cfun
->machine
->return_used_this_function
= 0;
19762 else /* TARGET_32BIT */
19764 /* We need to take into account any stack-frame rounding. */
19765 offsets
= arm_get_frame_offsets ();
19767 gcc_assert (!use_return_insn (FALSE
, NULL
)
19768 || (cfun
->machine
->return_used_this_function
!= 0)
19769 || offsets
->saved_regs
== offsets
->outgoing_args
19770 || frame_pointer_needed
);
19774 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19775 STR and STRD. If an even number of registers are being pushed, one
19776 or more STRD patterns are created for each register pair. If an
19777 odd number of registers are pushed, emit an initial STR followed by
19778 as many STRD instructions as are needed. This works best when the
19779 stack is initially 64-bit aligned (the normal case), since it
19780 ensures that each STRD is also 64-bit aligned. */
19782 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19787 rtx par
= NULL_RTX
;
19788 rtx dwarf
= NULL_RTX
;
19792 num_regs
= bit_count (saved_regs_mask
);
19794 /* Must be at least one register to save, and can't save SP or PC. */
19795 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19796 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19797 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19799 /* Create sequence for DWARF info. All the frame-related data for
19800 debugging is held in this wrapper. */
19801 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19803 /* Describe the stack adjustment. */
19804 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19805 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19806 RTX_FRAME_RELATED_P (tmp
) = 1;
19807 XVECEXP (dwarf
, 0, 0) = tmp
;
19809 /* Find the first register. */
19810 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19815 /* If there's an odd number of registers to push. Start off by
19816 pushing a single register. This ensures that subsequent strd
19817 operations are dword aligned (assuming that SP was originally
19818 64-bit aligned). */
19819 if ((num_regs
& 1) != 0)
19821 rtx reg
, mem
, insn
;
19823 reg
= gen_rtx_REG (SImode
, regno
);
19825 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19826 stack_pointer_rtx
));
19828 mem
= gen_frame_mem (Pmode
,
19830 (Pmode
, stack_pointer_rtx
,
19831 plus_constant (Pmode
, stack_pointer_rtx
,
19834 tmp
= gen_rtx_SET (mem
, reg
);
19835 RTX_FRAME_RELATED_P (tmp
) = 1;
19836 insn
= emit_insn (tmp
);
19837 RTX_FRAME_RELATED_P (insn
) = 1;
19838 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19839 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
19840 RTX_FRAME_RELATED_P (tmp
) = 1;
19843 XVECEXP (dwarf
, 0, i
) = tmp
;
19847 while (i
< num_regs
)
19848 if (saved_regs_mask
& (1 << regno
))
19850 rtx reg1
, reg2
, mem1
, mem2
;
19851 rtx tmp0
, tmp1
, tmp2
;
19854 /* Find the register to pair with this one. */
19855 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19859 reg1
= gen_rtx_REG (SImode
, regno
);
19860 reg2
= gen_rtx_REG (SImode
, regno2
);
19867 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19870 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19872 -4 * (num_regs
- 1)));
19873 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
19874 plus_constant (Pmode
, stack_pointer_rtx
,
19876 tmp1
= gen_rtx_SET (mem1
, reg1
);
19877 tmp2
= gen_rtx_SET (mem2
, reg2
);
19878 RTX_FRAME_RELATED_P (tmp0
) = 1;
19879 RTX_FRAME_RELATED_P (tmp1
) = 1;
19880 RTX_FRAME_RELATED_P (tmp2
) = 1;
19881 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19882 XVECEXP (par
, 0, 0) = tmp0
;
19883 XVECEXP (par
, 0, 1) = tmp1
;
19884 XVECEXP (par
, 0, 2) = tmp2
;
19885 insn
= emit_insn (par
);
19886 RTX_FRAME_RELATED_P (insn
) = 1;
19887 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19891 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19894 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19897 tmp1
= gen_rtx_SET (mem1
, reg1
);
19898 tmp2
= gen_rtx_SET (mem2
, reg2
);
19899 RTX_FRAME_RELATED_P (tmp1
) = 1;
19900 RTX_FRAME_RELATED_P (tmp2
) = 1;
19901 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19902 XVECEXP (par
, 0, 0) = tmp1
;
19903 XVECEXP (par
, 0, 1) = tmp2
;
19907 /* Create unwind information. This is an approximation. */
19908 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
19909 plus_constant (Pmode
,
19913 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
19914 plus_constant (Pmode
,
19919 RTX_FRAME_RELATED_P (tmp1
) = 1;
19920 RTX_FRAME_RELATED_P (tmp2
) = 1;
19921 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
19922 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
19924 regno
= regno2
+ 1;
19932 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19933 whenever possible, otherwise it emits single-word stores. The first store
19934 also allocates stack space for all saved registers, using writeback with
19935 post-addressing mode. All other stores use offset addressing. If no STRD
19936 can be emitted, this function emits a sequence of single-word stores,
19937 and not an STM as before, because single-word stores provide more freedom
19938 scheduling and can be turned into an STM by peephole optimizations. */
19940 arm_emit_strd_push (unsigned long saved_regs_mask
)
19943 int i
, j
, dwarf_index
= 0;
19945 rtx dwarf
= NULL_RTX
;
19946 rtx insn
= NULL_RTX
;
19949 /* TODO: A more efficient code can be emitted by changing the
19950 layout, e.g., first push all pairs that can use STRD to keep the
19951 stack aligned, and then push all other registers. */
19952 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
19953 if (saved_regs_mask
& (1 << i
))
19956 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19957 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19958 gcc_assert (num_regs
> 0);
19960 /* Create sequence for DWARF info. */
19961 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19963 /* For dwarf info, we generate explicit stack update. */
19964 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19965 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19966 RTX_FRAME_RELATED_P (tmp
) = 1;
19967 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
19969 /* Save registers. */
19970 offset
= - 4 * num_regs
;
19972 while (j
<= LAST_ARM_REGNUM
)
19973 if (saved_regs_mask
& (1 << j
))
19976 && (saved_regs_mask
& (1 << (j
+ 1))))
19978 /* Current register and previous register form register pair for
19979 which STRD can be generated. */
19982 /* Allocate stack space for all saved registers. */
19983 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
19984 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
19985 mem
= gen_frame_mem (DImode
, tmp
);
19988 else if (offset
> 0)
19989 mem
= gen_frame_mem (DImode
,
19990 plus_constant (Pmode
,
19994 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
19996 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
19997 RTX_FRAME_RELATED_P (tmp
) = 1;
19998 tmp
= emit_insn (tmp
);
20000 /* Record the first store insn. */
20001 if (dwarf_index
== 1)
20004 /* Generate dwarf info. */
20005 mem
= gen_frame_mem (SImode
,
20006 plus_constant (Pmode
,
20009 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20010 RTX_FRAME_RELATED_P (tmp
) = 1;
20011 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20013 mem
= gen_frame_mem (SImode
,
20014 plus_constant (Pmode
,
20017 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
20018 RTX_FRAME_RELATED_P (tmp
) = 1;
20019 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20026 /* Emit a single word store. */
20029 /* Allocate stack space for all saved registers. */
20030 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20031 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20032 mem
= gen_frame_mem (SImode
, tmp
);
20035 else if (offset
> 0)
20036 mem
= gen_frame_mem (SImode
,
20037 plus_constant (Pmode
,
20041 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20043 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20044 RTX_FRAME_RELATED_P (tmp
) = 1;
20045 tmp
= emit_insn (tmp
);
20047 /* Record the first store insn. */
20048 if (dwarf_index
== 1)
20051 /* Generate dwarf info. */
20052 mem
= gen_frame_mem (SImode
,
20053 plus_constant(Pmode
,
20056 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20057 RTX_FRAME_RELATED_P (tmp
) = 1;
20058 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20067 /* Attach dwarf info to the first insn we generate. */
20068 gcc_assert (insn
!= NULL_RTX
);
20069 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20070 RTX_FRAME_RELATED_P (insn
) = 1;
20073 /* Generate and emit an insn that we will recognize as a push_multi.
20074 Unfortunately, since this insn does not reflect very well the actual
20075 semantics of the operation, we need to annotate the insn for the benefit
20076 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20077 MASK for registers that should be annotated for DWARF2 frame unwind
20080 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20083 int num_dwarf_regs
= 0;
20087 int dwarf_par_index
;
20090 /* We don't record the PC in the dwarf frame information. */
20091 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20093 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20095 if (mask
& (1 << i
))
20097 if (dwarf_regs_mask
& (1 << i
))
20101 gcc_assert (num_regs
&& num_regs
<= 16);
20102 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20104 /* For the body of the insn we are going to generate an UNSPEC in
20105 parallel with several USEs. This allows the insn to be recognized
20106 by the push_multi pattern in the arm.md file.
20108 The body of the insn looks something like this:
20111 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20112 (const_int:SI <num>)))
20113 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20119 For the frame note however, we try to be more explicit and actually
20120 show each register being stored into the stack frame, plus a (single)
20121 decrement of the stack pointer. We do it this way in order to be
20122 friendly to the stack unwinding code, which only wants to see a single
20123 stack decrement per instruction. The RTL we generate for the note looks
20124 something like this:
20127 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20128 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20129 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20130 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20134 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20135 instead we'd have a parallel expression detailing all
20136 the stores to the various memory addresses so that debug
20137 information is more up-to-date. Remember however while writing
20138 this to take care of the constraints with the push instruction.
20140 Note also that this has to be taken care of for the VFP registers.
20142 For more see PR43399. */
20144 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20145 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20146 dwarf_par_index
= 1;
20148 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20150 if (mask
& (1 << i
))
20152 reg
= gen_rtx_REG (SImode
, i
);
20154 XVECEXP (par
, 0, 0)
20155 = gen_rtx_SET (gen_frame_mem
20157 gen_rtx_PRE_MODIFY (Pmode
,
20160 (Pmode
, stack_pointer_rtx
,
20163 gen_rtx_UNSPEC (BLKmode
,
20164 gen_rtvec (1, reg
),
20165 UNSPEC_PUSH_MULT
));
20167 if (dwarf_regs_mask
& (1 << i
))
20169 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
20171 RTX_FRAME_RELATED_P (tmp
) = 1;
20172 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20179 for (j
= 1, i
++; j
< num_regs
; i
++)
20181 if (mask
& (1 << i
))
20183 reg
= gen_rtx_REG (SImode
, i
);
20185 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20187 if (dwarf_regs_mask
& (1 << i
))
20190 = gen_rtx_SET (gen_frame_mem
20192 plus_constant (Pmode
, stack_pointer_rtx
,
20195 RTX_FRAME_RELATED_P (tmp
) = 1;
20196 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20203 par
= emit_insn (par
);
20205 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20206 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20207 RTX_FRAME_RELATED_P (tmp
) = 1;
20208 XVECEXP (dwarf
, 0, 0) = tmp
;
20210 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
20215 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20216 SIZE is the offset to be adjusted.
20217 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20219 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20223 RTX_FRAME_RELATED_P (insn
) = 1;
20224 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
20225 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20228 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20229 SAVED_REGS_MASK shows which registers need to be restored.
20231 Unfortunately, since this insn does not reflect very well the actual
20232 semantics of the operation, we need to annotate the insn for the benefit
20233 of DWARF2 frame unwind information. */
20235 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20240 rtx dwarf
= NULL_RTX
;
20242 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20246 offset_adj
= return_in_pc
? 1 : 0;
20247 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20248 if (saved_regs_mask
& (1 << i
))
20251 gcc_assert (num_regs
&& num_regs
<= 16);
20253 /* If SP is in reglist, then we don't emit SP update insn. */
20254 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20256 /* The parallel needs to hold num_regs SETs
20257 and one SET for the stack update. */
20258 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20261 XVECEXP (par
, 0, 0) = ret_rtx
;
20265 /* Increment the stack pointer, based on there being
20266 num_regs 4-byte registers to restore. */
20267 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20268 plus_constant (Pmode
,
20271 RTX_FRAME_RELATED_P (tmp
) = 1;
20272 XVECEXP (par
, 0, offset_adj
) = tmp
;
20275 /* Now restore every reg, which may include PC. */
20276 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20277 if (saved_regs_mask
& (1 << i
))
20279 reg
= gen_rtx_REG (SImode
, i
);
20280 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20282 /* Emit single load with writeback. */
20283 tmp
= gen_frame_mem (SImode
,
20284 gen_rtx_POST_INC (Pmode
,
20285 stack_pointer_rtx
));
20286 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
20287 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20291 tmp
= gen_rtx_SET (reg
,
20294 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20295 RTX_FRAME_RELATED_P (tmp
) = 1;
20296 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20298 /* We need to maintain a sequence for DWARF info too. As dwarf info
20299 should not have PC, skip PC. */
20300 if (i
!= PC_REGNUM
)
20301 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20307 par
= emit_jump_insn (par
);
20309 par
= emit_insn (par
);
20311 REG_NOTES (par
) = dwarf
;
20313 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20314 stack_pointer_rtx
, stack_pointer_rtx
);
20317 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20318 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20320 Unfortunately, since this insn does not reflect very well the actual
20321 semantics of the operation, we need to annotate the insn for the benefit
20322 of DWARF2 frame unwind information. */
20324 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20328 rtx dwarf
= NULL_RTX
;
20331 gcc_assert (num_regs
&& num_regs
<= 32);
20333 /* Workaround ARM10 VFPr1 bug. */
20334 if (num_regs
== 2 && !arm_arch6
)
20336 if (first_reg
== 15)
20342 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20343 there could be up to 32 D-registers to restore.
20344 If there are more than 16 D-registers, make two recursive calls,
20345 each of which emits one pop_multi instruction. */
20348 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20349 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20353 /* The parallel needs to hold num_regs SETs
20354 and one SET for the stack update. */
20355 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20357 /* Increment the stack pointer, based on there being
20358 num_regs 8-byte registers to restore. */
20359 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20360 RTX_FRAME_RELATED_P (tmp
) = 1;
20361 XVECEXP (par
, 0, 0) = tmp
;
20363 /* Now show every reg that will be restored, using a SET for each. */
20364 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20366 reg
= gen_rtx_REG (DFmode
, i
);
20368 tmp
= gen_rtx_SET (reg
,
20371 plus_constant (Pmode
, base_reg
, 8 * j
)));
20372 RTX_FRAME_RELATED_P (tmp
) = 1;
20373 XVECEXP (par
, 0, j
+ 1) = tmp
;
20375 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20380 par
= emit_insn (par
);
20381 REG_NOTES (par
) = dwarf
;
20383 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20384 if (REGNO (base_reg
) == IP_REGNUM
)
20386 RTX_FRAME_RELATED_P (par
) = 1;
20387 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20390 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20391 base_reg
, base_reg
);
20394 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20395 number of registers are being popped, multiple LDRD patterns are created for
20396 all register pairs. If odd number of registers are popped, last register is
20397 loaded by using LDR pattern. */
/* Epilogue helper (Thumb-2): pop the registers in SAVED_REGS_MASK using
   LDRD register-pair patterns where possible, with a trailing single LDR
   (possibly into PC, giving the function return) for a leftover register.
   Builds REG_CFA_RESTORE notes so the unwinder tracks each restore.
   NOTE(review): this extraction has dropped lines — the embedded original
   line numbers are not contiguous (e.g. 20406 -> 20408) — so declarations
   of i/j/num_regs and several braces/statements are missing below.  */
20399 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20403 rtx par
= NULL_RTX
;
20404 rtx dwarf
= NULL_RTX
;
20405 rtx tmp
, reg
, tmp1
;
20406 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
/* Count the registers requested in the mask (num_regs accumulation is in
   dropped lines; the assert below shows the intended 1..16 bound).  */
20408 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20409 if (saved_regs_mask
& (1 << i
))
20412 gcc_assert (num_regs
&& num_regs
<= 16);
20414 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20415 to be popped. So, if num_regs is even, now it will become odd,
20416 and we can generate pop with PC. If num_regs is odd, it will be
20417 even now, and ldr with return can be generated for PC. */
20421 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20423 /* Var j iterates over all the registers to gather all the registers in
20424 saved_regs_mask. Var i gives index of saved registers in stack frame.
20425 A PARALLEL RTX of register-pair is created here, so that pattern for
20426 LDRD can be matched. As PC is always last register to be popped, and
20427 we have already decremented num_regs if PC, we don't have to worry
20428 about PC in this loop. */
20429 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20430 if (saved_regs_mask
& (1 << j
))
20432 /* Create RTX for memory load. */
20433 reg
= gen_rtx_REG (SImode
, j
);
20434 tmp
= gen_rtx_SET (reg
,
20435 gen_frame_mem (SImode
,
20436 plus_constant (Pmode
,
20437 stack_pointer_rtx
, 4 * i
)));
20438 RTX_FRAME_RELATED_P (tmp
) = 1;
20442 /* When saved-register index (i) is even, the RTX to be emitted is
20443 yet to be created. Hence create it first. The LDRD pattern we
20444 are generating is :
20445 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20446 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20447 where target registers need not be consecutive. */
20448 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20452 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20453 added as 0th element and if i is odd, reg_i is added as 1st element
20454 of LDRD pattern shown above. */
20455 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20456 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20460 /* When saved-register index (i) is odd, RTXs for both the registers
20461 to be loaded are generated in above given LDRD pattern, and the
20462 pattern can be emitted now. */
20463 par
= emit_insn (par
);
20464 REG_NOTES (par
) = dwarf
;
20465 RTX_FRAME_RELATED_P (par
) = 1;
20471 /* If the number of registers pushed is odd AND return_in_pc is false OR
20472 number of registers are even AND return_in_pc is true, last register is
20473 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20474 then LDR with post increment. */
20476 /* Increment the stack pointer, based on there being
20477 num_regs 4-byte registers to restore. */
20478 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20479 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20480 RTX_FRAME_RELATED_P (tmp
) = 1;
20481 tmp
= emit_insn (tmp
);
20484 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20485 stack_pointer_rtx
, stack_pointer_rtx
);
20490 if (((num_regs
% 2) == 1 && !return_in_pc
)
20491 || ((num_regs
% 2) == 0 && return_in_pc
))
20493 /* Scan for the single register to be popped. Skip until the saved
20494 register is found. */
20495 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20497 /* Gen LDR with post increment here. */
20498 tmp1
= gen_rtx_MEM (SImode
,
20499 gen_rtx_POST_INC (SImode
,
20500 stack_pointer_rtx
));
20501 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20503 reg
= gen_rtx_REG (SImode
, j
);
20504 tmp
= gen_rtx_SET (reg
, tmp1
);
20505 RTX_FRAME_RELATED_P (tmp
) = 1;
20506 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20510 /* If return_in_pc, j must be PC_REGNUM. */
20511 gcc_assert (j
== PC_REGNUM
);
/* Return via pop into PC: pair the load with a RETURN in one PARALLEL
   so it matches the pop-with-return pattern and is a jump insn.  */
20512 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20513 XVECEXP (par
, 0, 0) = ret_rtx
;
20514 XVECEXP (par
, 0, 1) = tmp
;
20515 par
= emit_jump_insn (par
);
20519 par
= emit_insn (tmp
);
20520 REG_NOTES (par
) = dwarf
;
20521 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20522 stack_pointer_rtx
, stack_pointer_rtx
);
20526 else if ((num_regs
% 2) == 1 && return_in_pc
)
20528 /* There are 2 registers to be popped. So, generate the pattern
20529 pop_multiple_with_stack_update_and_return to pop in PC. */
20530 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20536 /* LDRD in ARM mode needs consecutive registers as operands. This function
20537 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20538 offset addressing and then generates one separate stack udpate. This provides
20539 more scheduling freedom, compared to writeback on every load. However,
20540 if the function returns using load into PC directly
20541 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20542 before the last load. TODO: Add a peephole optimization to recognize
20543 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20544 peephole optimization to merge the load at stack-offset zero
20545 with the stack update instruction using load with writeback
20546 in post-index addressing mode. */
/* Epilogue helper (ARM mode): restore the registers in SAVED_REGS_MASK
   using LDRD (which in ARM mode requires an even/odd consecutive register
   pair) with offset addressing, falling back to single-word LDR, then one
   combined stack-pointer update, and finally a pop into PC (return) if PC
   is in the mask.  REG_CFA_RESTORE / CFA-adjust notes are attached for
   the unwinder.
   NOTE(review): the extraction dropped lines (non-contiguous embedded
   numbering), so declarations of j/offset/mem/tmp, several address
   operands and braces are missing below.  */
20548 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20552 rtx par
= NULL_RTX
;
20553 rtx dwarf
= NULL_RTX
;
20556 /* Restore saved registers. */
20557 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20559 while (j
<= LAST_ARM_REGNUM
)
20560 if (saved_regs_mask
& (1 << j
))
20563 && (saved_regs_mask
& (1 << (j
+ 1)))
20564 && (j
+ 1) != PC_REGNUM
)
20566 /* Current register and next register form register pair for which
20567 LDRD can be generated. PC is always the last register popped, and
20568 we handle it separately. */
20570 mem
= gen_frame_mem (DImode
,
20571 plus_constant (Pmode
,
20575 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
/* One DImode load restores both registers of the pair.  */
20577 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
20578 tmp
= emit_insn (tmp
);
20579 RTX_FRAME_RELATED_P (tmp
) = 1;
20581 /* Generate dwarf info. */
20583 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20584 gen_rtx_REG (SImode
, j
),
20586 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20587 gen_rtx_REG (SImode
, j
+ 1),
20590 REG_NOTES (tmp
) = dwarf
;
20595 else if (j
!= PC_REGNUM
)
20597 /* Emit a single word load. */
20599 mem
= gen_frame_mem (SImode
,
20600 plus_constant (Pmode
,
20604 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20606 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
20607 tmp
= emit_insn (tmp
);
20608 RTX_FRAME_RELATED_P (tmp
) = 1;
20610 /* Generate dwarf info. */
20611 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20612 gen_rtx_REG (SImode
, j
),
20618 else /* j == PC_REGNUM */
20624 /* Update the stack. */
20627 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20628 plus_constant (Pmode
,
20631 tmp
= emit_insn (tmp
);
20632 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20633 stack_pointer_rtx
, stack_pointer_rtx
);
20637 if (saved_regs_mask
& (1 << PC_REGNUM
))
20639 /* Only PC is to be popped. */
20640 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20641 XVECEXP (par
, 0, 0) = ret_rtx
;
/* Post-increment load into PC doubles as the stack update and the
   function return; emitted as a jump insn.  */
20642 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
20643 gen_frame_mem (SImode
,
20644 gen_rtx_POST_INC (SImode
,
20645 stack_pointer_rtx
)));
20646 RTX_FRAME_RELATED_P (tmp
) = 1;
20647 XVECEXP (par
, 0, 1) = tmp
;
20648 par
= emit_jump_insn (par
);
20650 /* Generate dwarf info. */
20651 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20652 gen_rtx_REG (SImode
, PC_REGNUM
),
20654 REG_NOTES (par
) = dwarf
;
20655 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20656 stack_pointer_rtx
, stack_pointer_rtx
);
20660 /* Calculate the size of the return value that is passed in registers. */
/* Return the size in bytes of the value this function returns in
   registers: the mode of crtl->return_rtx when it has been set,
   otherwise the mode of the function's DECL_RESULT.  */
20662 arm_size_return_regs (void)
20666 if (crtl
->return_rtx
!= 0)
20667 mode
= GET_MODE (crtl
->return_rtx
);
20669 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20671 return GET_MODE_SIZE (mode
);
20674 /* Return true if the current function needs to save/restore LR. */
/* Return true if the current Thumb function must save/restore LR:
   LR save has not been proven eliminable, or a far jump (which clobbers
   LR) is used, or LR is live.  NOTE(review): the extraction dropped at
   least one condition between lines 20678 and 20680.  */
20676 thumb_force_lr_save (void)
20678 return !cfun
->machine
->lr_save_eliminated
20680 || thumb_far_jump_used_p ()
20681 || df_regs_ever_live_p (LR_REGNUM
));
20684 /* We do not know if r3 will be available because
20685 we do have an indirect tailcall happening in this
20686 particular case. */
/* We do not know if r3 will be available because
   we do have an indirect tailcall happening in this
   particular case.
   Return true if CALL (a sibcall insn) is an indirect tail call,
   i.e. its call target is held in a register rather than being a
   symbolic reference.  */
20688 is_indirect_tailcall_p (rtx call
)
20690 rtx pat
= PATTERN (call
);
20692 /* Indirect tail call. */
/* The call is the first element of the PARALLEL; strip an enclosing
   SET (call with a return value) to reach the CALL rtx itself.  */
20693 pat
= XVECEXP (pat
, 0, 0);
20694 if (GET_CODE (pat
) == SET
)
20695 pat
= SET_SRC (pat
);
/* Address operand of the CALL's MEM: a REG means indirect.  */
20697 pat
= XEXP (XEXP (pat
, 0), 0);
20698 return REG_P (pat
);
20701 /* Return true if r3 is used by any of the tail call insns in the
20702 current function. */
/* Return true if r3 is (or might be) used by any of the tail-call insns
   in the current function.  Scans the sibcall edges into the exit block:
   r3 counts as used if it appears in the call's function usage, or if
   the tail call is indirect (target register unknown at this point).
   NOTE(review): the early-return and final return statements fall in
   lines dropped by this extraction.  */
20704 any_sibcall_could_use_r3 (void)
20709 if (!crtl
->tail_call_emit
)
20711 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20712 if (e
->flags
& EDGE_SIBCALL
)
20714 rtx_insn
*call
= BB_END (e
->src
);
/* The block may end in a barrier/note; step back to the call.  */
20715 if (!CALL_P (call
))
20716 call
= prev_nonnote_nondebug_insn (call
);
20717 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20718 if (find_regno_fusage (call
, USE
, 3)
20719 || is_indirect_tailcall_p (call
))
20726 /* Compute the distance from register FROM to register TO.
20727 These can be the arg pointer (26), the soft frame pointer (25),
20728 the stack pointer (13) or the hard frame pointer (11).
20729 In thumb mode r7 is used as the soft frame pointer, if needed.
20730 Typical stack layout looks like this:
20732 old stack pointer -> | |
20735 | | saved arguments for
20736 | | vararg functions
20739 hard FP & arg pointer -> | | \
20747 soft frame pointer -> | | /
20752 locals base pointer -> | | /
20757 current stack pointer -> | | /
20760 For a given function some or all of these stack components
20761 may not be needed, giving rise to the possibility of
20762 eliminating some of the registers.
20764 The values returned by this function must reflect the behavior
20765 of arm_expand_prologue() and arm_compute_save_reg_mask().
20767 The sign of the number returned reflects the direction of stack
20768 growth, so the values are positive for all eliminations except
20769 from the soft frame pointer to the hard frame pointer.
20771 SFP may point just inside the local variables block to ensure correct
20775 /* Calculate stack offsets. These are used to calculate register elimination
20776 offsets and in prologue/epilogue code. Also calculates which registers
20777 should be saved. */
/* Calculate stack offsets (saved_args, frame, saved_regs, soft_frame,
   locals_base, outgoing_args) used for register elimination and by the
   prologue/epilogue code, and decide which registers must be saved
   (offsets->saved_regs_mask).  Results are cached in
   cfun->machine->stack_offsets.  Also pads the frame with an extra
   pushed register or 4 bytes of slack where needed to keep SP and the
   soft frame pointer doubleword aligned.
   NOTE(review): the extraction dropped lines (non-contiguous embedded
   numbering), so the early-return on the cached value, the TARGET_THUMB1
   backtrace sizing, parts of the r3-padding heuristic and several braces
   are missing below.  */
20779 static arm_stack_offsets
*
20780 arm_get_frame_offsets (void)
20782 struct arm_stack_offsets
*offsets
;
20783 unsigned long func_type
;
20786 HOST_WIDE_INT frame_size
;
20789 offsets
= &cfun
->machine
->stack_offsets
;
/* Presumably returns the cached offsets once reload has completed —
   the body of this test is in dropped lines; verify against full
   source.  */
20791 if (reload_completed
)
20794 /* Initially this is the size of the local variables. It will translated
20795 into an offset once we have determined the size of preceding data. */
20796 frame_size
= ROUND_UP_WORD (get_frame_size ());
20798 /* Space for variadic functions. */
20799 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20801 /* In Thumb mode this is incorrect, but never used. */
20803 = (offsets
->saved_args
20804 + arm_compute_static_chain_stack_bytes ()
20805 + (frame_pointer_needed
? 4 : 0));
20809 unsigned int regno
;
20811 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
20812 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20813 saved
= core_saved
;
20815 /* We know that SP will be doubleword aligned on entry, and we must
20816 preserve that condition at any subroutine call. We also require the
20817 soft frame pointer to be doubleword aligned. */
20819 if (TARGET_REALLY_IWMMXT
)
20821 /* Check for the call-saved iWMMXt registers. */
20822 for (regno
= FIRST_IWMMXT_REGNUM
;
20823 regno
<= LAST_IWMMXT_REGNUM
;
20825 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20829 func_type
= arm_current_func_type ();
20830 /* Space for saved VFP registers. */
20831 if (! IS_VOLATILE (func_type
)
20832 && TARGET_HARD_FLOAT
)
20833 saved
+= arm_get_vfp_saved_size ();
20835 else /* TARGET_THUMB1 */
20837 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
20838 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20839 saved
= core_saved
;
20840 if (TARGET_BACKTRACE
)
20844 /* Saved registers include the stack frame. */
20845 offsets
->saved_regs
20846 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20847 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20849 /* A leaf function does not need any stack alignment if it has nothing
20851 if (crtl
->is_leaf
&& frame_size
== 0
20852 /* However if it calls alloca(), we have a dynamically allocated
20853 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20854 && ! cfun
->calls_alloca
)
20856 offsets
->outgoing_args
= offsets
->soft_frame
;
20857 offsets
->locals_base
= offsets
->soft_frame
;
20861 /* Ensure SFP has the correct alignment. */
20862 if (ARM_DOUBLEWORD_ALIGN
20863 && (offsets
->soft_frame
& 7))
20865 offsets
->soft_frame
+= 4;
20866 /* Try to align stack by pushing an extra reg. Don't bother doing this
20867 when there is a stack frame as the alignment will be rolled into
20868 the normal stack adjustment. */
20869 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20873 /* Register r3 is caller-saved. Normally it does not need to be
20874 saved on entry by the prologue. However if we choose to save
20875 it for padding then we may confuse the compiler into thinking
20876 a prologue sequence is required when in fact it is not. This
20877 will occur when shrink-wrapping if r3 is used as a scratch
20878 register and there are no other callee-saved writes.
20880 This situation can be avoided when other callee-saved registers
20881 are available and r3 is not mandatory if we choose a callee-saved
20882 register for padding. */
20883 bool prefer_callee_reg_p
= false;
20885 /* If it is safe to use r3, then do so. This sometimes
20886 generates better code on Thumb-2 by avoiding the need to
20887 use 32-bit push/pop instructions. */
20888 if (! any_sibcall_could_use_r3 ()
20889 && arm_size_return_regs () <= 12
20890 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20892 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20895 if (!TARGET_THUMB2
)
20896 prefer_callee_reg_p
= true;
20899 || prefer_callee_reg_p
)
/* Search r4..r11 (only low registers on Thumb-1) for a callee-saved
   register not already in the save mask to use as padding.  */
20901 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20903 /* Avoid fixed registers; they may be changed at
20904 arbitrary times so it's unsafe to restore them
20905 during the epilogue. */
20907 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
20917 offsets
->saved_regs
+= 4;
20918 offsets
->saved_regs_mask
|= (1 << reg
);
20923 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
20924 offsets
->outgoing_args
= (offsets
->locals_base
20925 + crtl
->outgoing_args_size
);
20927 if (ARM_DOUBLEWORD_ALIGN
)
20929 /* Ensure SP remains doubleword aligned. */
20930 if (offsets
->outgoing_args
& 7)
20931 offsets
->outgoing_args
+= 4;
20932 gcc_assert (!(offsets
->outgoing_args
& 7));
20939 /* Calculate the relative offsets for the different stack pointers. Positive
20940 offsets are in the direction of stack growth. */
/* Compute the distance from register FROM to register TO for register
   elimination, using the frame layout from arm_get_frame_offsets ().
   Positive values are in the direction of stack growth; the only
   negative case is soft frame pointer -> hard frame pointer.
   NOTE(review): the switch statements on FROM and TO themselves fall in
   lines dropped by this extraction — only the case labels and bodies
   remain visible.  */
20943 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20945 arm_stack_offsets
*offsets
;
20947 offsets
= arm_get_frame_offsets ();
20949 /* OK, now we have enough information to compute the distances.
20950 There must be an entry in these switch tables for each pair
20951 of registers in ELIMINABLE_REGS, even if some of the entries
20952 seem to be redundant or useless. */
20955 case ARG_POINTER_REGNUM
:
20958 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20961 case FRAME_POINTER_REGNUM
:
20962 /* This is the reverse of the soft frame pointer
20963 to hard frame pointer elimination below. */
20964 return offsets
->soft_frame
- offsets
->saved_args
;
20966 case ARM_HARD_FRAME_POINTER_REGNUM
:
20967 /* This is only non-zero in the case where the static chain register
20968 is stored above the frame. */
20969 return offsets
->frame
- offsets
->saved_args
- 4;
20971 case STACK_POINTER_REGNUM
:
20972 /* If nothing has been pushed on the stack at all
20973 then this will return -4. This *is* correct! */
20974 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
20977 gcc_unreachable ();
20979 gcc_unreachable ();
20981 case FRAME_POINTER_REGNUM
:
20984 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20987 case ARM_HARD_FRAME_POINTER_REGNUM
:
20988 /* The hard frame pointer points to the top entry in the
20989 stack frame. The soft frame pointer to the bottom entry
20990 in the stack frame. If there is no stack frame at all,
20991 then they are identical. */
20993 return offsets
->frame
- offsets
->soft_frame
;
20995 case STACK_POINTER_REGNUM
:
20996 return offsets
->outgoing_args
- offsets
->soft_frame
;
20999 gcc_unreachable ();
21001 gcc_unreachable ();
21004 /* You cannot eliminate from the stack pointer.
21005 In theory you could eliminate from the hard frame
21006 pointer to the stack pointer, but this will never
21007 happen, since if a stack frame is not needed the
21008 hard frame pointer will never be used. */
21009 gcc_unreachable ();
21013 /* Given FROM and TO register numbers, say whether this elimination is
21014 allowed. Frame pointer elimination is automatically handled.
21016 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21017 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21018 pointer, we must eliminate FRAME_POINTER_REGNUM into
21019 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21020 ARG_POINTER_REGNUM. */
/* Given FROM and TO register numbers, say whether this elimination is
   allowed: disallow arg-pointer -> frame-pointer, any elimination to SP
   when a frame pointer is needed, and eliminations to the wrong
   architecture's hard frame pointer (ARM HFP under Thumb, Thumb HFP
   under ARM).  NOTE(review): the final "true" arm of this conditional
   chain falls in a line dropped by the extraction.  */
21023 arm_can_eliminate (const int from
, const int to
)
21025 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
21026 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
21027 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
21028 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
21032 /* Emit RTL to save coprocessor registers on function entry. Returns the
21033 number of bytes pushed. */
/* Emit RTL to save coprocessor registers on function entry: push each
   live, call-saved iWMMXt register with a pre-decrement store, then save
   the live call-saved VFP registers in maximal consecutive runs via
   vfp_emit_fstmd.  Returns the number of bytes pushed (saved_size); the
   iWMMXt accumulation into saved_size falls in dropped lines.  */
21036 arm_save_coproc_regs(void)
21038 int saved_size
= 0;
21040 unsigned start_reg
;
21043 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
21044 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
21046 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21047 insn
= gen_rtx_MEM (V2SImode
, insn
);
21048 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
21049 RTX_FRAME_RELATED_P (insn
) = 1;
21053 if (TARGET_HARD_FLOAT
)
21055 start_reg
= FIRST_VFP_REGNUM
;
/* Scan VFP registers in D-register (even/odd) pairs; start_reg marks
   the beginning of the current run of registers that must be saved.  */
21057 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
21059 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
21060 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
/* Pair need not be saved: flush the pending run, if any.  */
21062 if (start_reg
!= reg
)
21063 saved_size
+= vfp_emit_fstmd (start_reg
,
21064 (reg
- start_reg
) / 2);
21065 start_reg
= reg
+ 2;
/* Flush the final run after the loop.  */
21068 if (start_reg
!= reg
)
21069 saved_size
+= vfp_emit_fstmd (start_reg
,
21070 (reg
- start_reg
) / 2);
21076 /* Set the Thumb frame pointer from the stack pointer. */
/* Set the Thumb frame pointer from the stack pointer: FP = SP + amount,
   where amount = outgoing_args - locals_base.  When the amount does not
   fit an add immediate, the constant is moved into FP first and then SP
   is added.  A REG_FRAME_RELATED_EXPR note records the logical
   FP = SP + amount for the unwinder.
   NOTE(review): dropped lines hide the conditional structure choosing
   between these emission strategies and some operands.  */
21079 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21081 HOST_WIDE_INT amount
;
21084 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21086 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21087 stack_pointer_rtx
, GEN_INT (amount
)));
21090 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21091 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21092 expects the first two operands to be the same. */
21095 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21097 hard_frame_pointer_rtx
));
21101 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21102 hard_frame_pointer_rtx
,
21103 stack_pointer_rtx
));
21105 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
21106 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21107 RTX_FRAME_RELATED_P (dwarf
) = 1;
21108 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21111 RTX_FRAME_RELATED_P (insn
) = 1;
21114 struct scratch_reg
{
21119 /* Return a short-lived scratch register for use as a 2nd scratch register on
21120 function entry after the registers are saved in the prologue. This register
21121 must be released by means of release_scratch_register_on_entry. IP is not
21122 considered since it is always used as the 1st scratch register if available.
21124 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21125 mask of live registers. */
/* Return a short-lived scratch register for use as a 2nd scratch register
   on function entry, after the registers are saved in the prologue; must
   be released via release_scratch_register_on_entry.  IP is never chosen
   (it is the 1st scratch register when available).  Preference order
   visible here: LR if saved, then a saved register in r4..r10, otherwise
   r2/r3 — and if that register is live on entry, it is spilled to the
   stack with a pre-decrement store plus an SP-adjust unwind note.
   REGNO1 is the 1st scratch register's index; LIVE_REGS the mask of live
   registers.  NOTE(review): dropped lines hide regno assignments, the
   sr->saved flag handling and surrounding braces.  */
21128 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
21129 unsigned long live_regs
)
21135 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
21141 for (i
= 4; i
< 11; i
++)
21142 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
21150 /* If IP is used as the 1st scratch register for a nested function,
21151 then either r3 wasn't available or is used to preserve IP. */
21152 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
21154 regno
= (regno1
== 3 ? 2 : 3);
/* Whether the chosen register is live on entry, i.e. needs saving.  */
21156 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
21161 sr
->reg
= gen_rtx_REG (SImode
, regno
);
21164 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21165 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
21166 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21167 plus_constant (Pmode
, stack_pointer_rtx
, -4));
21168 RTX_FRAME_RELATED_P (insn
) = 1;
21169 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21173 /* Release a scratch register obtained from the preceding function. */
/* Release a scratch register obtained from get_scratch_register_on_entry:
   if it was spilled, reload it with a post-increment load from the stack
   and record the matching +4 SP adjustment for the unwinder.
   NOTE(review): the guard testing whether the register was actually
   saved falls in a line dropped by the extraction.  */
21176 release_scratch_register_on_entry (struct scratch_reg
*sr
)
21180 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
21181 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
21182 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21183 plus_constant (Pmode
, stack_pointer_rtx
, 4));
21184 RTX_FRAME_RELATED_P (insn
) = 1;
21185 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21189 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21191 #if PROBE_INTERVAL > 4096
21192 #error Cannot use indexed addressing mode for stack probing
21195 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21196 inclusive. These are offsets from the current stack pointer. REGNO1
21197 is the index number of the 1st scratch register and LIVE_REGS is the
21198 mask of live registers. */
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive (offsets below the current stack pointer).  Three strategies:
   (1) SIZE <= PROBE_INTERVAL: a single probe; (2) SIZE <= 5 intervals:
   an unrolled sequence of probes; (3) otherwise a run-time loop using a
   second scratch register, with an equality loop test to be safe against
   address wrap-around.  REGNO1 is the 1st scratch register's index and
   LIVE_REGS the mask of live registers (passed through to
   get_scratch_register_on_entry).  */
21201 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
21202 unsigned int regno1
, unsigned long live_regs
)
21204 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
21206 /* See if we have a constant small number of probes to generate. If so,
21207 that's the easy case. */
21208 if (size
<= PROBE_INTERVAL
)
21210 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21211 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21212 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
21215 /* The run-time loop is made up of 10 insns in the generic case while the
21216 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21217 else if (size
<= 5 * PROBE_INTERVAL
)
21219 HOST_WIDE_INT i
, rem
;
21221 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21222 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21223 emit_stack_probe (reg1
);
21225 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21226 it exceeds SIZE. If only two probes are needed, this will not
21227 generate any code. Then probe at FIRST + SIZE. */
21228 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
21230 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21231 emit_stack_probe (reg1
);
/* Final probe at FIRST + SIZE; split it when the residual offset will
   not fit an immediate (4095 for ARM, 255 for Thumb-2 stores).  */
21234 rem
= size
- (i
- PROBE_INTERVAL
);
21235 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21237 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21238 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
21241 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
21244 /* Otherwise, do the same as above, but in a loop. Note that we must be
21245 extra careful with variables wrapping around because we might be at
21246 the very top (or the very bottom) of the address space and we have
21247 to be able to handle this case properly; in particular, we use an
21248 equality test for the loop condition. */
21251 HOST_WIDE_INT rounded_size
;
21252 struct scratch_reg sr
;
21254 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
21256 emit_move_insn (reg1
, GEN_INT (first
));
21259 /* Step 1: round SIZE to the previous multiple of the interval. */
21261 rounded_size
= size
& -PROBE_INTERVAL
;
21262 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
21265 /* Step 2: compute initial and final value of the loop counter. */
21267 /* TEST_ADDR = SP + FIRST. */
21268 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21270 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21271 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
21274 /* Step 3: the loop
21278 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21281 while (TEST_ADDR != LAST_ADDR)
21283 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21284 until it is equal to ROUNDED_SIZE. */
21286 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
21289 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21290 that SIZE is equal to ROUNDED_SIZE. */
21292 if (size
!= rounded_size
)
21294 HOST_WIDE_INT rem
= size
- rounded_size
;
21296 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21298 emit_set_insn (sr
.reg
,
21299 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
21300 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
21301 PROBE_INTERVAL
- rem
));
21304 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
21307 release_scratch_register_on_entry (&sr
);
21310 /* Make sure nothing is scheduled before we are done. */
21311 emit_insn (gen_blockage ());
21314 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21315 absolute addresses. */
/* Output assembly for the stack-probing loop: REG1 is the probe address,
   REG2 the last address to probe (both absolute, descending).  Emits a
   local label, decrements REG1 by PROBE_INTERVAL, stores r0 at it as the
   probe, compares against REG2 and branches back while unequal.
   NOTE(review): dropped lines hide the xops declaration, the assignment
   of xops[0]/reg2 into xops, and the return.  */
21318 output_probe_stack_range (rtx reg1
, rtx reg2
)
21320 static int labelno
= 0;
21324 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
21327 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
21329 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21331 xops
[1] = GEN_INT (PROBE_INTERVAL
);
21332 output_asm_insn ("sub\t%0, %0, %1", xops
);
21334 /* Probe at TEST_ADDR. */
21335 output_asm_insn ("str\tr0, [%0, #0]", xops
);
21337 /* Test if TEST_ADDR == LAST_ADDR. */
21339 output_asm_insn ("cmp\t%0, %1", xops
);
21342 fputs ("\tbne\t", asm_out_file
);
21343 assemble_name_raw (asm_out_file
, loop_lab
);
21344 fputc ('\n', asm_out_file
);
21349 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21352 arm_expand_prologue (void)
21357 unsigned long live_regs_mask
;
21358 unsigned long func_type
;
21360 int saved_pretend_args
= 0;
21361 int saved_regs
= 0;
21362 unsigned HOST_WIDE_INT args_to_push
;
21363 HOST_WIDE_INT size
;
21364 arm_stack_offsets
*offsets
;
21367 func_type
= arm_current_func_type ();
21369 /* Naked functions don't have prologues. */
21370 if (IS_NAKED (func_type
))
21372 if (flag_stack_usage_info
)
21373 current_function_static_stack_size
= 0;
21377 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21378 args_to_push
= crtl
->args
.pretend_args_size
;
21380 /* Compute which register we will have to save onto the stack. */
21381 offsets
= arm_get_frame_offsets ();
21382 live_regs_mask
= offsets
->saved_regs_mask
;
21384 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21386 if (IS_STACKALIGN (func_type
))
21390 /* Handle a word-aligned stack pointer. We generate the following:
21395 <save and restore r0 in normal prologue/epilogue>
21399 The unwinder doesn't need to know about the stack realignment.
21400 Just tell it we saved SP in r0. */
21401 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21403 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
21404 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
21406 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21407 RTX_FRAME_RELATED_P (insn
) = 1;
21408 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21410 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21412 /* ??? The CFA changes here, which may cause GDB to conclude that it
21413 has entered a different function. That said, the unwind info is
21414 correct, individually, before and after this instruction because
21415 we've described the save of SP, which will override the default
21416 handling of SP as restoring from the CFA. */
21417 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21420 /* The static chain register is the same as the IP register. If it is
21421 clobbered when creating the frame, we need to save and restore it. */
21422 clobber_ip
= IS_NESTED (func_type
)
21423 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21424 || (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21425 && !df_regs_ever_live_p (LR_REGNUM
)
21426 && arm_r3_live_at_start_p ()));
21428 /* Find somewhere to store IP whilst the frame is being created.
21429 We try the following places in order:
21431 1. The last argument register r3 if it is available.
21432 2. A slot on the stack above the frame if there are no
21433 arguments to push onto the stack.
21434 3. Register r3 again, after pushing the argument registers
21435 onto the stack, if this is a varargs function.
21436 4. The last slot on the stack created for the arguments to
21437 push, if this isn't a varargs function.
21439 Note - we only need to tell the dwarf2 backend about the SP
21440 adjustment in the second variant; the static chain register
21441 doesn't need to be unwound, as it doesn't contain a value
21442 inherited from the caller. */
21445 if (!arm_r3_live_at_start_p ())
21446 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21447 else if (args_to_push
== 0)
21451 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21454 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21455 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21458 /* Just tell the dwarf backend that we adjusted SP. */
21459 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21460 plus_constant (Pmode
, stack_pointer_rtx
,
21462 RTX_FRAME_RELATED_P (insn
) = 1;
21463 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21467 /* Store the args on the stack. */
21468 if (cfun
->machine
->uses_anonymous_args
)
21470 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21471 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21472 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21473 saved_pretend_args
= 1;
21479 if (args_to_push
== 4)
21480 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21482 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21483 plus_constant (Pmode
,
21487 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21489 /* Just tell the dwarf backend that we adjusted SP. */
21490 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21491 plus_constant (Pmode
, stack_pointer_rtx
,
21493 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21496 RTX_FRAME_RELATED_P (insn
) = 1;
21497 fp_offset
= args_to_push
;
21502 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21504 if (IS_INTERRUPT (func_type
))
21506 /* Interrupt functions must not corrupt any registers.
21507 Creating a frame pointer however, corrupts the IP
21508 register, so we must push it first. */
21509 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21511 /* Do not set RTX_FRAME_RELATED_P on this insn.
21512 The dwarf stack unwinding code only wants to see one
21513 stack decrement per function, and this is not it. If
21514 this instruction is labeled as being part of the frame
21515 creation sequence then dwarf2out_frame_debug_expr will
21516 die when it encounters the assignment of IP to FP
21517 later on, since the use of SP here establishes SP as
21518 the CFA register and not IP.
21520 Anyway this instruction is not really part of the stack
21521 frame creation although it is part of the prologue. */
21524 insn
= emit_set_insn (ip_rtx
,
21525 plus_constant (Pmode
, stack_pointer_rtx
,
21527 RTX_FRAME_RELATED_P (insn
) = 1;
21532 /* Push the argument registers, or reserve space for them. */
21533 if (cfun
->machine
->uses_anonymous_args
)
21534 insn
= emit_multi_reg_push
21535 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21536 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21539 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21540 GEN_INT (- args_to_push
)));
21541 RTX_FRAME_RELATED_P (insn
) = 1;
21544 /* If this is an interrupt service routine, and the link register
21545 is going to be pushed, and we're not generating extra
21546 push of IP (needed when frame is needed and frame layout if apcs),
21547 subtracting four from LR now will mean that the function return
21548 can be done with a single instruction. */
21549 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21550 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21551 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21554 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21556 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21559 if (live_regs_mask
)
21561 unsigned long dwarf_regs_mask
= live_regs_mask
;
21563 saved_regs
+= bit_count (live_regs_mask
) * 4;
21564 if (optimize_size
&& !frame_pointer_needed
21565 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21567 /* If no coprocessor registers are being pushed and we don't have
21568 to worry about a frame pointer then push extra registers to
21569 create the stack frame. This is done in a way that does not
21570 alter the frame layout, so is independent of the epilogue. */
21574 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21576 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21577 if (frame
&& n
* 4 >= frame
)
21580 live_regs_mask
|= (1 << n
) - 1;
21581 saved_regs
+= frame
;
21586 && current_tune
->prefer_ldrd_strd
21587 && !optimize_function_for_size_p (cfun
))
21589 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21591 thumb2_emit_strd_push (live_regs_mask
);
21592 else if (TARGET_ARM
21593 && !TARGET_APCS_FRAME
21594 && !IS_INTERRUPT (func_type
))
21595 arm_emit_strd_push (live_regs_mask
);
21598 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21599 RTX_FRAME_RELATED_P (insn
) = 1;
21604 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21605 RTX_FRAME_RELATED_P (insn
) = 1;
21609 if (! IS_VOLATILE (func_type
))
21610 saved_regs
+= arm_save_coproc_regs ();
21612 if (frame_pointer_needed
&& TARGET_ARM
)
21614 /* Create the new frame pointer. */
21615 if (TARGET_APCS_FRAME
)
21617 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21618 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21619 RTX_FRAME_RELATED_P (insn
) = 1;
21623 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
21624 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21625 stack_pointer_rtx
, insn
));
21626 RTX_FRAME_RELATED_P (insn
) = 1;
21630 size
= offsets
->outgoing_args
- offsets
->saved_args
;
21631 if (flag_stack_usage_info
)
21632 current_function_static_stack_size
= size
;
21634 /* If this isn't an interrupt service routine and we have a frame, then do
21635 stack checking. We use IP as the first scratch register, except for the
21636 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21637 if (!IS_INTERRUPT (func_type
)
21638 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
21640 unsigned int regno
;
21642 if (!IS_NESTED (func_type
) || clobber_ip
)
21644 else if (df_regs_ever_live_p (LR_REGNUM
))
21649 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
21651 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
21652 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
,
21653 size
- STACK_CHECK_PROTECT
,
21654 regno
, live_regs_mask
);
21657 arm_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
,
21658 regno
, live_regs_mask
);
21661 /* Recover the static chain register. */
21664 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21665 insn
= gen_rtx_REG (SImode
, 3);
21668 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21669 insn
= gen_frame_mem (SImode
, insn
);
21671 emit_set_insn (ip_rtx
, insn
);
21672 emit_insn (gen_force_register_use (ip_rtx
));
21675 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21677 /* This add can produce multiple insns for a large constant, so we
21678 need to get tricky. */
21679 rtx_insn
*last
= get_last_insn ();
21681 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21682 - offsets
->outgoing_args
);
21684 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21688 last
= last
? NEXT_INSN (last
) : get_insns ();
21689 RTX_FRAME_RELATED_P (last
) = 1;
21691 while (last
!= insn
);
21693 /* If the frame pointer is needed, emit a special barrier that
21694 will prevent the scheduler from moving stores to the frame
21695 before the stack adjustment. */
21696 if (frame_pointer_needed
)
21697 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
21698 hard_frame_pointer_rtx
));
21702 if (frame_pointer_needed
&& TARGET_THUMB2
)
21703 thumb_set_frame_pointer (offsets
);
21705 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21707 unsigned long mask
;
21709 mask
= live_regs_mask
;
21710 mask
&= THUMB2_WORK_REGS
;
21711 if (!IS_NESTED (func_type
))
21712 mask
|= (1 << IP_REGNUM
);
21713 arm_load_pic_register (mask
);
21716 /* If we are profiling, make sure no instructions are scheduled before
21717 the call to mcount. Similarly if the user has requested no
21718 scheduling in the prolog. Similarly if we want non-call exceptions
21719 using the EABI unwinder, to prevent faulting instructions from being
21720 swapped with a stack adjustment. */
21721 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21722 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21723 && cfun
->can_throw_non_call_exceptions
))
21724 emit_insn (gen_blockage ());
21726 /* If the link register is being kept alive, with the return address in it,
21727 then make sure that it does not get reused by the ce2 pass. */
21728 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21729 cfun
->machine
->lr_save_eliminated
= 1;
21732 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21734 arm_print_condition (FILE *stream
)
21736 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21738 /* Branch conversion is not implemented for Thumb-2. */
21741 output_operand_lossage ("predicated Thumb instruction");
21744 if (current_insn_predicate
!= NULL
)
21746 output_operand_lossage
21747 ("predicated instruction in conditional sequence");
21751 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21753 else if (current_insn_predicate
)
21755 enum arm_cond_code code
;
21759 output_operand_lossage ("predicated Thumb instruction");
21763 code
= get_arm_condition_code (current_insn_predicate
);
21764 fputs (arm_condition_codes
[code
], stream
);
21769 /* Globally reserved letters: acln
21770 Punctuation letters currently used: @_|?().!#
21771 Lower case letters currently used: bcdefhimpqtvwxyz
21772 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21773 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21775 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21777 If CODE is 'd', then the X is a condition operand and the instruction
21778 should only be executed if the condition is true.
21779 if CODE is 'D', then the X is a condition operand and the instruction
21780 should only be executed if the condition is false: however, if the mode
21781 of the comparison is CCFPEmode, then always execute the instruction -- we
21782 do this because in these circumstances !GE does not necessarily imply LT;
21783 in these cases the instruction pattern will take care to make sure that
21784 an instruction containing %d will follow, thereby undoing the effects of
21785 doing this instruction unconditionally.
21786 If CODE is 'N' then X is a floating point operand that must be negated
21788 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21789 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21791 arm_print_operand (FILE *stream
, rtx x
, int code
)
21796 fputs (ASM_COMMENT_START
, stream
);
21800 fputs (user_label_prefix
, stream
);
21804 fputs (REGISTER_PREFIX
, stream
);
21808 arm_print_condition (stream
);
21812 /* The current condition code for a condition code setting instruction.
21813 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21814 fputc('s', stream
);
21815 arm_print_condition (stream
);
21819 /* If the instruction is conditionally executed then print
21820 the current condition code, otherwise print 's'. */
21821 gcc_assert (TARGET_THUMB2
);
21822 if (current_insn_predicate
)
21823 arm_print_condition (stream
);
21825 fputc('s', stream
);
21828 /* %# is a "break" sequence. It doesn't output anything, but is used to
21829 separate e.g. operand numbers from following text, if that text consists
21830 of further digits which we don't want to be part of the operand
21838 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
21839 fprintf (stream
, "%s", fp_const_from_val (&r
));
21843 /* An integer or symbol address without a preceding # sign. */
21845 switch (GET_CODE (x
))
21848 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21852 output_addr_const (stream
, x
);
21856 if (GET_CODE (XEXP (x
, 0)) == PLUS
21857 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21859 output_addr_const (stream
, x
);
21862 /* Fall through. */
21865 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21869 /* An integer that we want to print in HEX. */
21871 switch (GET_CODE (x
))
21874 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21878 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21883 if (CONST_INT_P (x
))
21886 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21887 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21891 putc ('~', stream
);
21892 output_addr_const (stream
, x
);
21897 /* Print the log2 of a CONST_INT. */
21901 if (!CONST_INT_P (x
)
21902 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
21903 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21905 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21910 /* The low 16 bits of an immediate constant. */
21911 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
21915 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
21919 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
21927 shift
= shift_op (x
, &val
);
21931 fprintf (stream
, ", %s ", shift
);
21933 arm_print_operand (stream
, XEXP (x
, 1), 0);
21935 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21940 /* An explanation of the 'Q', 'R' and 'H' register operands:
21942 In a pair of registers containing a DI or DF value the 'Q'
21943 operand returns the register number of the register containing
21944 the least significant part of the value. The 'R' operand returns
21945 the register number of the register containing the most
21946 significant part of the value.
21948 The 'H' operand returns the higher of the two register numbers.
21949 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21950 same as the 'Q' operand, since the most significant part of the
21951 value is held in the lower number register. The reverse is true
21952 on systems where WORDS_BIG_ENDIAN is false.
21954 The purpose of these operands is to distinguish between cases
21955 where the endian-ness of the values is important (for example
21956 when they are added together), and cases where the endian-ness
21957 is irrelevant, but the order of register operations is important.
21958 For example when loading a value from memory into a register
21959 pair, the endian-ness does not matter. Provided that the value
21960 from the lower memory address is put into the lower numbered
21961 register, and the value from the higher address is put into the
21962 higher numbered register, the load will work regardless of whether
21963 the value being loaded is big-wordian or little-wordian. The
21964 order of the two register loads can matter however, if the address
21965 of the memory location is actually held in one of the registers
21966 being overwritten by the load.
21968 The 'Q' and 'R' constraints are also available for 64-bit
21971 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21973 rtx part
= gen_lowpart (SImode
, x
);
21974 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
21978 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
21980 output_operand_lossage ("invalid operand for code '%c'", code
);
21984 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
21988 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
21990 machine_mode mode
= GET_MODE (x
);
21993 if (mode
== VOIDmode
)
21995 part
= gen_highpart_mode (SImode
, mode
, x
);
21996 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22000 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22002 output_operand_lossage ("invalid operand for code '%c'", code
);
22006 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
22010 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22012 output_operand_lossage ("invalid operand for code '%c'", code
);
22016 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
22020 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22022 output_operand_lossage ("invalid operand for code '%c'", code
);
22026 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
22030 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22032 output_operand_lossage ("invalid operand for code '%c'", code
);
22036 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
22040 asm_fprintf (stream
, "%r",
22041 REG_P (XEXP (x
, 0))
22042 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
22046 asm_fprintf (stream
, "{%r-%r}",
22048 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
22051 /* Like 'M', but writing doubleword vector registers, for use by Neon
22055 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
22056 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
22058 asm_fprintf (stream
, "{d%d}", regno
);
22060 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
22065 /* CONST_TRUE_RTX means always -- that's the default. */
22066 if (x
== const_true_rtx
)
22069 if (!COMPARISON_P (x
))
22071 output_operand_lossage ("invalid operand for code '%c'", code
);
22075 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
22080 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22081 want to do that. */
22082 if (x
== const_true_rtx
)
22084 output_operand_lossage ("instruction never executed");
22087 if (!COMPARISON_P (x
))
22089 output_operand_lossage ("invalid operand for code '%c'", code
);
22093 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
22094 (get_arm_condition_code (x
))],
22104 /* Former Maverick support, removed after GCC-4.7. */
22105 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
22110 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
22111 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
22112 /* Bad value for wCG register number. */
22114 output_operand_lossage ("invalid operand for code '%c'", code
);
22119 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
22122 /* Print an iWMMXt control register name. */
22124 if (!CONST_INT_P (x
)
22126 || INTVAL (x
) >= 16)
22127 /* Bad value for wC register number. */
22129 output_operand_lossage ("invalid operand for code '%c'", code
);
22135 static const char * wc_reg_names
[16] =
22137 "wCID", "wCon", "wCSSF", "wCASF",
22138 "wC4", "wC5", "wC6", "wC7",
22139 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22140 "wC12", "wC13", "wC14", "wC15"
22143 fputs (wc_reg_names
[INTVAL (x
)], stream
);
22147 /* Print the high single-precision register of a VFP double-precision
22151 machine_mode mode
= GET_MODE (x
);
22154 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
22156 output_operand_lossage ("invalid operand for code '%c'", code
);
22161 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
22163 output_operand_lossage ("invalid operand for code '%c'", code
);
22167 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
22171 /* Print a VFP/Neon double precision or quad precision register name. */
22175 machine_mode mode
= GET_MODE (x
);
22176 int is_quad
= (code
== 'q');
22179 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
22181 output_operand_lossage ("invalid operand for code '%c'", code
);
22186 || !IS_VFP_REGNUM (REGNO (x
)))
22188 output_operand_lossage ("invalid operand for code '%c'", code
);
22193 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
22194 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
22196 output_operand_lossage ("invalid operand for code '%c'", code
);
22200 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
22201 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
22205 /* These two codes print the low/high doubleword register of a Neon quad
22206 register, respectively. For pair-structure types, can also print
22207 low/high quadword registers. */
22211 machine_mode mode
= GET_MODE (x
);
22214 if ((GET_MODE_SIZE (mode
) != 16
22215 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
22217 output_operand_lossage ("invalid operand for code '%c'", code
);
22222 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
22224 output_operand_lossage ("invalid operand for code '%c'", code
);
22228 if (GET_MODE_SIZE (mode
) == 16)
22229 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
22230 + (code
== 'f' ? 1 : 0));
22232 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
22233 + (code
== 'f' ? 1 : 0));
22237 /* Print a VFPv3 floating-point constant, represented as an integer
22241 int index
= vfp3_const_double_index (x
);
22242 gcc_assert (index
!= -1);
22243 fprintf (stream
, "%d", index
);
22247 /* Print bits representing opcode features for Neon.
22249 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22250 and polynomials as unsigned.
22252 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22254 Bit 2 is 1 for rounding functions, 0 otherwise. */
22256 /* Identify the type as 's', 'u', 'p' or 'f'. */
22259 HOST_WIDE_INT bits
= INTVAL (x
);
22260 fputc ("uspf"[bits
& 3], stream
);
22264 /* Likewise, but signed and unsigned integers are both 'i'. */
22267 HOST_WIDE_INT bits
= INTVAL (x
);
22268 fputc ("iipf"[bits
& 3], stream
);
22272 /* As for 'T', but emit 'u' instead of 'p'. */
22275 HOST_WIDE_INT bits
= INTVAL (x
);
22276 fputc ("usuf"[bits
& 3], stream
);
22280 /* Bit 2: rounding (vs none). */
22283 HOST_WIDE_INT bits
= INTVAL (x
);
22284 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
22288 /* Memory operand for vld1/vst1 instruction. */
22292 bool postinc
= FALSE
;
22293 rtx postinc_reg
= NULL
;
22294 unsigned align
, memsize
, align_bits
;
22296 gcc_assert (MEM_P (x
));
22297 addr
= XEXP (x
, 0);
22298 if (GET_CODE (addr
) == POST_INC
)
22301 addr
= XEXP (addr
, 0);
22303 if (GET_CODE (addr
) == POST_MODIFY
)
22305 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22306 addr
= XEXP (addr
, 0);
22308 asm_fprintf (stream
, "[%r", REGNO (addr
));
22310 /* We know the alignment of this access, so we can emit a hint in the
22311 instruction (for some alignments) as an aid to the memory subsystem
22313 align
= MEM_ALIGN (x
) >> 3;
22314 memsize
= MEM_SIZE (x
);
22316 /* Only certain alignment specifiers are supported by the hardware. */
22317 if (memsize
== 32 && (align
% 32) == 0)
22319 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22321 else if (memsize
>= 8 && (align
% 8) == 0)
22326 if (align_bits
!= 0)
22327 asm_fprintf (stream
, ":%d", align_bits
);
22329 asm_fprintf (stream
, "]");
22332 fputs("!", stream
);
22334 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
22342 gcc_assert (MEM_P (x
));
22343 addr
= XEXP (x
, 0);
22344 gcc_assert (REG_P (addr
));
22345 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22349 /* Translate an S register number into a D register number and element index. */
22352 machine_mode mode
= GET_MODE (x
);
22355 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22357 output_operand_lossage ("invalid operand for code '%c'", code
);
22362 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22364 output_operand_lossage ("invalid operand for code '%c'", code
);
22368 regno
= regno
- FIRST_VFP_REGNUM
;
22369 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22374 gcc_assert (CONST_DOUBLE_P (x
));
22376 result
= vfp3_const_double_for_fract_bits (x
);
22378 result
= vfp3_const_double_for_bits (x
);
22379 fprintf (stream
, "#%d", result
);
22382 /* Register specifier for vld1.16/vst1.16. Translate the S register
22383 number into a D register number and element index. */
22386 machine_mode mode
= GET_MODE (x
);
22389 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22391 output_operand_lossage ("invalid operand for code '%c'", code
);
22396 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22398 output_operand_lossage ("invalid operand for code '%c'", code
);
22402 regno
= regno
- FIRST_VFP_REGNUM
;
22403 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
22410 output_operand_lossage ("missing operand");
22414 switch (GET_CODE (x
))
22417 asm_fprintf (stream
, "%r", REGNO (x
));
22421 output_address (GET_MODE (x
), XEXP (x
, 0));
22427 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22428 sizeof (fpstr
), 0, 1);
22429 fprintf (stream
, "#%s", fpstr
);
22434 gcc_assert (GET_CODE (x
) != NEG
);
22435 fputc ('#', stream
);
22436 if (GET_CODE (x
) == HIGH
)
22438 fputs (":lower16:", stream
);
22442 output_addr_const (stream
, x
);
22448 /* Target hook for printing a memory address. */
22450 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
22454 int is_minus
= GET_CODE (x
) == MINUS
;
22457 asm_fprintf (stream
, "[%r]", REGNO (x
));
22458 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22460 rtx base
= XEXP (x
, 0);
22461 rtx index
= XEXP (x
, 1);
22462 HOST_WIDE_INT offset
= 0;
22464 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
22466 /* Ensure that BASE is a register. */
22467 /* (one of them must be). */
22468 /* Also ensure the SP is not used as in index register. */
22469 std::swap (base
, index
);
22471 switch (GET_CODE (index
))
22474 offset
= INTVAL (index
);
22477 asm_fprintf (stream
, "[%r, #%wd]",
22478 REGNO (base
), offset
);
22482 asm_fprintf (stream
, "[%r, %s%r]",
22483 REGNO (base
), is_minus
? "-" : "",
22493 asm_fprintf (stream
, "[%r, %s%r",
22494 REGNO (base
), is_minus
? "-" : "",
22495 REGNO (XEXP (index
, 0)));
22496 arm_print_operand (stream
, index
, 'S');
22497 fputs ("]", stream
);
22502 gcc_unreachable ();
22505 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22506 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22508 gcc_assert (REG_P (XEXP (x
, 0)));
22510 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22511 asm_fprintf (stream
, "[%r, #%s%d]!",
22512 REGNO (XEXP (x
, 0)),
22513 GET_CODE (x
) == PRE_DEC
? "-" : "",
22514 GET_MODE_SIZE (mode
));
22516 asm_fprintf (stream
, "[%r], #%s%d",
22517 REGNO (XEXP (x
, 0)),
22518 GET_CODE (x
) == POST_DEC
? "-" : "",
22519 GET_MODE_SIZE (mode
));
22521 else if (GET_CODE (x
) == PRE_MODIFY
)
22523 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22524 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22525 asm_fprintf (stream
, "#%wd]!",
22526 INTVAL (XEXP (XEXP (x
, 1), 1)));
22528 asm_fprintf (stream
, "%r]!",
22529 REGNO (XEXP (XEXP (x
, 1), 1)));
22531 else if (GET_CODE (x
) == POST_MODIFY
)
22533 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22534 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22535 asm_fprintf (stream
, "#%wd",
22536 INTVAL (XEXP (XEXP (x
, 1), 1)));
22538 asm_fprintf (stream
, "%r",
22539 REGNO (XEXP (XEXP (x
, 1), 1)));
22541 else output_addr_const (stream
, x
);
22546 asm_fprintf (stream
, "[%r]", REGNO (x
));
22547 else if (GET_CODE (x
) == POST_INC
)
22548 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22549 else if (GET_CODE (x
) == PLUS
)
22551 gcc_assert (REG_P (XEXP (x
, 0)));
22552 if (CONST_INT_P (XEXP (x
, 1)))
22553 asm_fprintf (stream
, "[%r, #%wd]",
22554 REGNO (XEXP (x
, 0)),
22555 INTVAL (XEXP (x
, 1)));
22557 asm_fprintf (stream
, "[%r, %r]",
22558 REGNO (XEXP (x
, 0)),
22559 REGNO (XEXP (x
, 1)));
22562 output_addr_const (stream
, x
);
22566 /* Target hook for indicating whether a punctuation character for
22567 TARGET_PRINT_OPERAND is valid. */
22569 arm_print_operand_punct_valid_p (unsigned char code
)
22571 return (code
== '@' || code
== '|' || code
== '.'
22572 || code
== '(' || code
== ')' || code
== '#'
22573 || (TARGET_32BIT
&& (code
== '?'))
22574 || (TARGET_THUMB2
&& (code
== '!'))
22575 || (TARGET_THUMB
&& (code
== '_')));
22578 /* Target hook for assembling integer objects. The ARM version needs to
22579 handle word-sized values specially. */
22581 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22585 if (size
== UNITS_PER_WORD
&& aligned_p
)
22587 fputs ("\t.word\t", asm_out_file
);
22588 output_addr_const (asm_out_file
, x
);
22590 /* Mark symbols as position independent. We only do this in the
22591 .text segment, not in the .data segment. */
22592 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22593 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22595 /* See legitimize_pic_address for an explanation of the
22596 TARGET_VXWORKS_RTP check. */
22597 /* References to weak symbols cannot be resolved locally:
22598 they may be overridden by a non-weak definition at link
22600 if (!arm_pic_data_is_text_relative
22601 || (GET_CODE (x
) == SYMBOL_REF
22602 && (!SYMBOL_REF_LOCAL_P (x
)
22603 || (SYMBOL_REF_DECL (x
)
22604 ? DECL_WEAK (SYMBOL_REF_DECL (x
)) : 0))))
22605 fputs ("(GOT)", asm_out_file
);
22607 fputs ("(GOTOFF)", asm_out_file
);
22609 fputc ('\n', asm_out_file
);
22613 mode
= GET_MODE (x
);
22615 if (arm_vector_mode_supported_p (mode
))
22619 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22621 units
= CONST_VECTOR_NUNITS (x
);
22622 size
= GET_MODE_UNIT_SIZE (mode
);
22624 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22625 for (i
= 0; i
< units
; i
++)
22627 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22629 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22632 for (i
= 0; i
< units
; i
++)
22634 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22636 (*CONST_DOUBLE_REAL_VALUE (elt
), GET_MODE_INNER (mode
),
22637 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22643 return default_assemble_integer (x
, size
, aligned_p
);
22647 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22651 if (!TARGET_AAPCS_BASED
)
22654 default_named_section_asm_out_constructor
22655 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22659 /* Put these in the .init_array section, using a special relocation. */
22660 if (priority
!= DEFAULT_INIT_PRIORITY
)
22663 sprintf (buf
, "%s.%.5u",
22664 is_ctor
? ".init_array" : ".fini_array",
22666 s
= get_section (buf
, SECTION_WRITE
| SECTION_NOTYPE
, NULL_TREE
);
22673 switch_to_section (s
);
22674 assemble_align (POINTER_SIZE
);
22675 fputs ("\t.word\t", asm_out_file
);
22676 output_addr_const (asm_out_file
, symbol
);
22677 fputs ("(target1)\n", asm_out_file
);
22680 /* Add a function to the list of static constructors. */
22683 arm_elf_asm_constructor (rtx symbol
, int priority
)
22685 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22688 /* Add a function to the list of static destructors. */
22691 arm_elf_asm_destructor (rtx symbol
, int priority
)
22693 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22696 /* A finite state machine takes care of noticing whether or not instructions
22697 can be conditionally executed, and thus decrease execution time and code
22698 size by deleting branch instructions. The fsm is controlled by
22699 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22701 /* The state of the fsm controlling condition codes are:
22702 0: normal, do nothing special
22703 1: make ASM_OUTPUT_OPCODE not output this instruction
22704 2: make ASM_OUTPUT_OPCODE not output this instruction
22705 3: make instructions conditional
22706 4: make instructions conditional
22708 State transitions (state->state by whom under condition):
22709 0 -> 1 final_prescan_insn if the `target' is a label
22710 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22711 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22712 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22713 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22714 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22715 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22716 (the target insn is arm_target_insn).
22718 If the jump clobbers the conditions then we use states 2 and 4.
22720 A similar thing can be done with conditional return insns.
22722 XXX In case the `target' is an unconditional branch, this conditionalising
22723 of the instructions always reduces code size, but not always execution
22724 time. But then, I want to reduce the code size to somewhere near what
22725 /bin/cc produces. */
22727 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22728 instructions. When a COND_EXEC instruction is seen the subsequent
22729 instructions are scanned so that multiple conditional instructions can be
22730 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22731 specify the length and true/false mask for the IT block. These will be
22732 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22734 /* Returns the index of the ARM condition code string in
22735 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22736 COMPARISON should be an rtx like `(eq (...) (...))'. */
22739 maybe_get_arm_condition_code (rtx comparison
)
22741 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22742 enum arm_cond_code code
;
22743 enum rtx_code comp_code
= GET_CODE (comparison
);
22745 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22746 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22747 XEXP (comparison
, 1));
22751 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22752 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22753 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22754 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22755 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22756 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22757 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22758 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22759 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22760 case CC_DLTUmode
: code
= ARM_CC
;
22763 if (comp_code
== EQ
)
22764 return ARM_INVERSE_CONDITION_CODE (code
);
22765 if (comp_code
== NE
)
22772 case NE
: return ARM_NE
;
22773 case EQ
: return ARM_EQ
;
22774 case GE
: return ARM_PL
;
22775 case LT
: return ARM_MI
;
22776 default: return ARM_NV
;
22782 case NE
: return ARM_NE
;
22783 case EQ
: return ARM_EQ
;
22784 default: return ARM_NV
;
22790 case NE
: return ARM_MI
;
22791 case EQ
: return ARM_PL
;
22792 default: return ARM_NV
;
22797 /* We can handle all cases except UNEQ and LTGT. */
22800 case GE
: return ARM_GE
;
22801 case GT
: return ARM_GT
;
22802 case LE
: return ARM_LS
;
22803 case LT
: return ARM_MI
;
22804 case NE
: return ARM_NE
;
22805 case EQ
: return ARM_EQ
;
22806 case ORDERED
: return ARM_VC
;
22807 case UNORDERED
: return ARM_VS
;
22808 case UNLT
: return ARM_LT
;
22809 case UNLE
: return ARM_LE
;
22810 case UNGT
: return ARM_HI
;
22811 case UNGE
: return ARM_PL
;
22812 /* UNEQ and LTGT do not have a representation. */
22813 case UNEQ
: /* Fall through. */
22814 case LTGT
: /* Fall through. */
22815 default: return ARM_NV
;
22821 case NE
: return ARM_NE
;
22822 case EQ
: return ARM_EQ
;
22823 case GE
: return ARM_LE
;
22824 case GT
: return ARM_LT
;
22825 case LE
: return ARM_GE
;
22826 case LT
: return ARM_GT
;
22827 case GEU
: return ARM_LS
;
22828 case GTU
: return ARM_CC
;
22829 case LEU
: return ARM_CS
;
22830 case LTU
: return ARM_HI
;
22831 default: return ARM_NV
;
22837 case LTU
: return ARM_CS
;
22838 case GEU
: return ARM_CC
;
22839 case NE
: return ARM_CS
;
22840 case EQ
: return ARM_CC
;
22841 default: return ARM_NV
;
22847 case NE
: return ARM_NE
;
22848 case EQ
: return ARM_EQ
;
22849 case GEU
: return ARM_CS
;
22850 case GTU
: return ARM_HI
;
22851 case LEU
: return ARM_LS
;
22852 case LTU
: return ARM_CC
;
22853 default: return ARM_NV
;
22859 case GE
: return ARM_GE
;
22860 case LT
: return ARM_LT
;
22861 case GEU
: return ARM_CS
;
22862 case LTU
: return ARM_CC
;
22863 default: return ARM_NV
;
22869 case NE
: return ARM_VS
;
22870 case EQ
: return ARM_VC
;
22871 default: return ARM_NV
;
22877 case NE
: return ARM_NE
;
22878 case EQ
: return ARM_EQ
;
22879 case GE
: return ARM_GE
;
22880 case GT
: return ARM_GT
;
22881 case LE
: return ARM_LE
;
22882 case LT
: return ARM_LT
;
22883 case GEU
: return ARM_CS
;
22884 case GTU
: return ARM_HI
;
22885 case LEU
: return ARM_LS
;
22886 case LTU
: return ARM_CC
;
22887 default: return ARM_NV
;
22890 default: gcc_unreachable ();
22894 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22895 static enum arm_cond_code
22896 get_arm_condition_code (rtx comparison
)
22898 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22899 gcc_assert (code
!= ARM_NV
);
22903 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22906 thumb2_final_prescan_insn (rtx_insn
*insn
)
22908 rtx_insn
*first_insn
= insn
;
22909 rtx body
= PATTERN (insn
);
22911 enum arm_cond_code code
;
22916 /* max_insns_skipped in the tune was already taken into account in the
22917 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22918 just emit the IT blocks as we can. It does not make sense to split
22920 max
= MAX_INSN_PER_IT_BLOCK
;
22922 /* Remove the previous insn from the count of insns to be output. */
22923 if (arm_condexec_count
)
22924 arm_condexec_count
--;
22926 /* Nothing to do if we are already inside a conditional block. */
22927 if (arm_condexec_count
)
22930 if (GET_CODE (body
) != COND_EXEC
)
22933 /* Conditional jumps are implemented directly. */
22937 predicate
= COND_EXEC_TEST (body
);
22938 arm_current_cc
= get_arm_condition_code (predicate
);
22940 n
= get_attr_ce_count (insn
);
22941 arm_condexec_count
= 1;
22942 arm_condexec_mask
= (1 << n
) - 1;
22943 arm_condexec_masklen
= n
;
22944 /* See if subsequent instructions can be combined into the same block. */
22947 insn
= next_nonnote_insn (insn
);
22949 /* Jumping into the middle of an IT block is illegal, so a label or
22950 barrier terminates the block. */
22951 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
22954 body
= PATTERN (insn
);
22955 /* USE and CLOBBER aren't really insns, so just skip them. */
22956 if (GET_CODE (body
) == USE
22957 || GET_CODE (body
) == CLOBBER
)
22960 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22961 if (GET_CODE (body
) != COND_EXEC
)
22963 /* Maximum number of conditionally executed instructions in a block. */
22964 n
= get_attr_ce_count (insn
);
22965 if (arm_condexec_masklen
+ n
> max
)
22968 predicate
= COND_EXEC_TEST (body
);
22969 code
= get_arm_condition_code (predicate
);
22970 mask
= (1 << n
) - 1;
22971 if (arm_current_cc
== code
)
22972 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
22973 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
22976 arm_condexec_count
++;
22977 arm_condexec_masklen
+= n
;
22979 /* A jump must be the last instruction in a conditional block. */
22983 /* Restore recog_data (getting the attributes of other insns can
22984 destroy this array, but final.c assumes that it remains intact
22985 across this call). */
22986 extract_constrain_insn_cached (first_insn
);
22990 arm_final_prescan_insn (rtx_insn
*insn
)
22992 /* BODY will hold the body of INSN. */
22993 rtx body
= PATTERN (insn
);
22995 /* This will be 1 if trying to repeat the trick, and things need to be
22996 reversed if it appears to fail. */
22999 /* If we start with a return insn, we only succeed if we find another one. */
23000 int seeking_return
= 0;
23001 enum rtx_code return_code
= UNKNOWN
;
23003 /* START_INSN will hold the insn from where we start looking. This is the
23004 first insn after the following code_label if REVERSE is true. */
23005 rtx_insn
*start_insn
= insn
;
23007 /* If in state 4, check if the target branch is reached, in order to
23008 change back to state 0. */
23009 if (arm_ccfsm_state
== 4)
23011 if (insn
== arm_target_insn
)
23013 arm_target_insn
= NULL
;
23014 arm_ccfsm_state
= 0;
23019 /* If in state 3, it is possible to repeat the trick, if this insn is an
23020 unconditional branch to a label, and immediately following this branch
23021 is the previous target label which is only used once, and the label this
23022 branch jumps to is not too far off. */
23023 if (arm_ccfsm_state
== 3)
23025 if (simplejump_p (insn
))
23027 start_insn
= next_nonnote_insn (start_insn
);
23028 if (BARRIER_P (start_insn
))
23030 /* XXX Isn't this always a barrier? */
23031 start_insn
= next_nonnote_insn (start_insn
);
23033 if (LABEL_P (start_insn
)
23034 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23035 && LABEL_NUSES (start_insn
) == 1)
23040 else if (ANY_RETURN_P (body
))
23042 start_insn
= next_nonnote_insn (start_insn
);
23043 if (BARRIER_P (start_insn
))
23044 start_insn
= next_nonnote_insn (start_insn
);
23045 if (LABEL_P (start_insn
)
23046 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23047 && LABEL_NUSES (start_insn
) == 1)
23050 seeking_return
= 1;
23051 return_code
= GET_CODE (body
);
23060 gcc_assert (!arm_ccfsm_state
|| reverse
);
23061 if (!JUMP_P (insn
))
23064 /* This jump might be paralleled with a clobber of the condition codes
23065 the jump should always come first */
23066 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
23067 body
= XVECEXP (body
, 0, 0);
23070 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
23071 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
23074 int fail
= FALSE
, succeed
= FALSE
;
23075 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23076 int then_not_else
= TRUE
;
23077 rtx_insn
*this_insn
= start_insn
;
23080 /* Register the insn jumped to. */
23083 if (!seeking_return
)
23084 label
= XEXP (SET_SRC (body
), 0);
23086 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
23087 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
23088 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
23090 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
23091 then_not_else
= FALSE
;
23093 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
23095 seeking_return
= 1;
23096 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
23098 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
23100 seeking_return
= 1;
23101 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
23102 then_not_else
= FALSE
;
23105 gcc_unreachable ();
23107 /* See how many insns this branch skips, and what kind of insns. If all
23108 insns are okay, and the label or unconditional branch to the same
23109 label is not too far away, succeed. */
23110 for (insns_skipped
= 0;
23111 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
23115 this_insn
= next_nonnote_insn (this_insn
);
23119 switch (GET_CODE (this_insn
))
23122 /* Succeed if it is the target label, otherwise fail since
23123 control falls in from somewhere else. */
23124 if (this_insn
== label
)
23126 arm_ccfsm_state
= 1;
23134 /* Succeed if the following insn is the target label.
23136 If return insns are used then the last insn in a function
23137 will be a barrier. */
23138 this_insn
= next_nonnote_insn (this_insn
);
23139 if (this_insn
&& this_insn
== label
)
23141 arm_ccfsm_state
= 1;
23149 /* The AAPCS says that conditional calls should not be
23150 used since they make interworking inefficient (the
23151 linker can't transform BL<cond> into BLX). That's
23152 only a problem if the machine has BLX. */
23159 /* Succeed if the following insn is the target label, or
23160 if the following two insns are a barrier and the
23162 this_insn
= next_nonnote_insn (this_insn
);
23163 if (this_insn
&& BARRIER_P (this_insn
))
23164 this_insn
= next_nonnote_insn (this_insn
);
23166 if (this_insn
&& this_insn
== label
23167 && insns_skipped
< max_insns_skipped
)
23169 arm_ccfsm_state
= 1;
23177 /* If this is an unconditional branch to the same label, succeed.
23178 If it is to another label, do nothing. If it is conditional,
23180 /* XXX Probably, the tests for SET and the PC are
23183 scanbody
= PATTERN (this_insn
);
23184 if (GET_CODE (scanbody
) == SET
23185 && GET_CODE (SET_DEST (scanbody
)) == PC
)
23187 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
23188 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
23190 arm_ccfsm_state
= 2;
23193 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
23196 /* Fail if a conditional return is undesirable (e.g. on a
23197 StrongARM), but still allow this if optimizing for size. */
23198 else if (GET_CODE (scanbody
) == return_code
23199 && !use_return_insn (TRUE
, NULL
)
23202 else if (GET_CODE (scanbody
) == return_code
)
23204 arm_ccfsm_state
= 2;
23207 else if (GET_CODE (scanbody
) == PARALLEL
)
23209 switch (get_attr_conds (this_insn
))
23219 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
23224 /* Instructions using or affecting the condition codes make it
23226 scanbody
= PATTERN (this_insn
);
23227 if (!(GET_CODE (scanbody
) == SET
23228 || GET_CODE (scanbody
) == PARALLEL
)
23229 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
23239 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
23240 arm_target_label
= CODE_LABEL_NUMBER (label
);
23243 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
23245 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
23247 this_insn
= next_nonnote_insn (this_insn
);
23248 gcc_assert (!this_insn
23249 || (!BARRIER_P (this_insn
)
23250 && !LABEL_P (this_insn
)));
23254 /* Oh, dear! we ran off the end.. give up. */
23255 extract_constrain_insn_cached (insn
);
23256 arm_ccfsm_state
= 0;
23257 arm_target_insn
= NULL
;
23260 arm_target_insn
= this_insn
;
23263 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23266 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
23268 if (reverse
|| then_not_else
)
23269 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
23272 /* Restore recog_data (getting the attributes of other insns can
23273 destroy this array, but final.c assumes that it remains intact
23274 across this call. */
23275 extract_constrain_insn_cached (insn
);
23279 /* Output IT instructions. */
23281 thumb2_asm_output_opcode (FILE * stream
)
23286 if (arm_condexec_mask
)
23288 for (n
= 0; n
< arm_condexec_masklen
; n
++)
23289 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
23291 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
23292 arm_condition_codes
[arm_current_cc
]);
23293 arm_condexec_mask
= 0;
23297 /* Returns true if REGNO is a valid register
23298 for holding a quantity of type MODE. */
23300 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
23302 if (GET_MODE_CLASS (mode
) == MODE_CC
)
23303 return (regno
== CC_REGNUM
23304 || (TARGET_HARD_FLOAT
23305 && regno
== VFPCC_REGNUM
));
23307 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
23311 /* For the Thumb we only allow values bigger than SImode in
23312 registers 0 - 6, so that there is always a second low
23313 register available to hold the upper part of the value.
23314 We probably we ought to ensure that the register is the
23315 start of an even numbered register pair. */
23316 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23318 if (TARGET_HARD_FLOAT
&& IS_VFP_REGNUM (regno
))
23320 if (mode
== SFmode
|| mode
== SImode
)
23321 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23323 if (mode
== DFmode
)
23324 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23326 if (mode
== HFmode
)
23327 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23329 /* VFP registers can hold HImode values. */
23330 if (mode
== HImode
)
23331 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23334 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23335 || (VALID_NEON_QREG_MODE (mode
)
23336 && NEON_REGNO_OK_FOR_QUAD (regno
))
23337 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23338 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23339 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23340 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23341 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23346 if (TARGET_REALLY_IWMMXT
)
23348 if (IS_IWMMXT_GR_REGNUM (regno
))
23349 return mode
== SImode
;
23351 if (IS_IWMMXT_REGNUM (regno
))
23352 return VALID_IWMMXT_REG_MODE (mode
);
23355 /* We allow almost any value to be stored in the general registers.
23356 Restrict doubleword quantities to even register pairs in ARM state
23357 so that we can use ldrd. Do not allow very large Neon structure
23358 opaque modes in general registers; they would use too many. */
23359 if (regno
<= LAST_ARM_REGNUM
)
23361 if (ARM_NUM_REGS (mode
) > 4)
23367 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23370 if (regno
== FRAME_POINTER_REGNUM
23371 || regno
== ARG_POINTER_REGNUM
)
23372 /* We only allow integers in the fake hard registers. */
23373 return GET_MODE_CLASS (mode
) == MODE_INT
;
23378 /* Implement MODES_TIEABLE_P. */
23381 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23383 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23386 /* We specifically want to allow elements of "structure" modes to
23387 be tieable to the structure. This more general condition allows
23388 other rarer situations too. */
23390 && (VALID_NEON_DREG_MODE (mode1
)
23391 || VALID_NEON_QREG_MODE (mode1
)
23392 || VALID_NEON_STRUCT_MODE (mode1
))
23393 && (VALID_NEON_DREG_MODE (mode2
)
23394 || VALID_NEON_QREG_MODE (mode2
)
23395 || VALID_NEON_STRUCT_MODE (mode2
)))
23401 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23402 not used in arm mode. */
23405 arm_regno_class (int regno
)
23407 if (regno
== PC_REGNUM
)
23412 if (regno
== STACK_POINTER_REGNUM
)
23414 if (regno
== CC_REGNUM
)
23421 if (TARGET_THUMB2
&& regno
< 8)
23424 if ( regno
<= LAST_ARM_REGNUM
23425 || regno
== FRAME_POINTER_REGNUM
23426 || regno
== ARG_POINTER_REGNUM
)
23427 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23429 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23430 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23432 if (IS_VFP_REGNUM (regno
))
23434 if (regno
<= D7_VFP_REGNUM
)
23435 return VFP_D0_D7_REGS
;
23436 else if (regno
<= LAST_LO_VFP_REGNUM
)
23437 return VFP_LO_REGS
;
23439 return VFP_HI_REGS
;
23442 if (IS_IWMMXT_REGNUM (regno
))
23443 return IWMMXT_REGS
;
23445 if (IS_IWMMXT_GR_REGNUM (regno
))
23446 return IWMMXT_GR_REGS
;
23451 /* Handle a special case when computing the offset
23452 of an argument from the frame pointer. */
23454 arm_debugger_arg_offset (int value
, rtx addr
)
23458 /* We are only interested if dbxout_parms() failed to compute the offset. */
23462 /* We can only cope with the case where the address is held in a register. */
23466 /* If we are using the frame pointer to point at the argument, then
23467 an offset of 0 is correct. */
23468 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23471 /* If we are using the stack pointer to point at the
23472 argument, then an offset of 0 is correct. */
23473 /* ??? Check this is consistent with thumb2 frame layout. */
23474 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23475 && REGNO (addr
) == SP_REGNUM
)
23478 /* Oh dear. The argument is pointed to by a register rather
23479 than being held in a register, or being stored at a known
23480 offset from the frame pointer. Since GDB only understands
23481 those two kinds of argument we must translate the address
23482 held in the register into an offset from the frame pointer.
23483 We do this by searching through the insns for the function
23484 looking to see where this register gets its value. If the
23485 register is initialized from the frame pointer plus an offset
23486 then we are in luck and we can continue, otherwise we give up.
23488 This code is exercised by producing debugging information
23489 for a function with arguments like this:
23491 double func (double a, double b, int c, double d) {return d;}
23493 Without this code the stab for parameter 'd' will be set to
23494 an offset of 0 from the frame pointer, rather than 8. */
23496 /* The if() statement says:
23498 If the insn is a normal instruction
23499 and if the insn is setting the value in a register
23500 and if the register being set is the register holding the address of the argument
23501 and if the address is computing by an addition
23502 that involves adding to a register
23503 which is the frame pointer
23508 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23510 if ( NONJUMP_INSN_P (insn
)
23511 && GET_CODE (PATTERN (insn
)) == SET
23512 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23513 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23514 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23515 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23516 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23519 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23528 warning (0, "unable to compute real location of stacked parameter");
23529 value
= 8; /* XXX magic hack */
23535 /* Implement TARGET_PROMOTED_TYPE. */
23538 arm_promoted_type (const_tree t
)
23540 if (SCALAR_FLOAT_TYPE_P (t
)
23541 && TYPE_PRECISION (t
) == 16
23542 && TYPE_MAIN_VARIANT (t
) == arm_fp16_type_node
)
23543 return float_type_node
;
23547 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23548 This simply adds HFmode as a supported mode; even though we don't
23549 implement arithmetic on this type directly, it's supported by
23550 optabs conversions, much the way the double-word arithmetic is
23551 special-cased in the default hook. */
23554 arm_scalar_mode_supported_p (machine_mode mode
)
23556 if (mode
== HFmode
)
23557 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
23558 else if (ALL_FIXED_POINT_MODE_P (mode
))
23561 return default_scalar_mode_supported_p (mode
);
23564 /* Set the value of FLT_EVAL_METHOD.
23565 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23567 0: evaluate all operations and constants, whose semantic type has at
23568 most the range and precision of type float, to the range and
23569 precision of float; evaluate all other operations and constants to
23570 the range and precision of the semantic type;
23572 N, where _FloatN is a supported interchange floating type
23573 evaluate all operations and constants, whose semantic type has at
23574 most the range and precision of _FloatN type, to the range and
23575 precision of the _FloatN type; evaluate all other operations and
23576 constants to the range and precision of the semantic type;
23578 If we have the ARMv8.2-A extensions then we support _Float16 in native
23579 precision, so we should set this to 16. Otherwise, we support the type,
23580 but want to evaluate expressions in float precision, so set this to
23583 static enum flt_eval_method
23584 arm_excess_precision (enum excess_precision_type type
)
23588 case EXCESS_PRECISION_TYPE_FAST
:
23589 case EXCESS_PRECISION_TYPE_STANDARD
:
23590 /* We can calculate either in 16-bit range and precision or
23591 32-bit range and precision. Make that decision based on whether
23592 we have native support for the ARMv8.2-A 16-bit floating-point
23593 instructions or not. */
23594 return (TARGET_VFP_FP16INST
23595 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23596 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
);
23597 case EXCESS_PRECISION_TYPE_IMPLICIT
:
23598 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
;
23600 gcc_unreachable ();
23602 return FLT_EVAL_METHOD_UNPREDICTABLE
;
23606 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23607 _Float16 if we are using anything other than ieee format for 16-bit
23608 floating point. Otherwise, punt to the default implementation. */
23609 static machine_mode
23610 arm_floatn_mode (int n
, bool extended
)
23612 if (!extended
&& n
== 16)
23613 return arm_fp16_format
== ARM_FP16_FORMAT_IEEE
? HFmode
: VOIDmode
;
23615 return default_floatn_mode (n
, extended
);
23619 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23620 not to early-clobber SRC registers in the process.
23622 We assume that the operands described by SRC and DEST represent a
23623 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23624 number of components into which the copy has been decomposed. */
23626 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
23630 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
23631 || REGNO (operands
[0]) < REGNO (operands
[1]))
23633 for (i
= 0; i
< count
; i
++)
23635 operands
[2 * i
] = dest
[i
];
23636 operands
[2 * i
+ 1] = src
[i
];
23641 for (i
= 0; i
< count
; i
++)
23643 operands
[2 * i
] = dest
[count
- i
- 1];
23644 operands
[2 * i
+ 1] = src
[count
- i
- 1];
23649 /* Split operands into moves from op[1] + op[2] into op[0]. */
23652 neon_split_vcombine (rtx operands
[3])
23654 unsigned int dest
= REGNO (operands
[0]);
23655 unsigned int src1
= REGNO (operands
[1]);
23656 unsigned int src2
= REGNO (operands
[2]);
23657 machine_mode halfmode
= GET_MODE (operands
[1]);
23658 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
23659 rtx destlo
, desthi
;
23661 if (src1
== dest
&& src2
== dest
+ halfregs
)
23663 /* No-op move. Can't split to nothing; emit something. */
23664 emit_note (NOTE_INSN_DELETED
);
23668 /* Preserve register attributes for variable tracking. */
23669 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
23670 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
23671 GET_MODE_SIZE (halfmode
));
23673 /* Special case of reversed high/low parts. Use VSWP. */
23674 if (src2
== dest
&& src1
== dest
+ halfregs
)
23676 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
23677 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
23678 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
23682 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
23684 /* Try to avoid unnecessary moves if part of the result
23685 is in the right place already. */
23687 emit_move_insn (destlo
, operands
[1]);
23688 if (src2
!= dest
+ halfregs
)
23689 emit_move_insn (desthi
, operands
[2]);
23693 if (src2
!= dest
+ halfregs
)
23694 emit_move_insn (desthi
, operands
[2]);
23696 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  Thin wrapper around ctz_hwi so
   callers in this file read at the same abstraction level.  */
static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
23709 /* Like emit_multi_reg_push, but allowing for a different set of
23710 registers to be described as saved. MASK is the set of registers
23711 to be saved; REAL_REGS is the set of registers to be described as
23712 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23715 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
23717 unsigned long regno
;
23718 rtx par
[10], tmp
, reg
;
23722 /* Build the parallel of the registers actually being stored. */
23723 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
23725 regno
= ctz_hwi (mask
);
23726 reg
= gen_rtx_REG (SImode
, regno
);
23729 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
23731 tmp
= gen_rtx_USE (VOIDmode
, reg
);
23736 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23737 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
23738 tmp
= gen_frame_mem (BLKmode
, tmp
);
23739 tmp
= gen_rtx_SET (tmp
, par
[0]);
23742 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
23743 insn
= emit_insn (tmp
);
23745 /* Always build the stack adjustment note for unwind info. */
23746 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23747 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
23750 /* Build the parallel of the registers recorded as saved for unwind. */
23751 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
23753 regno
= ctz_hwi (real_regs
);
23754 reg
= gen_rtx_REG (SImode
, regno
);
23756 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
23757 tmp
= gen_frame_mem (SImode
, tmp
);
23758 tmp
= gen_rtx_SET (tmp
, reg
);
23759 RTX_FRAME_RELATED_P (tmp
) = 1;
23767 RTX_FRAME_RELATED_P (par
[0]) = 1;
23768 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
23771 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
23776 /* Emit code to push or pop registers to or from the stack. F is the
23777 assembly file. MASK is the registers to pop. */
23779 thumb_pop (FILE *f
, unsigned long mask
)
23782 int lo_mask
= mask
& 0xFF;
23783 int pushed_words
= 0;
23787 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
23789 /* Special case. Do not generate a POP PC statement here, do it in
23791 thumb_exit (f
, -1);
23795 fprintf (f
, "\tpop\t{");
23797 /* Look at the low registers first. */
23798 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
23802 asm_fprintf (f
, "%r", regno
);
23804 if ((lo_mask
& ~1) != 0)
23811 if (mask
& (1 << PC_REGNUM
))
23813 /* Catch popping the PC. */
23814 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
|| crtl
->calls_eh_return
23815 || IS_CMSE_ENTRY (arm_current_func_type ()))
23817 /* The PC is never poped directly, instead
23818 it is popped into r3 and then BX is used. */
23819 fprintf (f
, "}\n");
23821 thumb_exit (f
, -1);
23830 asm_fprintf (f
, "%r", PC_REGNUM
);
23834 fprintf (f
, "}\n");
23837 /* Generate code to return from a thumb function.
23838 If 'reg_containing_return_addr' is -1, then the return address is
23839 actually on the stack, at the stack pointer. */
23841 thumb_exit (FILE *f
, int reg_containing_return_addr
)
23843 unsigned regs_available_for_popping
;
23844 unsigned regs_to_pop
;
23846 unsigned available
;
23850 int restore_a4
= FALSE
;
23852 /* Compute the registers we need to pop. */
23856 if (reg_containing_return_addr
== -1)
23858 regs_to_pop
|= 1 << LR_REGNUM
;
23862 if (TARGET_BACKTRACE
)
23864 /* Restore the (ARM) frame pointer and stack pointer. */
23865 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
23869 /* If there is nothing to pop then just emit the BX instruction and
23871 if (pops_needed
== 0)
23873 if (crtl
->calls_eh_return
)
23874 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
23876 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23878 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n",
23879 reg_containing_return_addr
);
23880 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
23883 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
23886 /* Otherwise if we are not supporting interworking and we have not created
23887 a backtrace structure and the function was not entered in ARM mode then
23888 just pop the return address straight into the PC. */
23889 else if (!TARGET_INTERWORK
23890 && !TARGET_BACKTRACE
23891 && !is_called_in_ARM_mode (current_function_decl
)
23892 && !crtl
->calls_eh_return
23893 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23895 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
23899 /* Find out how many of the (return) argument registers we can corrupt. */
23900 regs_available_for_popping
= 0;
23902 /* If returning via __builtin_eh_return, the bottom three registers
23903 all contain information needed for the return. */
23904 if (crtl
->calls_eh_return
)
23908 /* If we can deduce the registers used from the function's
23909 return value. This is more reliable that examining
23910 df_regs_ever_live_p () because that will be set if the register is
23911 ever used in the function, not just if the register is used
23912 to hold a return value. */
23914 if (crtl
->return_rtx
!= 0)
23915 mode
= GET_MODE (crtl
->return_rtx
);
23917 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
23919 size
= GET_MODE_SIZE (mode
);
23923 /* In a void function we can use any argument register.
23924 In a function that returns a structure on the stack
23925 we can use the second and third argument registers. */
23926 if (mode
== VOIDmode
)
23927 regs_available_for_popping
=
23928 (1 << ARG_REGISTER (1))
23929 | (1 << ARG_REGISTER (2))
23930 | (1 << ARG_REGISTER (3));
23932 regs_available_for_popping
=
23933 (1 << ARG_REGISTER (2))
23934 | (1 << ARG_REGISTER (3));
23936 else if (size
<= 4)
23937 regs_available_for_popping
=
23938 (1 << ARG_REGISTER (2))
23939 | (1 << ARG_REGISTER (3));
23940 else if (size
<= 8)
23941 regs_available_for_popping
=
23942 (1 << ARG_REGISTER (3));
23945 /* Match registers to be popped with registers into which we pop them. */
23946 for (available
= regs_available_for_popping
,
23947 required
= regs_to_pop
;
23948 required
!= 0 && available
!= 0;
23949 available
&= ~(available
& - available
),
23950 required
&= ~(required
& - required
))
23953 /* If we have any popping registers left over, remove them. */
23955 regs_available_for_popping
&= ~available
;
23957 /* Otherwise if we need another popping register we can use
23958 the fourth argument register. */
23959 else if (pops_needed
)
23961 /* If we have not found any free argument registers and
23962 reg a4 contains the return address, we must move it. */
23963 if (regs_available_for_popping
== 0
23964 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
23966 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
23967 reg_containing_return_addr
= LR_REGNUM
;
23969 else if (size
> 12)
23971 /* Register a4 is being used to hold part of the return value,
23972 but we have dire need of a free, low register. */
23975 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
23978 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
23980 /* The fourth argument register is available. */
23981 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
23987 /* Pop as many registers as we can. */
23988 thumb_pop (f
, regs_available_for_popping
);
23990 /* Process the registers we popped. */
23991 if (reg_containing_return_addr
== -1)
23993 /* The return address was popped into the lowest numbered register. */
23994 regs_to_pop
&= ~(1 << LR_REGNUM
);
23996 reg_containing_return_addr
=
23997 number_of_first_bit_set (regs_available_for_popping
);
23999 /* Remove this register for the mask of available registers, so that
24000 the return address will not be corrupted by further pops. */
24001 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
24004 /* If we popped other registers then handle them here. */
24005 if (regs_available_for_popping
)
24009 /* Work out which register currently contains the frame pointer. */
24010 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24012 /* Move it into the correct place. */
24013 asm_fprintf (f
, "\tmov\t%r, %r\n",
24014 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
24016 /* (Temporarily) remove it from the mask of popped registers. */
24017 regs_available_for_popping
&= ~(1 << frame_pointer
);
24018 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
24020 if (regs_available_for_popping
)
24024 /* We popped the stack pointer as well,
24025 find the register that contains it. */
24026 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24028 /* Move it into the stack register. */
24029 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
24031 /* At this point we have popped all necessary registers, so
24032 do not worry about restoring regs_available_for_popping
24033 to its correct value:
24035 assert (pops_needed == 0)
24036 assert (regs_available_for_popping == (1 << frame_pointer))
24037 assert (regs_to_pop == (1 << STACK_POINTER)) */
24041 /* Since we have just move the popped value into the frame
24042 pointer, the popping register is available for reuse, and
24043 we know that we still have the stack pointer left to pop. */
24044 regs_available_for_popping
|= (1 << frame_pointer
);
24048 /* If we still have registers left on the stack, but we no longer have
24049 any registers into which we can pop them, then we must move the return
24050 address into the link register and make available the register that
24052 if (regs_available_for_popping
== 0 && pops_needed
> 0)
24054 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
24056 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
24057 reg_containing_return_addr
);
24059 reg_containing_return_addr
= LR_REGNUM
;
24062 /* If we have registers left on the stack then pop some more.
24063 We know that at most we will want to pop FP and SP. */
24064 if (pops_needed
> 0)
24069 thumb_pop (f
, regs_available_for_popping
);
24071 /* We have popped either FP or SP.
24072 Move whichever one it is into the correct register. */
24073 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24074 move_to
= number_of_first_bit_set (regs_to_pop
);
24076 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
24078 regs_to_pop
&= ~(1 << move_to
);
24083 /* If we still have not popped everything then we must have only
24084 had one register available to us and we are now popping the SP. */
24085 if (pops_needed
> 0)
24089 thumb_pop (f
, regs_available_for_popping
);
24091 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24093 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
24095 assert (regs_to_pop == (1 << STACK_POINTER))
24096 assert (pops_needed == 1)
24100 /* If necessary restore the a4 register. */
24103 if (reg_containing_return_addr
!= LR_REGNUM
)
24105 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24106 reg_containing_return_addr
= LR_REGNUM
;
24109 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
24112 if (crtl
->calls_eh_return
)
24113 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24115 /* Return to caller. */
24116 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24118 /* This is for the cases where LR is not being used to contain the return
24119 address. It may therefore contain information that we might not want
24120 to leak, hence it must be cleared. The value in R0 will never be a
24121 secret at this point, so it is safe to use it, see the clearing code
24122 in 'cmse_nonsecure_entry_clear_before_return'. */
24123 if (reg_containing_return_addr
!= LR_REGNUM
)
24124 asm_fprintf (f
, "\tmov\tlr, r0\n");
24126 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr
);
24127 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
24130 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
24133 /* Scan INSN just before assembler is output for it.
24134 For Thumb-1, we track the status of the condition codes; this
24135 information is used in the cbranchsi4_insn pattern. */
24137 thumb1_final_prescan_insn (rtx_insn
*insn
)
24139 if (flag_print_asm_name
)
24140 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
24141 INSN_ADDRESSES (INSN_UID (insn
)));
24142 /* Don't overwrite the previous setter when we get to a cbranch. */
24143 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
24145 enum attr_conds conds
;
24147 if (cfun
->machine
->thumb1_cc_insn
)
24149 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
24150 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
24153 conds
= get_attr_conds (insn
);
24154 if (conds
== CONDS_SET
)
24156 rtx set
= single_set (insn
);
24157 cfun
->machine
->thumb1_cc_insn
= insn
;
24158 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
24159 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
24160 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
24161 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
24163 rtx src1
= XEXP (SET_SRC (set
), 1);
24164 if (src1
== const0_rtx
)
24165 cfun
->machine
->thumb1_cc_mode
= CCmode
;
24167 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
24169 /* Record the src register operand instead of dest because
24170 cprop_hardreg pass propagates src. */
24171 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
24174 else if (conds
!= CONDS_NOCOND
)
24175 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
24178 /* Check if unexpected far jump is used. */
24179 if (cfun
->machine
->lr_save_eliminated
24180 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24181 internal_error("Unexpected thumb1 far jump");
24185 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
24187 unsigned HOST_WIDE_INT mask
= 0xff;
24190 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
24191 if (val
== 0) /* XXX */
24194 for (i
= 0; i
< 25; i
++)
24195 if ((val
& (mask
<< i
)) == val
)
24201 /* Returns nonzero if the current function contains,
24202 or might contain a far jump. */
24204 thumb_far_jump_used_p (void)
24207 bool far_jump
= false;
24208 unsigned int func_size
= 0;
24210 /* If we have already decided that far jumps may be used,
24211 do not bother checking again, and always return true even if
24212 it turns out that they are not being used. Once we have made
24213 the decision that far jumps are present (and that hence the link
24214 register will be pushed onto the stack) we cannot go back on it. */
24215 if (cfun
->machine
->far_jump_used
)
24218 /* If this function is not being called from the prologue/epilogue
24219 generation code then it must be being called from the
24220 INITIAL_ELIMINATION_OFFSET macro. */
24221 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
24223 /* In this case we know that we are being asked about the elimination
24224 of the arg pointer register. If that register is not being used,
24225 then there are no arguments on the stack, and we do not have to
24226 worry that a far jump might force the prologue to push the link
24227 register, changing the stack offsets. In this case we can just
24228 return false, since the presence of far jumps in the function will
24229 not affect stack offsets.
24231 If the arg pointer is live (or if it was live, but has now been
24232 eliminated and so set to dead) then we do have to test to see if
24233 the function might contain a far jump. This test can lead to some
24234 false negatives, since before reload is completed, then length of
24235 branch instructions is not known, so gcc defaults to returning their
24236 longest length, which in turn sets the far jump attribute to true.
24238 A false negative will not result in bad code being generated, but it
24239 will result in a needless push and pop of the link register. We
24240 hope that this does not occur too often.
24242 If we need doubleword stack alignment this could affect the other
24243 elimination offsets so we can't risk getting it wrong. */
24244 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
24245 cfun
->machine
->arg_pointer_live
= 1;
24246 else if (!cfun
->machine
->arg_pointer_live
)
24250 /* We should not change far_jump_used during or after reload, as there is
24251 no chance to change stack frame layout. */
24252 if (reload_in_progress
|| reload_completed
)
24255 /* Check to see if the function contains a branch
24256 insn with the far jump attribute set. */
24257 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
24259 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24263 func_size
+= get_attr_length (insn
);
24266 /* Attribute far_jump will always be true for thumb1 before
24267 shorten_branch pass. So checking far_jump attribute before
24268 shorten_branch isn't much useful.
24270 Following heuristic tries to estimate more accurately if a far jump
24271 may finally be used. The heuristic is very conservative as there is
24272 no chance to roll-back the decision of not to use far jump.
24274 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24275 2-byte insn is associated with a 4 byte constant pool. Using
24276 function size 2048/3 as the threshold is conservative enough. */
24279 if ((func_size
* 3) >= 2048)
24281 /* Record the fact that we have decided that
24282 the function does use far jumps. */
24283 cfun
->machine
->far_jump_used
= 1;
24291 /* Return nonzero if FUNC must be entered in ARM mode. */
24293 is_called_in_ARM_mode (tree func
)
24295 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
24297 /* Ignore the problem about functions whose address is taken. */
24298 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
24302 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
24308 /* Given the stack offsets and register mask in OFFSETS, decide how
24309 many additional registers to push instead of subtracting a constant
24310 from SP. For epilogues the principle is the same except we use pop.
24311 FOR_PROLOGUE indicates which we're generating. */
24313 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
24315 HOST_WIDE_INT amount
;
24316 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
24317 /* Extract a mask of the ones we can give to the Thumb's push/pop
24319 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
24320 /* Then count how many other high registers will need to be pushed. */
24321 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24322 int n_free
, reg_base
, size
;
24324 if (!for_prologue
&& frame_pointer_needed
)
24325 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24327 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24329 /* If the stack frame size is 512 exactly, we can save one load
24330 instruction, which should make this a win even when optimizing
24332 if (!optimize_size
&& amount
!= 512)
24335 /* Can't do this if there are high registers to push. */
24336 if (high_regs_pushed
!= 0)
24339 /* Shouldn't do it in the prologue if no registers would normally
24340 be pushed at all. In the epilogue, also allow it if we'll have
24341 a pop insn for the PC. */
24344 || TARGET_BACKTRACE
24345 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
24346 || TARGET_INTERWORK
24347 || crtl
->args
.pretend_args_size
!= 0))
24350 /* Don't do this if thumb_expand_prologue wants to emit instructions
24351 between the push and the stack frame allocation. */
24353 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24354 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
24361 size
= arm_size_return_regs ();
24362 reg_base
= ARM_NUM_INTS (size
);
24363 live_regs_mask
>>= reg_base
;
24366 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
24367 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
24369 live_regs_mask
>>= 1;
24375 gcc_assert (amount
/ 4 * 4 == amount
);
24377 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
24378 return (amount
- 508) / 4;
24379 if (amount
<= n_free
* 4)
24384 /* The bits which aren't usefully expanded as rtl. */
24386 thumb1_unexpanded_epilogue (void)
24388 arm_stack_offsets
*offsets
;
24390 unsigned long live_regs_mask
= 0;
24391 int high_regs_pushed
= 0;
24393 int had_to_push_lr
;
24396 if (cfun
->machine
->return_used_this_function
!= 0)
24399 if (IS_NAKED (arm_current_func_type ()))
24402 offsets
= arm_get_frame_offsets ();
24403 live_regs_mask
= offsets
->saved_regs_mask
;
24404 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24406 /* If we can deduce the registers used from the function's return value.
24407 This is more reliable that examining df_regs_ever_live_p () because that
24408 will be set if the register is ever used in the function, not just if
24409 the register is used to hold a return value. */
24410 size
= arm_size_return_regs ();
24412 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
24415 unsigned long extra_mask
= (1 << extra_pop
) - 1;
24416 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
24419 /* The prolog may have pushed some high registers to use as
24420 work registers. e.g. the testsuite file:
24421 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24422 compiles to produce:
24423 push {r4, r5, r6, r7, lr}
24427 as part of the prolog. We have to undo that pushing here. */
24429 if (high_regs_pushed
)
24431 unsigned long mask
= live_regs_mask
& 0xff;
24434 /* The available low registers depend on the size of the value we are
24442 /* Oh dear! We have no low registers into which we can pop
24445 ("no low registers available for popping high registers");
24447 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
24448 if (live_regs_mask
& (1 << next_hi_reg
))
24451 while (high_regs_pushed
)
24453 /* Find lo register(s) into which the high register(s) can
24455 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24457 if (mask
& (1 << regno
))
24458 high_regs_pushed
--;
24459 if (high_regs_pushed
== 0)
24463 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
24465 /* Pop the values into the low register(s). */
24466 thumb_pop (asm_out_file
, mask
);
24468 /* Move the value(s) into the high registers. */
24469 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24471 if (mask
& (1 << regno
))
24473 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
24476 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
24477 if (live_regs_mask
& (1 << next_hi_reg
))
24482 live_regs_mask
&= ~0x0f00;
24485 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
24486 live_regs_mask
&= 0xff;
24488 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
24490 /* Pop the return address into the PC. */
24491 if (had_to_push_lr
)
24492 live_regs_mask
|= 1 << PC_REGNUM
;
24494 /* Either no argument registers were pushed or a backtrace
24495 structure was created which includes an adjusted stack
24496 pointer, so just pop everything. */
24497 if (live_regs_mask
)
24498 thumb_pop (asm_out_file
, live_regs_mask
);
24500 /* We have either just popped the return address into the
24501 PC or it is was kept in LR for the entire function.
24502 Note that thumb_pop has already called thumb_exit if the
24503 PC was in the list. */
24504 if (!had_to_push_lr
)
24505 thumb_exit (asm_out_file
, LR_REGNUM
);
24509 /* Pop everything but the return address. */
24510 if (live_regs_mask
)
24511 thumb_pop (asm_out_file
, live_regs_mask
);
24513 if (had_to_push_lr
)
24517 /* We have no free low regs, so save one. */
24518 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
24522 /* Get the return address into a temporary register. */
24523 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
24527 /* Move the return address to lr. */
24528 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
24530 /* Restore the low register. */
24531 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
24536 regno
= LAST_ARG_REGNUM
;
24541 /* Remove the argument registers that were pushed onto the stack. */
24542 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
24543 SP_REGNUM
, SP_REGNUM
,
24544 crtl
->args
.pretend_args_size
);
24546 thumb_exit (asm_out_file
, regno
);
24552 /* Functions to save and restore machine-specific function data. */
24553 static struct machine_function
*
24554 arm_init_machine_status (void)
24556 struct machine_function
*machine
;
24557 machine
= ggc_cleared_alloc
<machine_function
> ();
24559 #if ARM_FT_UNKNOWN != 0
24560 machine
->func_type
= ARM_FT_UNKNOWN
;
24565 /* Return an RTX indicating where the return address to the
24566 calling function can be found. */
24568 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
24573 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
24576 /* Do anything needed before RTL is emitted for each function. */
24578 arm_init_expanders (void)
24580 /* Arrange to initialize and mark the machine per-function status. */
24581 init_machine_status
= arm_init_machine_status
;
24583 /* This is to stop the combine pass optimizing away the alignment
24584 adjustment of va_arg. */
24585 /* ??? It is claimed that this should not be necessary. */
24587 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
24590 /* Check that FUNC is called with a different mode. */
24593 arm_change_mode_p (tree func
)
24595 if (TREE_CODE (func
) != FUNCTION_DECL
)
24598 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
24601 callee_tree
= target_option_default_node
;
24603 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
24604 int flags
= callee_opts
->x_target_flags
;
24606 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
24609 /* Like arm_compute_initial_elimination offset. Simpler because there
24610 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24611 to point at the base of the local variables after static stack
24612 space for a function has been allocated. */
24615 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
24617 arm_stack_offsets
*offsets
;
24619 offsets
= arm_get_frame_offsets ();
24623 case ARG_POINTER_REGNUM
:
24626 case STACK_POINTER_REGNUM
:
24627 return offsets
->outgoing_args
- offsets
->saved_args
;
24629 case FRAME_POINTER_REGNUM
:
24630 return offsets
->soft_frame
- offsets
->saved_args
;
24632 case ARM_HARD_FRAME_POINTER_REGNUM
:
24633 return offsets
->saved_regs
- offsets
->saved_args
;
24635 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24636 return offsets
->locals_base
- offsets
->saved_args
;
24639 gcc_unreachable ();
24643 case FRAME_POINTER_REGNUM
:
24646 case STACK_POINTER_REGNUM
:
24647 return offsets
->outgoing_args
- offsets
->soft_frame
;
24649 case ARM_HARD_FRAME_POINTER_REGNUM
:
24650 return offsets
->saved_regs
- offsets
->soft_frame
;
24652 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24653 return offsets
->locals_base
- offsets
->soft_frame
;
24656 gcc_unreachable ();
24661 gcc_unreachable ();
24665 /* Generate the function's prologue. */
24668 thumb1_expand_prologue (void)
24672 HOST_WIDE_INT amount
;
24673 HOST_WIDE_INT size
;
24674 arm_stack_offsets
*offsets
;
24675 unsigned long func_type
;
24677 unsigned long live_regs_mask
;
24678 unsigned long l_mask
;
24679 unsigned high_regs_pushed
= 0;
24680 bool lr_needs_saving
;
24682 func_type
= arm_current_func_type ();
24684 /* Naked functions don't have prologues. */
24685 if (IS_NAKED (func_type
))
24687 if (flag_stack_usage_info
)
24688 current_function_static_stack_size
= 0;
24692 if (IS_INTERRUPT (func_type
))
24694 error ("interrupt Service Routines cannot be coded in Thumb mode");
24698 if (is_called_in_ARM_mode (current_function_decl
))
24699 emit_insn (gen_prologue_thumb1_interwork ());
24701 offsets
= arm_get_frame_offsets ();
24702 live_regs_mask
= offsets
->saved_regs_mask
;
24703 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
24705 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24706 l_mask
= live_regs_mask
& 0x40ff;
24707 /* Then count how many other high registers will need to be pushed. */
24708 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24710 if (crtl
->args
.pretend_args_size
)
24712 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
24714 if (cfun
->machine
->uses_anonymous_args
)
24716 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
24717 unsigned long mask
;
24719 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
24720 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
24722 insn
= thumb1_emit_multi_reg_push (mask
, 0);
24726 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24727 stack_pointer_rtx
, x
));
24729 RTX_FRAME_RELATED_P (insn
) = 1;
24732 if (TARGET_BACKTRACE
)
24734 HOST_WIDE_INT offset
= 0;
24735 unsigned work_register
;
24736 rtx work_reg
, x
, arm_hfp_rtx
;
24738 /* We have been asked to create a stack backtrace structure.
24739 The code looks like this:
24743 0 sub SP, #16 Reserve space for 4 registers.
24744 2 push {R7} Push low registers.
24745 4 add R7, SP, #20 Get the stack pointer before the push.
24746 6 str R7, [SP, #8] Store the stack pointer
24747 (before reserving the space).
24748 8 mov R7, PC Get hold of the start of this code + 12.
24749 10 str R7, [SP, #16] Store it.
24750 12 mov R7, FP Get hold of the current frame pointer.
24751 14 str R7, [SP, #4] Store it.
24752 16 mov R7, LR Get hold of the current return address.
24753 18 str R7, [SP, #12] Store it.
24754 20 add R7, SP, #16 Point at the start of the
24755 backtrace structure.
24756 22 mov FP, R7 Put this value into the frame pointer. */
24758 work_register
= thumb_find_work_register (live_regs_mask
);
24759 work_reg
= gen_rtx_REG (SImode
, work_register
);
24760 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
24762 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24763 stack_pointer_rtx
, GEN_INT (-16)));
24764 RTX_FRAME_RELATED_P (insn
) = 1;
24768 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
24769 RTX_FRAME_RELATED_P (insn
) = 1;
24770 lr_needs_saving
= false;
24772 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
24775 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
24776 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24778 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
24779 x
= gen_frame_mem (SImode
, x
);
24780 emit_move_insn (x
, work_reg
);
24782 /* Make sure that the instruction fetching the PC is in the right place
24783 to calculate "start of backtrace creation code + 12". */
24784 /* ??? The stores using the common WORK_REG ought to be enough to
24785 prevent the scheduler from doing anything weird. Failing that
24786 we could always move all of the following into an UNSPEC_VOLATILE. */
24789 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24790 emit_move_insn (work_reg
, x
);
24792 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24793 x
= gen_frame_mem (SImode
, x
);
24794 emit_move_insn (x
, work_reg
);
24796 emit_move_insn (work_reg
, arm_hfp_rtx
);
24798 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24799 x
= gen_frame_mem (SImode
, x
);
24800 emit_move_insn (x
, work_reg
);
24804 emit_move_insn (work_reg
, arm_hfp_rtx
);
24806 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24807 x
= gen_frame_mem (SImode
, x
);
24808 emit_move_insn (x
, work_reg
);
24810 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24811 emit_move_insn (work_reg
, x
);
24813 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24814 x
= gen_frame_mem (SImode
, x
);
24815 emit_move_insn (x
, work_reg
);
24818 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
24819 emit_move_insn (work_reg
, x
);
24821 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
24822 x
= gen_frame_mem (SImode
, x
);
24823 emit_move_insn (x
, work_reg
);
24825 x
= GEN_INT (offset
+ 12);
24826 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24828 emit_move_insn (arm_hfp_rtx
, work_reg
);
24830 /* Optimization: If we are not pushing any low registers but we are going
24831 to push some high registers then delay our first push. This will just
24832 be a push of LR and we can combine it with the push of the first high
24834 else if ((l_mask
& 0xff) != 0
24835 || (high_regs_pushed
== 0 && lr_needs_saving
))
24837 unsigned long mask
= l_mask
;
24838 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
24839 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
24840 RTX_FRAME_RELATED_P (insn
) = 1;
24841 lr_needs_saving
= false;
24844 if (high_regs_pushed
)
24846 unsigned pushable_regs
;
24847 unsigned next_hi_reg
;
24848 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
24849 : crtl
->args
.info
.nregs
;
24850 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
24852 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
24853 if (live_regs_mask
& (1 << next_hi_reg
))
24856 /* Here we need to mask out registers used for passing arguments
24857 even if they can be pushed. This is to avoid using them to stash the high
24858 registers. Such kind of stash may clobber the use of arguments. */
24859 pushable_regs
= l_mask
& (~arg_regs_mask
);
24860 if (lr_needs_saving
)
24861 pushable_regs
&= ~(1 << LR_REGNUM
);
24863 if (pushable_regs
== 0)
24864 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
24866 while (high_regs_pushed
> 0)
24868 unsigned long real_regs_mask
= 0;
24869 unsigned long push_mask
= 0;
24871 for (regno
= LR_REGNUM
; regno
>= 0; regno
--)
24873 if (pushable_regs
& (1 << regno
))
24875 emit_move_insn (gen_rtx_REG (SImode
, regno
),
24876 gen_rtx_REG (SImode
, next_hi_reg
));
24878 high_regs_pushed
--;
24879 real_regs_mask
|= (1 << next_hi_reg
);
24880 push_mask
|= (1 << regno
);
24882 if (high_regs_pushed
)
24884 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
24886 if (live_regs_mask
& (1 << next_hi_reg
))
24894 /* If we had to find a work register and we have not yet
24895 saved the LR then add it to the list of regs to push. */
24896 if (lr_needs_saving
)
24898 push_mask
|= 1 << LR_REGNUM
;
24899 real_regs_mask
|= 1 << LR_REGNUM
;
24900 lr_needs_saving
= false;
24903 insn
= thumb1_emit_multi_reg_push (push_mask
, real_regs_mask
);
24904 RTX_FRAME_RELATED_P (insn
) = 1;
24908 /* Load the pic register before setting the frame pointer,
24909 so we can use r7 as a temporary work register. */
24910 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24911 arm_load_pic_register (live_regs_mask
);
24913 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
24914 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
24915 stack_pointer_rtx
);
24917 size
= offsets
->outgoing_args
- offsets
->saved_args
;
24918 if (flag_stack_usage_info
)
24919 current_function_static_stack_size
= size
;
24921 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24922 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
&& size
)
24923 sorry ("-fstack-check=specific for Thumb-1");
24925 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24926 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
24931 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
24932 GEN_INT (- amount
)));
24933 RTX_FRAME_RELATED_P (insn
) = 1;
24939 /* The stack decrement is too big for an immediate value in a single
24940 insn. In theory we could issue multiple subtracts, but after
24941 three of them it becomes more space efficient to place the full
24942 value in the constant pool and load into a register. (Also the
24943 ARM debugger really likes to see only one stack decrement per
24944 function). So instead we look for a scratch register into which
24945 we can load the decrement, and then we subtract this from the
24946 stack pointer. Unfortunately on the thumb the only available
24947 scratch registers are the argument registers, and we cannot use
24948 these as they may hold arguments to the function. Instead we
24949 attempt to locate a call preserved register which is used by this
24950 function. If we can find one, then we know that it will have
24951 been pushed at the start of the prologue and so we can corrupt
24953 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
24954 if (live_regs_mask
& (1 << regno
))
24957 gcc_assert(regno
<= LAST_LO_REGNUM
);
24959 reg
= gen_rtx_REG (SImode
, regno
);
24961 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
24963 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24964 stack_pointer_rtx
, reg
));
24966 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
24967 plus_constant (Pmode
, stack_pointer_rtx
,
24969 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
24970 RTX_FRAME_RELATED_P (insn
) = 1;
24974 if (frame_pointer_needed
)
24975 thumb_set_frame_pointer (offsets
);
24977 /* If we are profiling, make sure no instructions are scheduled before
24978 the call to mcount. Similarly if the user has requested no
24979 scheduling in the prolog. Similarly if we want non-call exceptions
24980 using the EABI unwinder, to prevent faulting instructions from being
24981 swapped with a stack adjustment. */
24982 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
24983 || (arm_except_unwind_info (&global_options
) == UI_TARGET
24984 && cfun
->can_throw_non_call_exceptions
))
24985 emit_insn (gen_blockage ());
24987 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
24988 if (live_regs_mask
& 0xff)
24989 cfun
->machine
->lr_save_eliminated
= 0;
24992 /* Clear caller saved registers not used to pass return values and leaked
24993 condition flags before exiting a cmse_nonsecure_entry function. */
24996 cmse_nonsecure_entry_clear_before_return (void)
24998 uint64_t to_clear_mask
[2];
24999 uint32_t padding_bits_to_clear
= 0;
25000 uint32_t * padding_bits_to_clear_ptr
= &padding_bits_to_clear
;
25001 int regno
, maxregno
= IP_REGNUM
;
25005 to_clear_mask
[0] = (1ULL << (NUM_ARG_REGS
)) - 1;
25006 to_clear_mask
[0] |= (1ULL << IP_REGNUM
);
25008 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25009 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25010 to make sure the instructions used to clear them are present. */
25011 if (TARGET_HARD_FLOAT
&& !TARGET_THUMB1
)
25013 uint64_t float_mask
= (1ULL << (D7_VFP_REGNUM
+ 1)) - 1;
25014 maxregno
= LAST_VFP_REGNUM
;
25016 float_mask
&= ~((1ULL << FIRST_VFP_REGNUM
) - 1);
25017 to_clear_mask
[0] |= float_mask
;
25019 float_mask
= (1ULL << (maxregno
- 63)) - 1;
25020 to_clear_mask
[1] = float_mask
;
25022 /* Make sure we don't clear the two scratch registers used to clear the
25023 relevant FPSCR bits in output_return_instruction. */
25024 emit_use (gen_rtx_REG (SImode
, IP_REGNUM
));
25025 to_clear_mask
[0] &= ~(1ULL << IP_REGNUM
);
25026 emit_use (gen_rtx_REG (SImode
, 4));
25027 to_clear_mask
[0] &= ~(1ULL << 4);
25030 /* If the user has defined registers to be caller saved, these are no longer
25031 restored by the function before returning and must thus be cleared for
25032 security purposes. */
25033 for (regno
= NUM_ARG_REGS
; regno
< LAST_VFP_REGNUM
; regno
++)
25035 /* We do not touch registers that can be used to pass arguments as per
25036 the AAPCS, since these should never be made callee-saved by user
25038 if (IN_RANGE (regno
, FIRST_VFP_REGNUM
, D7_VFP_REGNUM
))
25040 if (IN_RANGE (regno
, IP_REGNUM
, PC_REGNUM
))
25042 if (call_used_regs
[regno
])
25043 to_clear_mask
[regno
/ 64] |= (1ULL << (regno
% 64));
25046 /* Make sure we do not clear the registers used to return the result in. */
25047 result_type
= TREE_TYPE (DECL_RESULT (current_function_decl
));
25048 if (!VOID_TYPE_P (result_type
))
25050 result_rtl
= arm_function_value (result_type
, current_function_decl
, 0);
25052 /* No need to check that we return in registers, because we don't
25053 support returning on stack yet. */
25055 &= ~compute_not_to_clear_mask (result_type
, result_rtl
, 0,
25056 padding_bits_to_clear_ptr
);
25059 if (padding_bits_to_clear
!= 0)
25062 /* Padding bits to clear is not 0 so we know we are dealing with
25063 returning a composite type, which only uses r0. Let's make sure that
25064 r1-r3 is cleared too, we will use r1 as a scratch register. */
25065 gcc_assert ((to_clear_mask
[0] & 0xe) == 0xe);
25067 reg_rtx
= gen_rtx_REG (SImode
, R1_REGNUM
);
25069 /* Fill the lower half of the negated padding_bits_to_clear. */
25070 emit_move_insn (reg_rtx
,
25071 GEN_INT ((((~padding_bits_to_clear
) << 16u) >> 16u)));
25073 /* Also fill the top half of the negated padding_bits_to_clear. */
25074 if (((~padding_bits_to_clear
) >> 16) > 0)
25075 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode
, reg_rtx
,
25078 GEN_INT ((~padding_bits_to_clear
) >> 16)));
25080 emit_insn (gen_andsi3 (gen_rtx_REG (SImode
, R0_REGNUM
),
25081 gen_rtx_REG (SImode
, R0_REGNUM
),
25085 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
25087 if (!(to_clear_mask
[regno
/ 64] & (1ULL << (regno
% 64))))
25090 if (IS_VFP_REGNUM (regno
))
25092 /* If regno is an even vfp register and its successor is also to
25093 be cleared, use vmov. */
25094 if (TARGET_VFP_DOUBLE
25095 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
25096 && to_clear_mask
[regno
/ 64] & (1ULL << ((regno
% 64) + 1)))
25098 emit_move_insn (gen_rtx_REG (DFmode
, regno
),
25099 CONST1_RTX (DFmode
));
25100 emit_use (gen_rtx_REG (DFmode
, regno
));
25105 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
25106 CONST1_RTX (SFmode
));
25107 emit_use (gen_rtx_REG (SFmode
, regno
));
25114 if (regno
== R0_REGNUM
)
25115 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25118 /* R0 has either been cleared before, see code above, or it
25119 holds a return value, either way it is not secret
25121 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25122 gen_rtx_REG (SImode
, R0_REGNUM
));
25123 emit_use (gen_rtx_REG (SImode
, regno
));
25127 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25128 gen_rtx_REG (SImode
, LR_REGNUM
));
25129 emit_use (gen_rtx_REG (SImode
, regno
));
25135 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25136 POP instruction can be generated. LR should be replaced by PC. All
25137 the checks required are already done by USE_RETURN_INSN (). Hence,
25138 all we really need to check here is if single register is to be
25139 returned, or multiple register return. */
25141 thumb2_expand_return (bool simple_return
)
25144 unsigned long saved_regs_mask
;
25145 arm_stack_offsets
*offsets
;
25147 offsets
= arm_get_frame_offsets ();
25148 saved_regs_mask
= offsets
->saved_regs_mask
;
25150 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25151 if (saved_regs_mask
& (1 << i
))
25154 if (!simple_return
&& saved_regs_mask
)
25156 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25157 functions or adapt code to handle according to ACLE. This path should
25158 not be reachable for cmse_nonsecure_entry functions though we prefer
25159 to assert it for now to ensure that future code changes do not silently
25160 change this behavior. */
25161 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25164 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25165 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
25166 rtx addr
= gen_rtx_MEM (SImode
,
25167 gen_rtx_POST_INC (SImode
,
25168 stack_pointer_rtx
));
25169 set_mem_alias_set (addr
, get_frame_alias_set ());
25170 XVECEXP (par
, 0, 0) = ret_rtx
;
25171 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
25172 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
25173 emit_jump_insn (par
);
25177 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
25178 saved_regs_mask
|= (1 << PC_REGNUM
);
25179 arm_emit_multi_reg_pop (saved_regs_mask
);
25184 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25185 cmse_nonsecure_entry_clear_before_return ();
25186 emit_jump_insn (simple_return_rtx
);
25191 thumb1_expand_epilogue (void)
25193 HOST_WIDE_INT amount
;
25194 arm_stack_offsets
*offsets
;
25197 /* Naked functions don't have prologues. */
25198 if (IS_NAKED (arm_current_func_type ()))
25201 offsets
= arm_get_frame_offsets ();
25202 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25204 if (frame_pointer_needed
)
25206 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
25207 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25209 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
25211 gcc_assert (amount
>= 0);
25214 emit_insn (gen_blockage ());
25217 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
25218 GEN_INT (amount
)));
25221 /* r3 is always free in the epilogue. */
25222 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
25224 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
25225 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
25229 /* Emit a USE (stack_pointer_rtx), so that
25230 the stack adjustment will not be deleted. */
25231 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25233 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
25234 emit_insn (gen_blockage ());
25236 /* Emit a clobber for each insn that will be restored in the epilogue,
25237 so that flow2 will get register lifetimes correct. */
25238 for (regno
= 0; regno
< 13; regno
++)
25239 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
25240 emit_clobber (gen_rtx_REG (SImode
, regno
));
25242 if (! df_regs_ever_live_p (LR_REGNUM
))
25243 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
25245 /* Clear all caller-saved regs that are not used to return. */
25246 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25247 cmse_nonsecure_entry_clear_before_return ();
25250 /* Epilogue code for APCS frame. */
25252 arm_expand_epilogue_apcs_frame (bool really_return
)
25254 unsigned long func_type
;
25255 unsigned long saved_regs_mask
;
25258 int floats_from_frame
= 0;
25259 arm_stack_offsets
*offsets
;
25261 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
25262 func_type
= arm_current_func_type ();
25264 /* Get frame offsets for ARM. */
25265 offsets
= arm_get_frame_offsets ();
25266 saved_regs_mask
= offsets
->saved_regs_mask
;
25268 /* Find the offset of the floating-point save area in the frame. */
25270 = (offsets
->saved_args
25271 + arm_compute_static_chain_stack_bytes ()
25274 /* Compute how many core registers saved and how far away the floats are. */
25275 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25276 if (saved_regs_mask
& (1 << i
))
25279 floats_from_frame
+= 4;
25282 if (TARGET_HARD_FLOAT
)
25285 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
25287 /* The offset is from IP_REGNUM. */
25288 int saved_size
= arm_get_vfp_saved_size ();
25289 if (saved_size
> 0)
25292 floats_from_frame
+= saved_size
;
25293 insn
= emit_insn (gen_addsi3 (ip_rtx
,
25294 hard_frame_pointer_rtx
,
25295 GEN_INT (-floats_from_frame
)));
25296 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
25297 ip_rtx
, hard_frame_pointer_rtx
);
25300 /* Generate VFP register multi-pop. */
25301 start_reg
= FIRST_VFP_REGNUM
;
25303 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
25304 /* Look for a case where a reg does not need restoring. */
25305 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25306 && (!df_regs_ever_live_p (i
+ 1)
25307 || call_used_regs
[i
+ 1]))
25309 if (start_reg
!= i
)
25310 arm_emit_vfp_multi_reg_pop (start_reg
,
25311 (i
- start_reg
) / 2,
25312 gen_rtx_REG (SImode
,
25317 /* Restore the remaining regs that we have discovered (or possibly
25318 even all of them, if the conditional in the for loop never
25320 if (start_reg
!= i
)
25321 arm_emit_vfp_multi_reg_pop (start_reg
,
25322 (i
- start_reg
) / 2,
25323 gen_rtx_REG (SImode
, IP_REGNUM
));
25328 /* The frame pointer is guaranteed to be non-double-word aligned, as
25329 it is set to double-word-aligned old_stack_pointer - 4. */
25331 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
25333 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
25334 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25336 rtx addr
= gen_frame_mem (V2SImode
,
25337 plus_constant (Pmode
, hard_frame_pointer_rtx
,
25339 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25340 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25341 gen_rtx_REG (V2SImode
, i
),
25347 /* saved_regs_mask should contain IP which contains old stack pointer
25348 at the time of activation creation. Since SP and IP are adjacent registers,
25349 we can restore the value directly into SP. */
25350 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
25351 saved_regs_mask
&= ~(1 << IP_REGNUM
);
25352 saved_regs_mask
|= (1 << SP_REGNUM
);
25354 /* There are two registers left in saved_regs_mask - LR and PC. We
25355 only need to restore LR (the return address), but to
25356 save time we can load it directly into PC, unless we need a
25357 special function exit sequence, or we are not really returning. */
25359 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
25360 && !crtl
->calls_eh_return
)
25361 /* Delete LR from the register mask, so that LR on
25362 the stack is loaded into the PC in the register mask. */
25363 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25365 saved_regs_mask
&= ~(1 << PC_REGNUM
);
25367 num_regs
= bit_count (saved_regs_mask
);
25368 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
25371 emit_insn (gen_blockage ());
25372 /* Unwind the stack to just below the saved registers. */
25373 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25374 hard_frame_pointer_rtx
,
25375 GEN_INT (- 4 * num_regs
)));
25377 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
25378 stack_pointer_rtx
, hard_frame_pointer_rtx
);
25381 arm_emit_multi_reg_pop (saved_regs_mask
);
25383 if (IS_INTERRUPT (func_type
))
25385 /* Interrupt handlers will have pushed the
25386 IP onto the stack, so restore it now. */
25388 rtx addr
= gen_rtx_MEM (SImode
,
25389 gen_rtx_POST_INC (SImode
,
25390 stack_pointer_rtx
));
25391 set_mem_alias_set (addr
, get_frame_alias_set ());
25392 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
25393 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25394 gen_rtx_REG (SImode
, IP_REGNUM
),
25398 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
25401 if (crtl
->calls_eh_return
)
25402 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25404 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25406 if (IS_STACKALIGN (func_type
))
25407 /* Restore the original stack pointer. Before prologue, the stack was
25408 realigned and the original stack pointer saved in r0. For details,
25409 see comment in arm_expand_prologue. */
25410 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25412 emit_jump_insn (simple_return_rtx
);
25415 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25416 function is not a sibcall. */
25418 arm_expand_epilogue (bool really_return
)
25420 unsigned long func_type
;
25421 unsigned long saved_regs_mask
;
25425 arm_stack_offsets
*offsets
;
25427 func_type
= arm_current_func_type ();
25429 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25430 let output_return_instruction take care of instruction emission if any. */
25431 if (IS_NAKED (func_type
)
25432 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
25435 emit_jump_insn (simple_return_rtx
);
25439 /* If we are throwing an exception, then we really must be doing a
25440 return, so we can't tail-call. */
25441 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
25443 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
25445 arm_expand_epilogue_apcs_frame (really_return
);
25449 /* Get frame offsets for ARM. */
25450 offsets
= arm_get_frame_offsets ();
25451 saved_regs_mask
= offsets
->saved_regs_mask
;
25452 num_regs
= bit_count (saved_regs_mask
);
25454 if (frame_pointer_needed
)
25457 /* Restore stack pointer if necessary. */
25460 /* In ARM mode, frame pointer points to first saved register.
25461 Restore stack pointer to last saved register. */
25462 amount
= offsets
->frame
- offsets
->saved_regs
;
25464 /* Force out any pending memory operations that reference stacked data
25465 before stack de-allocation occurs. */
25466 emit_insn (gen_blockage ());
25467 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25468 hard_frame_pointer_rtx
,
25469 GEN_INT (amount
)));
25470 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25472 hard_frame_pointer_rtx
);
25474 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25476 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25480 /* In Thumb-2 mode, the frame pointer points to the last saved
25482 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25485 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
25486 hard_frame_pointer_rtx
,
25487 GEN_INT (amount
)));
25488 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25489 hard_frame_pointer_rtx
,
25490 hard_frame_pointer_rtx
);
25493 /* Force out any pending memory operations that reference stacked data
25494 before stack de-allocation occurs. */
25495 emit_insn (gen_blockage ());
25496 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
25497 hard_frame_pointer_rtx
));
25498 arm_add_cfa_adjust_cfa_note (insn
, 0,
25500 hard_frame_pointer_rtx
);
25501 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25503 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25508 /* Pop off outgoing args and local frame to adjust stack pointer to
25509 last saved register. */
25510 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25514 /* Force out any pending memory operations that reference stacked data
25515 before stack de-allocation occurs. */
25516 emit_insn (gen_blockage ());
25517 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25519 GEN_INT (amount
)));
25520 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25521 stack_pointer_rtx
, stack_pointer_rtx
);
25522 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25524 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25528 if (TARGET_HARD_FLOAT
)
25530 /* Generate VFP register multi-pop. */
25531 int end_reg
= LAST_VFP_REGNUM
+ 1;
25533 /* Scan the registers in reverse order. We need to match
25534 any groupings made in the prologue and generate matching
25535 vldm operations. The need to match groups is because,
25536 unlike pop, vldm can only do consecutive regs. */
25537 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
25538 /* Look for a case where a reg does not need restoring. */
25539 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25540 && (!df_regs_ever_live_p (i
+ 1)
25541 || call_used_regs
[i
+ 1]))
25543 /* Restore the regs discovered so far (from reg+2 to
25545 if (end_reg
> i
+ 2)
25546 arm_emit_vfp_multi_reg_pop (i
+ 2,
25547 (end_reg
- (i
+ 2)) / 2,
25548 stack_pointer_rtx
);
25552 /* Restore the remaining regs that we have discovered (or possibly
25553 even all of them, if the conditional in the for loop never
25555 if (end_reg
> i
+ 2)
25556 arm_emit_vfp_multi_reg_pop (i
+ 2,
25557 (end_reg
- (i
+ 2)) / 2,
25558 stack_pointer_rtx
);
25562 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
25563 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25566 rtx addr
= gen_rtx_MEM (V2SImode
,
25567 gen_rtx_POST_INC (SImode
,
25568 stack_pointer_rtx
));
25569 set_mem_alias_set (addr
, get_frame_alias_set ());
25570 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25571 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25572 gen_rtx_REG (V2SImode
, i
),
25574 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25575 stack_pointer_rtx
, stack_pointer_rtx
);
25578 if (saved_regs_mask
)
25581 bool return_in_pc
= false;
25583 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
25584 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
25585 && !IS_CMSE_ENTRY (func_type
)
25586 && !IS_STACKALIGN (func_type
)
25588 && crtl
->args
.pretend_args_size
== 0
25589 && saved_regs_mask
& (1 << LR_REGNUM
)
25590 && !crtl
->calls_eh_return
)
25592 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25593 saved_regs_mask
|= (1 << PC_REGNUM
);
25594 return_in_pc
= true;
25597 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
25599 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25600 if (saved_regs_mask
& (1 << i
))
25602 rtx addr
= gen_rtx_MEM (SImode
,
25603 gen_rtx_POST_INC (SImode
,
25604 stack_pointer_rtx
));
25605 set_mem_alias_set (addr
, get_frame_alias_set ());
25607 if (i
== PC_REGNUM
)
25609 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25610 XVECEXP (insn
, 0, 0) = ret_rtx
;
25611 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
25613 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
25614 insn
= emit_jump_insn (insn
);
25618 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
25620 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25621 gen_rtx_REG (SImode
, i
),
25623 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25625 stack_pointer_rtx
);
25632 && current_tune
->prefer_ldrd_strd
25633 && !optimize_function_for_size_p (cfun
))
25636 thumb2_emit_ldrd_pop (saved_regs_mask
);
25637 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
25638 arm_emit_ldrd_pop (saved_regs_mask
);
25640 arm_emit_multi_reg_pop (saved_regs_mask
);
25643 arm_emit_multi_reg_pop (saved_regs_mask
);
25651 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
25655 rtx dwarf
= NULL_RTX
;
25657 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25659 GEN_INT (amount
)));
25661 RTX_FRAME_RELATED_P (tmp
) = 1;
25663 if (cfun
->machine
->uses_anonymous_args
)
25665 /* Restore pretend args. Refer arm_expand_prologue on how to save
25666 pretend_args in stack. */
25667 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
25668 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
25669 for (j
= 0, i
= 0; j
< num_regs
; i
++)
25670 if (saved_regs_mask
& (1 << i
))
25672 rtx reg
= gen_rtx_REG (SImode
, i
);
25673 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
25676 REG_NOTES (tmp
) = dwarf
;
25678 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25679 stack_pointer_rtx
, stack_pointer_rtx
);
25682 /* Clear all caller-saved regs that are not used to return. */
25683 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25685 /* CMSE_ENTRY always returns. */
25686 gcc_assert (really_return
);
25687 cmse_nonsecure_entry_clear_before_return ();
25690 if (!really_return
)
25693 if (crtl
->calls_eh_return
)
25694 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25696 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25698 if (IS_STACKALIGN (func_type
))
25699 /* Restore the original stack pointer. Before prologue, the stack was
25700 realigned and the original stack pointer saved in r0. For details,
25701 see comment in arm_expand_prologue. */
25702 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25704 emit_jump_insn (simple_return_rtx
);
25707 /* Implementation of insn prologue_thumb1_interwork. This is the first
25708 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25711 thumb1_output_interwork (void)
25714 FILE *f
= asm_out_file
;
25716 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
25717 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
25719 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
25721 /* Generate code sequence to switch us into Thumb mode. */
25722 /* The .code 32 directive has already been emitted by
25723 ASM_DECLARE_FUNCTION_NAME. */
25724 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
25725 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
25727 /* Generate a label, so that the debugger will notice the
25728 change in instruction sets. This label is also used by
25729 the assembler to bypass the ARM code when this function
25730 is called from a Thumb encoded function elsewhere in the
25731 same file. Hence the definition of STUB_NAME here must
25732 agree with the definition in gas/config/tc-arm.c. */
25734 #define STUB_NAME ".real_start_of"
25736 fprintf (f
, "\t.code\t16\n");
25738 if (arm_dllexport_name_p (name
))
25739 name
= arm_strip_name_encoding (name
);
25741 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
25742 fprintf (f
, "\t.thumb_func\n");
25743 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
25748 /* Handle the case of a double word load into a low register from
25749 a computed memory address. The computed address may involve a
25750 register which is overwritten by the load. */
25752 thumb_load_double_from_address (rtx
*operands
)
25760 gcc_assert (REG_P (operands
[0]));
25761 gcc_assert (MEM_P (operands
[1]));
25763 /* Get the memory address. */
25764 addr
= XEXP (operands
[1], 0);
25766 /* Work out how the memory address is computed. */
25767 switch (GET_CODE (addr
))
25770 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25772 if (REGNO (operands
[0]) == REGNO (addr
))
25774 output_asm_insn ("ldr\t%H0, %2", operands
);
25775 output_asm_insn ("ldr\t%0, %1", operands
);
25779 output_asm_insn ("ldr\t%0, %1", operands
);
25780 output_asm_insn ("ldr\t%H0, %2", operands
);
25785 /* Compute <address> + 4 for the high order load. */
25786 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25788 output_asm_insn ("ldr\t%0, %1", operands
);
25789 output_asm_insn ("ldr\t%H0, %2", operands
);
25793 arg1
= XEXP (addr
, 0);
25794 arg2
= XEXP (addr
, 1);
25796 if (CONSTANT_P (arg1
))
25797 base
= arg2
, offset
= arg1
;
25799 base
= arg1
, offset
= arg2
;
25801 gcc_assert (REG_P (base
));
25803 /* Catch the case of <address> = <reg> + <reg> */
25804 if (REG_P (offset
))
25806 int reg_offset
= REGNO (offset
);
25807 int reg_base
= REGNO (base
);
25808 int reg_dest
= REGNO (operands
[0]);
25810 /* Add the base and offset registers together into the
25811 higher destination register. */
25812 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
25813 reg_dest
+ 1, reg_base
, reg_offset
);
25815 /* Load the lower destination register from the address in
25816 the higher destination register. */
25817 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
25818 reg_dest
, reg_dest
+ 1);
25820 /* Load the higher destination register from its own address
25822 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
25823 reg_dest
+ 1, reg_dest
+ 1);
25827 /* Compute <address> + 4 for the high order load. */
25828 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25830 /* If the computed address is held in the low order register
25831 then load the high order register first, otherwise always
25832 load the low order register first. */
25833 if (REGNO (operands
[0]) == REGNO (base
))
25835 output_asm_insn ("ldr\t%H0, %2", operands
);
25836 output_asm_insn ("ldr\t%0, %1", operands
);
25840 output_asm_insn ("ldr\t%0, %1", operands
);
25841 output_asm_insn ("ldr\t%H0, %2", operands
);
25847 /* With no registers to worry about we can just load the value
25849 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25851 output_asm_insn ("ldr\t%H0, %2", operands
);
25852 output_asm_insn ("ldr\t%0, %1", operands
);
25856 gcc_unreachable ();
25863 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
25868 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25869 std::swap (operands
[4], operands
[5]);
25871 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
25872 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
25876 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25877 std::swap (operands
[4], operands
[5]);
25878 if (REGNO (operands
[5]) > REGNO (operands
[6]))
25879 std::swap (operands
[5], operands
[6]);
25880 if (REGNO (operands
[4]) > REGNO (operands
[5]))
25881 std::swap (operands
[4], operands
[5]);
25883 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
25884 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
25888 gcc_unreachable ();
25894 /* Output a call-via instruction for thumb state. */
25896 thumb_call_via_reg (rtx reg
)
25898 int regno
= REGNO (reg
);
25901 gcc_assert (regno
< LR_REGNUM
);
25903 /* If we are in the normal text section we can use a single instance
25904 per compilation unit. If we are doing function sections, then we need
25905 an entry per section, since we can't rely on reachability. */
25906 if (in_section
== text_section
)
25908 thumb_call_reg_needed
= 1;
25910 if (thumb_call_via_label
[regno
] == NULL
)
25911 thumb_call_via_label
[regno
] = gen_label_rtx ();
25912 labelp
= thumb_call_via_label
+ regno
;
25916 if (cfun
->machine
->call_via
[regno
] == NULL
)
25917 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
25918 labelp
= cfun
->machine
->call_via
+ regno
;
25921 output_asm_insn ("bl\t%a0", labelp
);
25925 /* Routines for generating rtl. */
25927 thumb_expand_movmemqi (rtx
*operands
)
25929 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
25930 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
25931 HOST_WIDE_INT len
= INTVAL (operands
[2]);
25932 HOST_WIDE_INT offset
= 0;
25936 emit_insn (gen_movmem12b (out
, in
, out
, in
));
25942 emit_insn (gen_movmem8b (out
, in
, out
, in
));
25948 rtx reg
= gen_reg_rtx (SImode
);
25949 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
25950 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
25957 rtx reg
= gen_reg_rtx (HImode
);
25958 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
25959 plus_constant (Pmode
, in
,
25961 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
25970 rtx reg
= gen_reg_rtx (QImode
);
25971 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
25972 plus_constant (Pmode
, in
,
25974 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
25981 thumb_reload_out_hi (rtx
*operands
)
25983 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
25986 /* Return the length of a function name prefix
25987 that starts with the character 'c'. */
25989 arm_get_strip_length (int c
)
25993 ARM_NAME_ENCODING_LENGTHS
25998 /* Return a pointer to a function's name with any
25999 and all prefix encodings stripped from it. */
26001 arm_strip_name_encoding (const char *name
)
26005 while ((skip
= arm_get_strip_length (* name
)))
26011 /* If there is a '*' anywhere in the name's prefix, then
26012 emit the stripped name verbatim, otherwise prepend an
26013 underscore if leading underscores are being used. */
26015 arm_asm_output_labelref (FILE *stream
, const char *name
)
26020 while ((skip
= arm_get_strip_length (* name
)))
26022 verbatim
|= (*name
== '*');
26027 fputs (name
, stream
);
26029 asm_fprintf (stream
, "%U%s", name
);
26032 /* This function is used to emit an EABI tag and its associated value.
26033 We emit the numerical value of the tag in case the assembler does not
26034 support textual tags. (Eg gas prior to 2.20). If requested we include
26035 the tag name in a comment so that anyone reading the assembler output
26036 will know which tag is being set.
26038 This function is not static because arm-c.c needs it too. */
26041 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
26043 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
26044 if (flag_verbose_asm
|| flag_debug_asm
)
26045 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
26046 asm_fprintf (asm_out_file
, "\n");
26049 /* This function is used to print CPU tuning information as comment
26050 in assembler file. Pointers are not printed for now. */
26053 arm_print_tune_info (void)
26055 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune parameters\n");
26056 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"constant_limit:\t%d\n",
26057 current_tune
->constant_limit
);
26058 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26059 "max_insns_skipped:\t%d\n", current_tune
->max_insns_skipped
);
26060 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26061 "prefetch.num_slots:\t%d\n", current_tune
->prefetch
.num_slots
);
26062 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26063 "prefetch.l1_cache_size:\t%d\n",
26064 current_tune
->prefetch
.l1_cache_size
);
26065 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26066 "prefetch.l1_cache_line_size:\t%d\n",
26067 current_tune
->prefetch
.l1_cache_line_size
);
26068 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26069 "prefer_constant_pool:\t%d\n",
26070 (int) current_tune
->prefer_constant_pool
);
26071 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26072 "branch_cost:\t(s:speed, p:predictable)\n");
26073 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\ts&p\tcost\n");
26074 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t00\t%d\n",
26075 current_tune
->branch_cost (false, false));
26076 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t01\t%d\n",
26077 current_tune
->branch_cost (false, true));
26078 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t10\t%d\n",
26079 current_tune
->branch_cost (true, false));
26080 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t11\t%d\n",
26081 current_tune
->branch_cost (true, true));
26082 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26083 "prefer_ldrd_strd:\t%d\n",
26084 (int) current_tune
->prefer_ldrd_strd
);
26085 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26086 "logical_op_non_short_circuit:\t[%d,%d]\n",
26087 (int) current_tune
->logical_op_non_short_circuit_thumb
,
26088 (int) current_tune
->logical_op_non_short_circuit_arm
);
26089 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26090 "prefer_neon_for_64bits:\t%d\n",
26091 (int) current_tune
->prefer_neon_for_64bits
);
26092 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26093 "disparage_flag_setting_t16_encodings:\t%d\n",
26094 (int) current_tune
->disparage_flag_setting_t16_encodings
);
26095 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26096 "string_ops_prefer_neon:\t%d\n",
26097 (int) current_tune
->string_ops_prefer_neon
);
26098 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26099 "max_insns_inline_memset:\t%d\n",
26100 current_tune
->max_insns_inline_memset
);
26101 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"fusible_ops:\t%u\n",
26102 current_tune
->fusible_ops
);
26103 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"sched_autopref:\t%d\n",
26104 (int) current_tune
->sched_autopref
);
26108 arm_file_start (void)
26114 /* We don't have a specified CPU. Use the architecture to
26117 Note: it might be better to do this unconditionally, then the
26118 assembler would not need to know about all new CPU names as
26120 if (!arm_active_target
.core_name
)
26122 /* armv7ve doesn't support any extensions. */
26123 if (strcmp (arm_active_target
.arch_name
, "armv7ve") == 0)
26125 /* Keep backward compatability for assemblers
26126 which don't support armv7ve. */
26127 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
26128 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
26129 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
26130 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
26131 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
26135 const char* pos
= strchr (arm_active_target
.arch_name
, '+');
26139 gcc_assert (strlen (arm_active_target
.arch_name
)
26140 <= sizeof (buf
) / sizeof (*pos
));
26141 strncpy (buf
, arm_active_target
.arch_name
,
26142 (pos
- arm_active_target
.arch_name
) * sizeof (*pos
));
26143 buf
[pos
- arm_active_target
.arch_name
] = '\0';
26144 asm_fprintf (asm_out_file
, "\t.arch %s\n", buf
);
26145 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", pos
+ 1);
26148 asm_fprintf (asm_out_file
, "\t.arch %s\n",
26149 arm_active_target
.arch_name
);
26152 else if (strncmp (arm_active_target
.core_name
, "generic", 7) == 0)
26153 asm_fprintf (asm_out_file
, "\t.arch %s\n",
26154 arm_active_target
.core_name
+ 8);
26157 const char* truncated_name
26158 = arm_rewrite_selected_cpu (arm_active_target
.core_name
);
26159 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
26162 if (print_tune_info
)
26163 arm_print_tune_info ();
26165 if (! TARGET_SOFT_FLOAT
)
26167 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
26168 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26170 if (TARGET_HARD_FLOAT_ABI
)
26171 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26174 /* Some of these attributes only apply when the corresponding features
26175 are used. However we don't have any easy way of figuring this out.
26176 Conservatively record the setting that would have been used. */
26178 if (flag_rounding_math
)
26179 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26181 if (!flag_unsafe_math_optimizations
)
26183 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26184 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26186 if (flag_signaling_nans
)
26187 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26189 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26190 flag_finite_math_only
? 1 : 3);
26192 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26193 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26194 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26195 flag_short_enums
? 1 : 2);
26197 /* Tag_ABI_optimization_goals. */
26200 else if (optimize
>= 2)
26206 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
26208 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26211 if (arm_fp16_format
)
26212 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26213 (int) arm_fp16_format
);
26215 if (arm_lang_output_object_attributes_hook
)
26216 arm_lang_output_object_attributes_hook();
26219 default_file_start ();
26223 arm_file_end (void)
26227 if (NEED_INDICATE_EXEC_STACK
)
26228 /* Add .note.GNU-stack. */
26229 file_end_indicate_exec_stack ();
26231 if (! thumb_call_reg_needed
)
26234 switch_to_section (text_section
);
26235 asm_fprintf (asm_out_file
, "\t.code 16\n");
26236 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
26238 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
26240 rtx label
= thumb_call_via_label
[regno
];
26244 targetm
.asm_out
.internal_label (asm_out_file
, "L",
26245 CODE_LABEL_NUMBER (label
));
26246 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
26252 /* Symbols in the text segment can be accessed without indirecting via the
26253 constant pool; it may take an extra binary operation, but this is still
26254 faster than indirecting via memory. Don't do this when not optimizing,
26255 since we won't be calculating al of the offsets necessary to do this
26259 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
26261 if (optimize
> 0 && TREE_CONSTANT (decl
))
26262 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
26264 default_encode_section_info (decl
, rtl
, first
);
26266 #endif /* !ARM_PE */
26269 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
26271 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
26272 && !strcmp (prefix
, "L"))
26274 arm_ccfsm_state
= 0;
26275 arm_target_insn
= NULL
;
26277 default_internal_label (stream
, prefix
, labelno
);
26280 /* Output code to add DELTA to the first argument, and then jump
26281 to FUNCTION. Used for C++ multiple inheritance. */
26284 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26285 HOST_WIDE_INT
, tree function
)
26287 static int thunk_label
= 0;
26290 int mi_delta
= delta
;
26291 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
26293 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
26296 mi_delta
= - mi_delta
;
26298 final_start_function (emit_barrier (), file
, 1);
26302 int labelno
= thunk_label
++;
26303 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
26304 /* Thunks are entered in arm mode when available. */
26305 if (TARGET_THUMB1_ONLY
)
26307 /* push r3 so we can use it as a temporary. */
26308 /* TODO: Omit this save if r3 is not used. */
26309 fputs ("\tpush {r3}\n", file
);
26310 fputs ("\tldr\tr3, ", file
);
26314 fputs ("\tldr\tr12, ", file
);
26316 assemble_name (file
, label
);
26317 fputc ('\n', file
);
26320 /* If we are generating PIC, the ldr instruction below loads
26321 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26322 the address of the add + 8, so we have:
26324 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26327 Note that we have "+ 1" because some versions of GNU ld
26328 don't set the low bit of the result for R_ARM_REL32
26329 relocations against thumb function symbols.
26330 On ARMv6M this is +4, not +8. */
26331 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
26332 assemble_name (file
, labelpc
);
26333 fputs (":\n", file
);
26334 if (TARGET_THUMB1_ONLY
)
26336 /* This is 2 insns after the start of the thunk, so we know it
26337 is 4-byte aligned. */
26338 fputs ("\tadd\tr3, pc, r3\n", file
);
26339 fputs ("\tmov r12, r3\n", file
);
26342 fputs ("\tadd\tr12, pc, r12\n", file
);
26344 else if (TARGET_THUMB1_ONLY
)
26345 fputs ("\tmov r12, r3\n", file
);
26347 if (TARGET_THUMB1_ONLY
)
26349 if (mi_delta
> 255)
26351 fputs ("\tldr\tr3, ", file
);
26352 assemble_name (file
, label
);
26353 fputs ("+4\n", file
);
26354 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
26355 mi_op
, this_regno
, this_regno
);
26357 else if (mi_delta
!= 0)
26359 /* Thumb1 unified syntax requires s suffix in instruction name when
26360 one of the operands is immediate. */
26361 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
26362 mi_op
, this_regno
, this_regno
,
26368 /* TODO: Use movw/movt for large constants when available. */
26369 while (mi_delta
!= 0)
26371 if ((mi_delta
& (3 << shift
)) == 0)
26375 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
26376 mi_op
, this_regno
, this_regno
,
26377 mi_delta
& (0xff << shift
));
26378 mi_delta
&= ~(0xff << shift
);
26385 if (TARGET_THUMB1_ONLY
)
26386 fputs ("\tpop\t{r3}\n", file
);
26388 fprintf (file
, "\tbx\tr12\n");
26389 ASM_OUTPUT_ALIGN (file
, 2);
26390 assemble_name (file
, label
);
26391 fputs (":\n", file
);
26394 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26395 rtx tem
= XEXP (DECL_RTL (function
), 0);
26396 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26397 pipeline offset is four rather than eight. Adjust the offset
26399 tem
= plus_constant (GET_MODE (tem
), tem
,
26400 TARGET_THUMB1_ONLY
? -3 : -7);
26401 tem
= gen_rtx_MINUS (GET_MODE (tem
),
26403 gen_rtx_SYMBOL_REF (Pmode
,
26404 ggc_strdup (labelpc
)));
26405 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
26408 /* Output ".word .LTHUNKn". */
26409 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
26411 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
26412 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
26416 fputs ("\tb\t", file
);
26417 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
26418 if (NEED_PLT_RELOC
)
26419 fputs ("(PLT)", file
);
26420 fputc ('\n', file
);
26423 final_end_function ();
26426 /* MI thunk handling for TARGET_32BIT. */
26429 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26430 HOST_WIDE_INT vcall_offset
, tree function
)
26432 /* On ARM, this_regno is R0 or R1 depending on
26433 whether the function returns an aggregate or not.
26435 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
26437 ? R1_REGNUM
: R0_REGNUM
);
26439 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
26440 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
26441 reload_completed
= 1;
26442 emit_note (NOTE_INSN_PROLOGUE_END
);
26444 /* Add DELTA to THIS_RTX. */
26446 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
26447 delta
, this_rtx
, this_rtx
, false);
26449 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26450 if (vcall_offset
!= 0)
26452 /* Load *THIS_RTX. */
26453 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
26454 /* Compute *THIS_RTX + VCALL_OFFSET. */
26455 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
26457 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26458 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
26459 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
26462 /* Generate a tail call to the target function. */
26463 if (!TREE_USED (function
))
26465 assemble_external (function
);
26466 TREE_USED (function
) = 1;
26468 rtx funexp
= XEXP (DECL_RTL (function
), 0);
26469 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
26470 rtx_insn
* insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
26471 SIBLING_CALL_P (insn
) = 1;
26473 insn
= get_insns ();
26474 shorten_branches (insn
);
26475 final_start_function (insn
, file
, 1);
26476 final (insn
, file
, 1);
26477 final_end_function ();
26479 /* Stop pretending this is a post-reload pass. */
26480 reload_completed
= 0;
26483 /* Output code to add DELTA to the first argument, and then jump
26484 to FUNCTION. Used for C++ multiple inheritance. */
26487 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
26488 HOST_WIDE_INT vcall_offset
, tree function
)
26491 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26493 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26497 arm_emit_vector_const (FILE *file
, rtx x
)
26500 const char * pattern
;
26502 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
26504 switch (GET_MODE (x
))
26506 case V2SImode
: pattern
= "%08x"; break;
26507 case V4HImode
: pattern
= "%04x"; break;
26508 case V8QImode
: pattern
= "%02x"; break;
26509 default: gcc_unreachable ();
26512 fprintf (file
, "0x");
26513 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
26517 element
= CONST_VECTOR_ELT (x
, i
);
26518 fprintf (file
, pattern
, INTVAL (element
));
26524 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26525 HFmode constant pool entries are actually loaded with ldr. */
26527 arm_emit_fp16_const (rtx c
)
26531 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
26532 if (WORDS_BIG_ENDIAN
)
26533 assemble_zeros (2);
26534 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
26535 if (!WORDS_BIG_ENDIAN
)
26536 assemble_zeros (2);
26540 arm_output_load_gr (rtx
*operands
)
26547 if (!MEM_P (operands
[1])
26548 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
26549 || !REG_P (reg
= XEXP (sum
, 0))
26550 || !CONST_INT_P (offset
= XEXP (sum
, 1))
26551 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
26552 return "wldrw%?\t%0, %1";
26554 /* Fix up an out-of-range load of a GR register. */
26555 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
26556 wcgr
= operands
[0];
26558 output_asm_insn ("ldr%?\t%0, %1", operands
);
26560 operands
[0] = wcgr
;
26562 output_asm_insn ("tmcr%?\t%0, %1", operands
);
26563 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
26568 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26570 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26571 named arg and all anonymous args onto the stack.
26572 XXX I know the prologue shouldn't be pushing registers, but it is faster
26576 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
26580 int second_time ATTRIBUTE_UNUSED
)
26582 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
26585 cfun
->machine
->uses_anonymous_args
= 1;
26586 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
26588 nregs
= pcum
->aapcs_ncrn
;
26591 int res
= arm_needs_doubleword_align (mode
, type
);
26592 if (res
< 0 && warn_psabi
)
26593 inform (input_location
, "parameter passing for argument of "
26594 "type %qT changed in GCC 7.1", type
);
26600 nregs
= pcum
->nregs
;
26602 if (nregs
< NUM_ARG_REGS
)
26603 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
26606 /* We can't rely on the caller doing the proper promotion when
26607 using APCS or ATPCS. */
26610 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
26612 return !TARGET_AAPCS_BASED
;
26615 static machine_mode
26616 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
26618 int *punsignedp ATTRIBUTE_UNUSED
,
26619 const_tree fntype ATTRIBUTE_UNUSED
,
26620 int for_return ATTRIBUTE_UNUSED
)
26622 if (GET_MODE_CLASS (mode
) == MODE_INT
26623 && GET_MODE_SIZE (mode
) < 4)
26631 arm_default_short_enums (void)
26633 return ARM_DEFAULT_SHORT_ENUMS
;
26637 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26640 arm_align_anon_bitfield (void)
26642 return TARGET_AAPCS_BASED
;
26646 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26649 arm_cxx_guard_type (void)
26651 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
26655 /* The EABI says test the least significant bit of a guard variable. */
26658 arm_cxx_guard_mask_bit (void)
26660 return TARGET_AAPCS_BASED
;
26664 /* The EABI specifies that all array cookies are 8 bytes long. */
26667 arm_get_cookie_size (tree type
)
26671 if (!TARGET_AAPCS_BASED
)
26672 return default_cxx_get_cookie_size (type
);
26674 size
= build_int_cst (sizetype
, 8);
26679 /* The EABI says that array cookies should also contain the element size. */
26682 arm_cookie_has_size (void)
26684 return TARGET_AAPCS_BASED
;
26688 /* The EABI says constructors and destructors should return a pointer to
26689 the object constructed/destroyed. */
26692 arm_cxx_cdtor_returns_this (void)
26694 return TARGET_AAPCS_BASED
;
26697 /* The EABI says that an inline function may never be the key
26701 arm_cxx_key_method_may_be_inline (void)
26703 return !TARGET_AAPCS_BASED
;
26707 arm_cxx_determine_class_data_visibility (tree decl
)
26709 if (!TARGET_AAPCS_BASED
26710 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
26713 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26714 is exported. However, on systems without dynamic vague linkage,
26715 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26716 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
26717 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
26719 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
26720 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
26724 arm_cxx_class_data_always_comdat (void)
26726 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26727 vague linkage if the class has no key function. */
26728 return !TARGET_AAPCS_BASED
;
26732 /* The EABI says __aeabi_atexit should be used to register static
26736 arm_cxx_use_aeabi_atexit (void)
26738 return TARGET_AAPCS_BASED
;
26743 arm_set_return_address (rtx source
, rtx scratch
)
26745 arm_stack_offsets
*offsets
;
26746 HOST_WIDE_INT delta
;
26748 unsigned long saved_regs
;
26750 offsets
= arm_get_frame_offsets ();
26751 saved_regs
= offsets
->saved_regs_mask
;
26753 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
26754 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26757 if (frame_pointer_needed
)
26758 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
26761 /* LR will be the first saved register. */
26762 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
26767 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
26768 GEN_INT (delta
& ~4095)));
26773 addr
= stack_pointer_rtx
;
26775 addr
= plus_constant (Pmode
, addr
, delta
);
26777 /* The store needs to be marked as frame related in order to prevent
26778 DSE from deleting it as dead if it is based on fp. */
26779 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26780 RTX_FRAME_RELATED_P (insn
) = 1;
26781 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26787 thumb_set_return_address (rtx source
, rtx scratch
)
26789 arm_stack_offsets
*offsets
;
26790 HOST_WIDE_INT delta
;
26791 HOST_WIDE_INT limit
;
26794 unsigned long mask
;
26798 offsets
= arm_get_frame_offsets ();
26799 mask
= offsets
->saved_regs_mask
;
26800 if (mask
& (1 << LR_REGNUM
))
26803 /* Find the saved regs. */
26804 if (frame_pointer_needed
)
26806 delta
= offsets
->soft_frame
- offsets
->saved_args
;
26807 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
26813 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
26816 /* Allow for the stack frame. */
26817 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
26819 /* The link register is always the first saved register. */
26822 /* Construct the address. */
26823 addr
= gen_rtx_REG (SImode
, reg
);
26826 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
26827 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
26831 addr
= plus_constant (Pmode
, addr
, delta
);
26833 /* The store needs to be marked as frame related in order to prevent
26834 DSE from deleting it as dead if it is based on fp. */
26835 rtx insn
= emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
26836 RTX_FRAME_RELATED_P (insn
) = 1;
26837 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (Pmode
, LR_REGNUM
));
26840 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26843 /* Implements target hook vector_mode_supported_p. */
26845 arm_vector_mode_supported_p (machine_mode mode
)
26847 /* Neon also supports V2SImode, etc. listed in the clause below. */
26848 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
26849 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
26850 || mode
== V2DImode
|| mode
== V8HFmode
))
26853 if ((TARGET_NEON
|| TARGET_IWMMXT
)
26854 && ((mode
== V2SImode
)
26855 || (mode
== V4HImode
)
26856 || (mode
== V8QImode
)))
26859 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
26860 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
26861 || mode
== V2HAmode
))
26867 /* Implements target hook array_mode_supported_p. */
26870 arm_array_mode_supported_p (machine_mode mode
,
26871 unsigned HOST_WIDE_INT nelems
)
26874 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
26875 && (nelems
>= 2 && nelems
<= 4))
26881 /* Use the option -mvectorize-with-neon-double to override the use of quardword
26882 registers when autovectorizing for Neon, at least until multiple vector
26883 widths are supported properly by the middle-end. */
26885 static machine_mode
26886 arm_preferred_simd_mode (machine_mode mode
)
26892 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
26894 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
26896 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
26898 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
26900 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
26907 if (TARGET_REALLY_IWMMXT
)
26923 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26925 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26926 using r0-r4 for function arguments, r7 for the stack frame and don't have
26927 enough left over to do doubleword arithmetic. For Thumb-2 all the
26928 potentially problematic instructions accept high registers so this is not
26929 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26930 that require many low registers. */
26932 arm_class_likely_spilled_p (reg_class_t rclass
)
26934 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
26935 || rclass
== CC_REG
)
26941 /* Implements target hook small_register_classes_for_mode_p. */
26943 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
26945 return TARGET_THUMB1
;
26948 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26949 ARM insns and therefore guarantee that the shift count is modulo 256.
26950 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26951 guarantee no particular behavior for out-of-range counts. */
26953 static unsigned HOST_WIDE_INT
26954 arm_shift_truncation_mask (machine_mode mode
)
26956 return mode
== SImode
? 255 : 0;
26960 /* Map internal gcc register numbers to DWARF2 register numbers. */
26963 arm_dbx_register_number (unsigned int regno
)
26968 if (IS_VFP_REGNUM (regno
))
26970 /* See comment in arm_dwarf_register_span. */
26971 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
26972 return 64 + regno
- FIRST_VFP_REGNUM
;
26974 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
26977 if (IS_IWMMXT_GR_REGNUM (regno
))
26978 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
26980 if (IS_IWMMXT_REGNUM (regno
))
26981 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
26983 return DWARF_FRAME_REGISTERS
;
26986 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26987 GCC models tham as 64 32-bit registers, so we need to describe this to
26988 the DWARF generation code. Other registers can use the default. */
26990 arm_dwarf_register_span (rtx rtl
)
26998 regno
= REGNO (rtl
);
26999 if (!IS_VFP_REGNUM (regno
))
27002 /* XXX FIXME: The EABI defines two VFP register ranges:
27003 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27005 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27006 corresponding D register. Until GDB supports this, we shall use the
27007 legacy encodings. We also use these encodings for D0-D15 for
27008 compatibility with older debuggers. */
27009 mode
= GET_MODE (rtl
);
27010 if (GET_MODE_SIZE (mode
) < 8)
27013 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
27015 nregs
= GET_MODE_SIZE (mode
) / 4;
27016 for (i
= 0; i
< nregs
; i
+= 2)
27017 if (TARGET_BIG_END
)
27019 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
27020 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
27024 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
27025 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
27030 nregs
= GET_MODE_SIZE (mode
) / 8;
27031 for (i
= 0; i
< nregs
; i
++)
27032 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
27035 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
27038 #if ARM_UNWIND_INFO
27039 /* Emit unwind directives for a store-multiple instruction or stack pointer
27040 push during alignment.
27041 These should only ever be generated by the function prologue code, so
27042 expect them to have a particular form.
27043 The store-multiple instruction sometimes pushes pc as the last register,
27044 although it should not be tracked into unwind information, or for -Os
27045 sometimes pushes some dummy registers before first register that needs
27046 to be tracked in unwind information; such dummy registers are there just
27047 to avoid separate stack adjustment, and will not be restored in the
27051 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
27054 HOST_WIDE_INT offset
;
27055 HOST_WIDE_INT nregs
;
27059 unsigned padfirst
= 0, padlast
= 0;
27062 e
= XVECEXP (p
, 0, 0);
27063 gcc_assert (GET_CODE (e
) == SET
);
27065 /* First insn will adjust the stack pointer. */
27066 gcc_assert (GET_CODE (e
) == SET
27067 && REG_P (SET_DEST (e
))
27068 && REGNO (SET_DEST (e
)) == SP_REGNUM
27069 && GET_CODE (SET_SRC (e
)) == PLUS
);
27071 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
27072 nregs
= XVECLEN (p
, 0) - 1;
27073 gcc_assert (nregs
);
27075 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
27078 /* For -Os dummy registers can be pushed at the beginning to
27079 avoid separate stack pointer adjustment. */
27080 e
= XVECEXP (p
, 0, 1);
27081 e
= XEXP (SET_DEST (e
), 0);
27082 if (GET_CODE (e
) == PLUS
)
27083 padfirst
= INTVAL (XEXP (e
, 1));
27084 gcc_assert (padfirst
== 0 || optimize_size
);
27085 /* The function prologue may also push pc, but not annotate it as it is
27086 never restored. We turn this into a stack pointer adjustment. */
27087 e
= XVECEXP (p
, 0, nregs
);
27088 e
= XEXP (SET_DEST (e
), 0);
27089 if (GET_CODE (e
) == PLUS
)
27090 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
27092 padlast
= offset
- 4;
27093 gcc_assert (padlast
== 0 || padlast
== 4);
27095 fprintf (asm_out_file
, "\t.pad #4\n");
27097 fprintf (asm_out_file
, "\t.save {");
27099 else if (IS_VFP_REGNUM (reg
))
27102 fprintf (asm_out_file
, "\t.vsave {");
27105 /* Unknown register type. */
27106 gcc_unreachable ();
27108 /* If the stack increment doesn't match the size of the saved registers,
27109 something has gone horribly wrong. */
27110 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
27114 /* The remaining insns will describe the stores. */
27115 for (i
= 1; i
<= nregs
; i
++)
27117 /* Expect (set (mem <addr>) (reg)).
27118 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27119 e
= XVECEXP (p
, 0, i
);
27120 gcc_assert (GET_CODE (e
) == SET
27121 && MEM_P (SET_DEST (e
))
27122 && REG_P (SET_SRC (e
)));
27124 reg
= REGNO (SET_SRC (e
));
27125 gcc_assert (reg
>= lastreg
);
27128 fprintf (asm_out_file
, ", ");
27129 /* We can't use %r for vfp because we need to use the
27130 double precision register names. */
27131 if (IS_VFP_REGNUM (reg
))
27132 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
27134 asm_fprintf (asm_out_file
, "%r", reg
);
27138 /* Check that the addresses are consecutive. */
27139 e
= XEXP (SET_DEST (e
), 0);
27140 if (GET_CODE (e
) == PLUS
)
27141 gcc_assert (REG_P (XEXP (e
, 0))
27142 && REGNO (XEXP (e
, 0)) == SP_REGNUM
27143 && CONST_INT_P (XEXP (e
, 1))
27144 && offset
== INTVAL (XEXP (e
, 1)));
27148 && REGNO (e
) == SP_REGNUM
);
27149 offset
+= reg_size
;
27152 fprintf (asm_out_file
, "}\n");
27154 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
27157 /* Emit unwind directives for a SET. */
27160 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
27168 switch (GET_CODE (e0
))
27171 /* Pushing a single register. */
27172 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
27173 || !REG_P (XEXP (XEXP (e0
, 0), 0))
27174 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
27177 asm_fprintf (asm_out_file
, "\t.save ");
27178 if (IS_VFP_REGNUM (REGNO (e1
)))
27179 asm_fprintf(asm_out_file
, "{d%d}\n",
27180 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
27182 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
27186 if (REGNO (e0
) == SP_REGNUM
)
27188 /* A stack increment. */
27189 if (GET_CODE (e1
) != PLUS
27190 || !REG_P (XEXP (e1
, 0))
27191 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
27192 || !CONST_INT_P (XEXP (e1
, 1)))
27195 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
27196 -INTVAL (XEXP (e1
, 1)));
27198 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
27200 HOST_WIDE_INT offset
;
27202 if (GET_CODE (e1
) == PLUS
)
27204 if (!REG_P (XEXP (e1
, 0))
27205 || !CONST_INT_P (XEXP (e1
, 1)))
27207 reg
= REGNO (XEXP (e1
, 0));
27208 offset
= INTVAL (XEXP (e1
, 1));
27209 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
27210 HARD_FRAME_POINTER_REGNUM
, reg
,
27213 else if (REG_P (e1
))
27216 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
27217 HARD_FRAME_POINTER_REGNUM
, reg
);
27222 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
27224 /* Move from sp to reg. */
27225 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
27227 else if (GET_CODE (e1
) == PLUS
27228 && REG_P (XEXP (e1
, 0))
27229 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
27230 && CONST_INT_P (XEXP (e1
, 1)))
27232 /* Set reg to offset from sp. */
27233 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
27234 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
27246 /* Emit unwind directives for the given insn. */
27249 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
27252 bool handled_one
= false;
27254 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27257 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27258 && (TREE_NOTHROW (current_function_decl
)
27259 || crtl
->all_throwers_are_sibcalls
))
27262 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
27265 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
27267 switch (REG_NOTE_KIND (note
))
27269 case REG_FRAME_RELATED_EXPR
:
27270 pat
= XEXP (note
, 0);
27273 case REG_CFA_REGISTER
:
27274 pat
= XEXP (note
, 0);
27277 pat
= PATTERN (insn
);
27278 if (GET_CODE (pat
) == PARALLEL
)
27279 pat
= XVECEXP (pat
, 0, 0);
27282 /* Only emitted for IS_STACKALIGN re-alignment. */
27287 src
= SET_SRC (pat
);
27288 dest
= SET_DEST (pat
);
27290 gcc_assert (src
== stack_pointer_rtx
);
27291 reg
= REGNO (dest
);
27292 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27295 handled_one
= true;
27298 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27299 to get correct dwarf information for shrink-wrap. We should not
27300 emit unwind information for it because these are used either for
27301 pretend arguments or notes to adjust sp and restore registers from
27303 case REG_CFA_DEF_CFA
:
27304 case REG_CFA_ADJUST_CFA
:
27305 case REG_CFA_RESTORE
:
27308 case REG_CFA_EXPRESSION
:
27309 case REG_CFA_OFFSET
:
27310 /* ??? Only handling here what we actually emit. */
27311 gcc_unreachable ();
27319 pat
= PATTERN (insn
);
27322 switch (GET_CODE (pat
))
27325 arm_unwind_emit_set (asm_out_file
, pat
);
27329 /* Store multiple. */
27330 arm_unwind_emit_sequence (asm_out_file
, pat
);
27339 /* Output a reference from a function exception table to the type_info
27340 object X. The EABI specifies that the symbol should be relocated by
27341 an R_ARM_TARGET2 relocation. */
27344 arm_output_ttype (rtx x
)
27346 fputs ("\t.word\t", asm_out_file
);
27347 output_addr_const (asm_out_file
, x
);
27348 /* Use special relocations for symbol references. */
27349 if (!CONST_INT_P (x
))
27350 fputs ("(TARGET2)", asm_out_file
);
27351 fputc ('\n', asm_out_file
);
27356 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27359 arm_asm_emit_except_personality (rtx personality
)
27361 fputs ("\t.personality\t", asm_out_file
);
27362 output_addr_const (asm_out_file
, personality
);
27363 fputc ('\n', asm_out_file
);
27365 #endif /* ARM_UNWIND_INFO */
27367 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27370 arm_asm_init_sections (void)
27372 #if ARM_UNWIND_INFO
27373 exception_section
= get_unnamed_section (0, output_section_asm_op
,
27375 #endif /* ARM_UNWIND_INFO */
27377 #ifdef OBJECT_FORMAT_ELF
27378 if (target_pure_code
)
27379 text_section
->unnamed
.data
= "\t.section .text,\"0x20000006\",%progbits";
27383 /* Output unwind directives for the start/end of a function. */
27386 arm_output_fn_unwind (FILE * f
, bool prologue
)
27388 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27392 fputs ("\t.fnstart\n", f
);
27395 /* If this function will never be unwound, then mark it as such.
27396 The came condition is used in arm_unwind_emit to suppress
27397 the frame annotations. */
27398 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27399 && (TREE_NOTHROW (current_function_decl
)
27400 || crtl
->all_throwers_are_sibcalls
))
27401 fputs("\t.cantunwind\n", f
);
27403 fputs ("\t.fnend\n", f
);
27408 arm_emit_tls_decoration (FILE *fp
, rtx x
)
27410 enum tls_reloc reloc
;
27413 val
= XVECEXP (x
, 0, 0);
27414 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
27416 output_addr_const (fp
, val
);
27421 fputs ("(tlsgd)", fp
);
27424 fputs ("(tlsldm)", fp
);
27427 fputs ("(tlsldo)", fp
);
27430 fputs ("(gottpoff)", fp
);
27433 fputs ("(tpoff)", fp
);
27436 fputs ("(tlsdesc)", fp
);
27439 gcc_unreachable ();
27448 fputs (" + (. - ", fp
);
27449 output_addr_const (fp
, XVECEXP (x
, 0, 2));
27450 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27451 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
27452 output_addr_const (fp
, XVECEXP (x
, 0, 3));
27462 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27465 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
27467 gcc_assert (size
== 4);
27468 fputs ("\t.word\t", file
);
27469 output_addr_const (file
, x
);
27470 fputs ("(tlsldo)", file
);
27473 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27476 arm_output_addr_const_extra (FILE *fp
, rtx x
)
27478 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
27479 return arm_emit_tls_decoration (fp
, x
);
27480 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
27483 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
27485 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
27486 assemble_name_raw (fp
, label
);
27490 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
27492 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
27496 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27500 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
27502 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27506 output_addr_const (fp
, XVECEXP (x
, 0, 1));
27510 else if (GET_CODE (x
) == CONST_VECTOR
)
27511 return arm_emit_vector_const (fp
, x
);
27516 /* Output assembly for a shift instruction.
27517 SET_FLAGS determines how the instruction modifies the condition codes.
27518 0 - Do not set condition codes.
27519 1 - Set condition codes.
27520 2 - Use smallest instruction. */
27522 arm_output_shift(rtx
* operands
, int set_flags
)
27525 static const char flag_chars
[3] = {'?', '.', '!'};
27530 c
= flag_chars
[set_flags
];
27531 shift
= shift_op(operands
[3], &val
);
27535 operands
[2] = GEN_INT(val
);
27536 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
27539 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
27541 output_asm_insn (pattern
, operands
);
27545 /* Output assembly for a WMMX immediate shift instruction. */
27547 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
27549 int shift
= INTVAL (operands
[2]);
27551 machine_mode opmode
= GET_MODE (operands
[0]);
27553 gcc_assert (shift
>= 0);
27555 /* If the shift value in the register versions is > 63 (for D qualifier),
27556 31 (for W qualifier) or 15 (for H qualifier). */
27557 if (((opmode
== V4HImode
) && (shift
> 15))
27558 || ((opmode
== V2SImode
) && (shift
> 31))
27559 || ((opmode
== DImode
) && (shift
> 63)))
27563 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27564 output_asm_insn (templ
, operands
);
27565 if (opmode
== DImode
)
27567 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
27568 output_asm_insn (templ
, operands
);
27573 /* The destination register will contain all zeros. */
27574 sprintf (templ
, "wzero\t%%0");
27575 output_asm_insn (templ
, operands
);
27580 if ((opmode
== DImode
) && (shift
> 32))
27582 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27583 output_asm_insn (templ
, operands
);
27584 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
27585 output_asm_insn (templ
, operands
);
27589 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
27590 output_asm_insn (templ
, operands
);
27595 /* Output assembly for a WMMX tinsr instruction. */
27597 arm_output_iwmmxt_tinsr (rtx
*operands
)
27599 int mask
= INTVAL (operands
[3]);
27602 int units
= mode_nunits
[GET_MODE (operands
[0])];
27603 gcc_assert ((mask
& (mask
- 1)) == 0);
27604 for (i
= 0; i
< units
; ++i
)
27606 if ((mask
& 0x01) == 1)
27612 gcc_assert (i
< units
);
27614 switch (GET_MODE (operands
[0]))
27617 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
27620 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
27623 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
27626 gcc_unreachable ();
27629 output_asm_insn (templ
, operands
);
27634 /* Output a Thumb-1 casesi dispatch sequence. */
27636 thumb1_output_casesi (rtx
*operands
)
27638 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
27640 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27642 switch (GET_MODE(diff_vec
))
27645 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27646 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27648 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27649 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27651 return "bl\t%___gnu_thumb1_case_si";
27653 gcc_unreachable ();
27657 /* Output a Thumb-2 casesi instruction. */
27659 thumb2_output_casesi (rtx
*operands
)
27661 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
27663 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27665 output_asm_insn ("cmp\t%0, %1", operands
);
27666 output_asm_insn ("bhi\t%l3", operands
);
27667 switch (GET_MODE(diff_vec
))
27670 return "tbb\t[%|pc, %0]";
27672 return "tbh\t[%|pc, %0, lsl #1]";
27676 output_asm_insn ("adr\t%4, %l2", operands
);
27677 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
27678 output_asm_insn ("add\t%4, %4, %5", operands
);
27683 output_asm_insn ("adr\t%4, %l2", operands
);
27684 return "ldr\t%|pc, [%4, %0, lsl #2]";
27687 gcc_unreachable ();
27691 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27692 per-core tuning structs. */
27694 arm_issue_rate (void)
27696 return current_tune
->issue_rate
;
27699 /* Return how many instructions should scheduler lookahead to choose the
27702 arm_first_cycle_multipass_dfa_lookahead (void)
27704 int issue_rate
= arm_issue_rate ();
27706 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
27709 /* Enable modeling of L2 auto-prefetcher. */
27711 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
27713 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
27717 arm_mangle_type (const_tree type
)
27719 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27720 has to be managled as if it is in the "std" namespace. */
27721 if (TARGET_AAPCS_BASED
27722 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
27723 return "St9__va_list";
27725 /* Half-precision float. */
27726 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
27729 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27731 if (TYPE_NAME (type
) != NULL
)
27732 return arm_mangle_builtin_type (type
);
27734 /* Use the default mangling. */
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};
27750 /* Adjust register allocation order when compiling for Thumb. */
27753 arm_order_regs_for_local_alloc (void)
27755 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
27756 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
27758 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
27759 sizeof (thumb_core_reg_alloc_order
));
27762 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27765 arm_frame_pointer_required (void)
27767 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
27770 /* If the function receives nonlocal gotos, it needs to save the frame
27771 pointer in the nonlocal_goto_save_area object. */
27772 if (cfun
->has_nonlocal_label
)
27775 /* The frame pointer is required for non-leaf APCS frames. */
27776 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !crtl
->is_leaf
)
27779 /* If we are probing the stack in the prologue, we will have a faulting
27780 instruction prior to the stack adjustment and this requires a frame
27781 pointer if we want to catch the exception using the EABI unwinder. */
27782 if (!IS_INTERRUPT (arm_current_func_type ())
27783 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27784 && arm_except_unwind_info (&global_options
) == UI_TARGET
27785 && cfun
->can_throw_non_call_exceptions
)
27787 HOST_WIDE_INT size
= get_frame_size ();
27789 /* That's irrelevant if there is no stack adjustment. */
27793 /* That's relevant only if there is a stack probe. */
27794 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
27796 /* We don't have the final size of the frame so adjust. */
27797 size
+= 32 * UNITS_PER_WORD
;
27798 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
27808 /* Only thumb1 can't support conditional execution, so return true if
27809 the target is not thumb1. */
27811 arm_have_conditional_execution (void)
27813 return !TARGET_THUMB1
;
27816 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27817 static HOST_WIDE_INT
27818 arm_vector_alignment (const_tree type
)
27820 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
27822 if (TARGET_AAPCS_BASED
)
27823 align
= MIN (align
, 64);
27828 static unsigned int
27829 arm_autovectorize_vector_sizes (void)
27831 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
27835 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
27837 /* Vectors which aren't in packed structures will not be less aligned than
27838 the natural alignment of their element type, so this is safe. */
27839 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27842 return default_builtin_vector_alignment_reachable (type
, is_packed
);
27846 arm_builtin_support_vector_misalignment (machine_mode mode
,
27847 const_tree type
, int misalignment
,
27850 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27852 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
27857 /* If the misalignment is unknown, we should be able to handle the access
27858 so long as it is not to a member of a packed data structure. */
27859 if (misalignment
== -1)
27862 /* Return true if the misalignment is a multiple of the natural alignment
27863 of the vector's element type. This is probably always going to be
27864 true in practice, since we've already established that this isn't a
27866 return ((misalignment
% align
) == 0);
27869 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
27874 arm_conditional_register_usage (void)
27878 if (TARGET_THUMB1
&& optimize_size
)
27880 /* When optimizing for size on Thumb-1, it's better not
27881 to use the HI regs, because of the overhead of
27883 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
27884 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
27887 /* The link register can be clobbered by any branch insn,
27888 but we have no way to track that at present, so mark
27889 it as unavailable. */
27891 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
27893 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
27895 /* VFPv3 registers are disabled when earlier VFP
27896 versions are selected due to the definition of
27897 LAST_VFP_REGNUM. */
27898 for (regno
= FIRST_VFP_REGNUM
;
27899 regno
<= LAST_VFP_REGNUM
; ++ regno
)
27901 fixed_regs
[regno
] = 0;
27902 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
27903 || regno
>= FIRST_VFP_REGNUM
+ 32;
27907 if (TARGET_REALLY_IWMMXT
)
27909 regno
= FIRST_IWMMXT_GR_REGNUM
;
27910 /* The 2002/10/09 revision of the XScale ABI has wCG0
27911 and wCG1 as call-preserved registers. The 2002/11/21
27912 revision changed this so that all wCG registers are
27913 scratch registers. */
27914 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
27915 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
27916 fixed_regs
[regno
] = 0;
27917 /* The XScale ABI has wR0 - wR9 as scratch registers,
27918 the rest as call-preserved registers. */
27919 for (regno
= FIRST_IWMMXT_REGNUM
;
27920 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
27922 fixed_regs
[regno
] = 0;
27923 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
27927 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
27929 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27930 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
27932 else if (TARGET_APCS_STACK
)
27934 fixed_regs
[10] = 1;
27935 call_used_regs
[10] = 1;
27937 /* -mcaller-super-interworking reserves r11 for calls to
27938 _interwork_r11_call_via_rN(). Making the register global
27939 is an easy way of ensuring that it remains valid for all
27941 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
27942 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
27944 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27945 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27946 if (TARGET_CALLER_INTERWORKING
)
27947 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
27949 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27953 arm_preferred_rename_class (reg_class_t rclass
)
27955 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27956 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27957 and code size can be reduced. */
27958 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
27964 /* Compute the attribute "length" of insn "*push_multi".
27965 So this function MUST be kept in sync with that insn pattern. */
27967 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
27969 int i
, regno
, hi_reg
;
27970 int num_saves
= XVECLEN (parallel_op
, 0);
27980 regno
= REGNO (first_op
);
27981 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
27982 list is 8-bit. Normally this means all registers in the list must be
27983 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
27984 encodings. There is one exception for PUSH that LR in HI_REGS can be used
27985 with 16-bit encoding. */
27986 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27987 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
27989 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
27990 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
27998 /* Compute the attribute "length" of insn. Currently, this function is used
27999 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28000 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28001 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
28002 true if OPERANDS contains insn which explicit updates base register. */
28005 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
28014 rtx parallel_op
= operands
[0];
28015 /* Initialize to elements number of PARALLEL. */
28016 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
28017 /* Initialize the value to base register. */
28018 unsigned regno
= REGNO (operands
[1]);
28019 /* Skip return and write back pattern.
28020 We only need register pop pattern for later analysis. */
28021 unsigned first_indx
= 0;
28022 first_indx
+= return_pc
? 1 : 0;
28023 first_indx
+= write_back_p
? 1 : 0;
28025 /* A pop operation can be done through LDM or POP. If the base register is SP
28026 and if it's with write back, then a LDM will be alias of POP. */
28027 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
28028 bool ldm_p
= !pop_p
;
28030 /* Check base register for LDM. */
28031 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
28034 /* Check each register in the list. */
28035 for (; indx
>= first_indx
; indx
--)
28037 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
28038 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28039 comment in arm_attr_length_push_multi. */
28040 if (REGNO_REG_CLASS (regno
) == HI_REGS
28041 && (regno
!= PC_REGNUM
|| ldm_p
))
28048 /* Compute the number of instructions emitted by output_move_double. */
28050 arm_count_output_move_double_insns (rtx
*operands
)
28054 /* output_move_double may modify the operands array, so call it
28055 here on a copy of the array. */
28056 ops
[0] = operands
[0];
28057 ops
[1] = operands
[1];
28058 output_move_double (ops
, false, &count
);
28063 vfp3_const_double_for_fract_bits (rtx operand
)
28065 REAL_VALUE_TYPE r0
;
28067 if (!CONST_DOUBLE_P (operand
))
28070 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
28071 if (exact_real_inverse (DFmode
, &r0
)
28072 && !REAL_VALUE_NEGATIVE (r0
))
28074 if (exact_real_truncate (DFmode
, &r0
))
28076 HOST_WIDE_INT value
= real_to_integer (&r0
);
28077 value
= value
& 0xffffffff;
28078 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
28080 int ret
= exact_log2 (value
);
28081 gcc_assert (IN_RANGE (ret
, 0, 31));
28089 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28090 log2 is in [1, 32], return that log2. Otherwise return -1.
28091 This is used in the patterns for vcvt.s32.f32 floating-point to
28092 fixed-point conversions. */
28095 vfp3_const_double_for_bits (rtx x
)
28097 const REAL_VALUE_TYPE
*r
;
28099 if (!CONST_DOUBLE_P (x
))
28102 r
= CONST_DOUBLE_REAL_VALUE (x
);
28104 if (REAL_VALUE_NEGATIVE (*r
)
28105 || REAL_VALUE_ISNAN (*r
)
28106 || REAL_VALUE_ISINF (*r
)
28107 || !real_isinteger (r
, SFmode
))
28110 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
28112 /* The exact_log2 above will have returned -1 if this is
28113 not an exact log2. */
28114 if (!IN_RANGE (hwint
, 1, 32))
28121 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28124 arm_pre_atomic_barrier (enum memmodel model
)
28126 if (need_atomic_barrier_p (model
, true))
28127 emit_insn (gen_memory_barrier ());
28131 arm_post_atomic_barrier (enum memmodel model
)
28133 if (need_atomic_barrier_p (model
, false))
28134 emit_insn (gen_memory_barrier ());
28137 /* Emit the load-exclusive and store-exclusive instructions.
28138 Use acquire and release versions if necessary. */
28141 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
28143 rtx (*gen
) (rtx
, rtx
);
28149 case QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
28150 case HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
28151 case SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
28152 case DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
28154 gcc_unreachable ();
28161 case QImode
: gen
= gen_arm_load_exclusiveqi
; break;
28162 case HImode
: gen
= gen_arm_load_exclusivehi
; break;
28163 case SImode
: gen
= gen_arm_load_exclusivesi
; break;
28164 case DImode
: gen
= gen_arm_load_exclusivedi
; break;
28166 gcc_unreachable ();
28170 emit_insn (gen (rval
, mem
));
28174 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
28177 rtx (*gen
) (rtx
, rtx
, rtx
);
28183 case QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
28184 case HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
28185 case SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
28186 case DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
28188 gcc_unreachable ();
28195 case QImode
: gen
= gen_arm_store_exclusiveqi
; break;
28196 case HImode
: gen
= gen_arm_store_exclusivehi
; break;
28197 case SImode
: gen
= gen_arm_store_exclusivesi
; break;
28198 case DImode
: gen
= gen_arm_store_exclusivedi
; break;
28200 gcc_unreachable ();
28204 emit_insn (gen (bval
, rval
, mem
));
28207 /* Mark the previous jump instruction as unlikely. */
28210 emit_unlikely_jump (rtx insn
)
28212 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
28214 rtx_insn
*jump
= emit_jump_insn (insn
);
28215 add_int_reg_note (jump
, REG_BR_PROB
, very_unlikely
);
28218 /* Expand a compare and swap pattern. */
28221 arm_expand_compare_and_swap (rtx operands
[])
28223 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
28225 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
28227 bval
= operands
[0];
28228 rval
= operands
[1];
28230 oldval
= operands
[3];
28231 newval
= operands
[4];
28232 is_weak
= operands
[5];
28233 mod_s
= operands
[6];
28234 mod_f
= operands
[7];
28235 mode
= GET_MODE (mem
);
28237 /* Normally the succ memory model must be stronger than fail, but in the
28238 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28239 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28241 if (TARGET_HAVE_LDACQ
28242 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
28243 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
28244 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
28250 /* For narrow modes, we're going to perform the comparison in SImode,
28251 so do the zero-extension now. */
28252 rval
= gen_reg_rtx (SImode
);
28253 oldval
= convert_modes (SImode
, mode
, oldval
, true);
28257 /* Force the value into a register if needed. We waited until after
28258 the zero-extension above to do this properly. */
28259 if (!arm_add_operand (oldval
, SImode
))
28260 oldval
= force_reg (SImode
, oldval
);
28264 if (!cmpdi_operand (oldval
, mode
))
28265 oldval
= force_reg (mode
, oldval
);
28269 gcc_unreachable ();
28276 case QImode
: gen
= gen_atomic_compare_and_swapt1qi_1
; break;
28277 case HImode
: gen
= gen_atomic_compare_and_swapt1hi_1
; break;
28278 case SImode
: gen
= gen_atomic_compare_and_swapt1si_1
; break;
28279 case DImode
: gen
= gen_atomic_compare_and_swapt1di_1
; break;
28281 gcc_unreachable ();
28288 case QImode
: gen
= gen_atomic_compare_and_swap32qi_1
; break;
28289 case HImode
: gen
= gen_atomic_compare_and_swap32hi_1
; break;
28290 case SImode
: gen
= gen_atomic_compare_and_swap32si_1
; break;
28291 case DImode
: gen
= gen_atomic_compare_and_swap32di_1
; break;
28293 gcc_unreachable ();
28297 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CC_Zmode
, CC_REGNUM
);
28298 emit_insn (gen (bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
28300 if (mode
== QImode
|| mode
== HImode
)
28301 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
28303 /* In all cases, we arrange for success to be signaled by Z set.
28304 This arrangement allows for the boolean result to be used directly
28305 in a subsequent branch, post optimization. For Thumb-1 targets, the
28306 boolean negation of the result is also stored in bval because Thumb-1
28307 backend lacks dependency tracking for CC flag due to flag-setting not
28308 being represented at RTL level. */
28310 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
28313 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
28314 emit_insn (gen_rtx_SET (bval
, x
));
28318 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28319 another memory store between the load-exclusive and store-exclusive can
28320 reset the monitor from Exclusive to Open state. This means we must wait
28321 until after reload to split the pattern, lest we get a register spill in
28322 the middle of the atomic sequence. Success of the compare and swap is
28323 indicated by the Z flag set for 32bit targets and by neg_bval being zero
28324 for Thumb-1 targets (ie. negation of the boolean value returned by
28325 atomic_compare_and_swapmode standard pattern in operand 0). */
28328 arm_split_compare_and_swap (rtx operands
[])
28330 rtx rval
, mem
, oldval
, newval
, neg_bval
;
28332 enum memmodel mod_s
, mod_f
;
28334 rtx_code_label
*label1
, *label2
;
28337 rval
= operands
[1];
28339 oldval
= operands
[3];
28340 newval
= operands
[4];
28341 is_weak
= (operands
[5] != const0_rtx
);
28342 mod_s
= memmodel_from_int (INTVAL (operands
[6]));
28343 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
28344 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
28345 mode
= GET_MODE (mem
);
28347 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
28349 bool use_acquire
= TARGET_HAVE_LDACQ
28350 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28351 || is_mm_release (mod_s
));
28353 bool use_release
= TARGET_HAVE_LDACQ
28354 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28355 || is_mm_acquire (mod_s
));
28357 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28358 a full barrier is emitted after the store-release. */
28360 use_acquire
= false;
28362 /* Checks whether a barrier is needed and emits one accordingly. */
28363 if (!(use_acquire
|| use_release
))
28364 arm_pre_atomic_barrier (mod_s
);
28369 label1
= gen_label_rtx ();
28370 emit_label (label1
);
28372 label2
= gen_label_rtx ();
28374 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
28376 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28377 as required to communicate with arm_expand_compare_and_swap. */
28380 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
28381 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28382 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28383 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
28384 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28388 emit_move_insn (neg_bval
, const1_rtx
);
28389 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
28390 if (thumb1_cmpneg_operand (oldval
, SImode
))
28391 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval
, rval
, oldval
,
28394 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
28397 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
28399 /* Weak or strong, we want EQ to be true for success, so that we
28400 match the flags that we got from the compare above. */
28403 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28404 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
28405 emit_insn (gen_rtx_SET (cond
, x
));
28410 /* Z is set to boolean value of !neg_bval, as required to communicate
28411 with arm_expand_compare_and_swap. */
28412 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
28413 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
28416 if (!is_mm_relaxed (mod_f
))
28417 emit_label (label2
);
28419 /* Checks whether a barrier is needed and emits one accordingly. */
28421 || !(use_acquire
|| use_release
))
28422 arm_post_atomic_barrier (mod_s
);
28424 if (is_mm_relaxed (mod_f
))
28425 emit_label (label2
);
28428 /* Split an atomic operation pattern. Operation is given by CODE and is one
28429 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28430 operation). Operation is performed on the content at MEM and on VALUE
28431 following the memory model MODEL_RTX. The content at MEM before and after
28432 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28433 success of the operation is returned in COND. Using a scratch register or
28434 an operand register for these determines what result is returned for that
28438 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
28439 rtx value
, rtx model_rtx
, rtx cond
)
28441 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
28442 machine_mode mode
= GET_MODE (mem
);
28443 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
28444 rtx_code_label
*label
;
28445 bool all_low_regs
, bind_old_new
;
28448 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
28450 bool use_acquire
= TARGET_HAVE_LDACQ
28451 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28452 || is_mm_release (model
));
28454 bool use_release
= TARGET_HAVE_LDACQ
28455 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28456 || is_mm_acquire (model
));
28458 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28459 a full barrier is emitted after the store-release. */
28461 use_acquire
= false;
28463 /* Checks whether a barrier is needed and emits one accordingly. */
28464 if (!(use_acquire
|| use_release
))
28465 arm_pre_atomic_barrier (model
);
28467 label
= gen_label_rtx ();
28468 emit_label (label
);
28471 new_out
= gen_lowpart (wmode
, new_out
);
28473 old_out
= gen_lowpart (wmode
, old_out
);
28476 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
28478 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
28480 /* Does the operation require destination and first operand to use the same
28481 register? This is decided by register constraints of relevant insn
28482 patterns in thumb1.md. */
28483 gcc_assert (!new_out
|| REG_P (new_out
));
28484 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
28485 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
28486 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
28491 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
28493 /* We want to return the old value while putting the result of the operation
28494 in the same register as the old value so copy the old value over to the
28495 destination register and use that register for the operation. */
28496 if (old_out
&& bind_old_new
)
28498 emit_move_insn (new_out
, old_out
);
28509 x
= gen_rtx_AND (wmode
, old_out
, value
);
28510 emit_insn (gen_rtx_SET (new_out
, x
));
28511 x
= gen_rtx_NOT (wmode
, new_out
);
28512 emit_insn (gen_rtx_SET (new_out
, x
));
28516 if (CONST_INT_P (value
))
28518 value
= GEN_INT (-INTVAL (value
));
28524 if (mode
== DImode
)
28526 /* DImode plus/minus need to clobber flags. */
28527 /* The adddi3 and subdi3 patterns are incorrectly written so that
28528 they require matching operands, even when we could easily support
28529 three operands. Thankfully, this can be fixed up post-splitting,
28530 as the individual add+adc patterns do accept three operands and
28531 post-reload cprop can make these moves go away. */
28532 emit_move_insn (new_out
, old_out
);
28534 x
= gen_adddi3 (new_out
, new_out
, value
);
28536 x
= gen_subdi3 (new_out
, new_out
, value
);
28543 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
28544 emit_insn (gen_rtx_SET (new_out
, x
));
28548 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
28551 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28552 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
28554 /* Checks whether a barrier is needed and emits one accordingly. */
28556 || !(use_acquire
|| use_release
))
28557 arm_post_atomic_barrier (model
);
28560 #define MAX_VECT_LEN 16
28562 struct expand_vec_perm_d
28564 rtx target
, op0
, op1
;
28565 unsigned char perm
[MAX_VECT_LEN
];
28566 machine_mode vmode
;
28567 unsigned char nelt
;
28572 /* Generate a variable permutation. */
28575 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28577 machine_mode vmode
= GET_MODE (target
);
28578 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28580 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
28581 gcc_checking_assert (GET_MODE (op0
) == vmode
);
28582 gcc_checking_assert (GET_MODE (op1
) == vmode
);
28583 gcc_checking_assert (GET_MODE (sel
) == vmode
);
28584 gcc_checking_assert (TARGET_NEON
);
28588 if (vmode
== V8QImode
)
28589 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
28591 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
28597 if (vmode
== V8QImode
)
28599 pair
= gen_reg_rtx (V16QImode
);
28600 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
28601 pair
= gen_lowpart (TImode
, pair
);
28602 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
28606 pair
= gen_reg_rtx (OImode
);
28607 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
28608 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
28614 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28616 machine_mode vmode
= GET_MODE (target
);
28617 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
28618 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28619 rtx rmask
[MAX_VECT_LEN
], mask
;
28621 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28622 numbering of elements for big-endian, we must reverse the order. */
28623 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
28625 /* The VTBL instruction does not use a modulo index, so we must take care
28626 of that ourselves. */
28627 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28628 for (i
= 0; i
< nelt
; ++i
)
28630 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
28631 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
28633 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
28636 /* Map lane ordering between architectural lane order, and GCC lane order,
28637 taking into account ABI. See comment above output_move_neon for details. */
28640 neon_endian_lane_map (machine_mode mode
, int lane
)
28642 if (BYTES_BIG_ENDIAN
)
28644 int nelems
= GET_MODE_NUNITS (mode
);
28645 /* Reverse lane order. */
28646 lane
= (nelems
- 1 - lane
);
28647 /* Reverse D register order, to match ABI. */
28648 if (GET_MODE_SIZE (mode
) == 16)
28649 lane
= lane
^ (nelems
/ 2);
28654 /* Some permutations index into pairs of vectors, this is a helper function
28655 to map indexes into those pairs of vectors. */
28658 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
28660 int nelem
= GET_MODE_NUNITS (mode
);
28661 if (BYTES_BIG_ENDIAN
)
28663 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
28667 /* Generate or test for an insn that supports a constant permutation. */
28669 /* Recognize patterns for the VUZP insns. */
28672 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
28674 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28675 rtx out0
, out1
, in0
, in1
;
28676 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28680 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28683 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28684 big endian pattern on 64 bit vectors, so we correct for that. */
28685 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
28686 && GET_MODE_SIZE (d
->vmode
) == 8 ? d
->nelt
: 0;
28688 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
28690 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28692 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
28696 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28698 for (i
= 0; i
< nelt
; i
++)
28701 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
28702 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
28712 case V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
28713 case V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
28714 case V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
28715 case V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
28716 case V8HFmode
: gen
= gen_neon_vuzpv8hf_internal
; break;
28717 case V4HFmode
: gen
= gen_neon_vuzpv4hf_internal
; break;
28718 case V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
28719 case V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
28720 case V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
28721 case V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
28723 gcc_unreachable ();
28728 if (swap_nelt
!= 0)
28729 std::swap (in0
, in1
);
28732 out1
= gen_reg_rtx (d
->vmode
);
28734 std::swap (out0
, out1
);
28736 emit_insn (gen (out0
, in0
, in1
, out1
));
28740 /* Recognize patterns for the VZIP insns. */
28743 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
28745 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
28746 rtx out0
, out1
, in0
, in1
;
28747 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28751 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28754 is_swapped
= BYTES_BIG_ENDIAN
;
28756 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
28759 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
28761 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28765 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28767 for (i
= 0; i
< nelt
/ 2; i
++)
28770 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
28771 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
28775 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
28776 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
28787 case V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
28788 case V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
28789 case V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
28790 case V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
28791 case V8HFmode
: gen
= gen_neon_vzipv8hf_internal
; break;
28792 case V4HFmode
: gen
= gen_neon_vzipv4hf_internal
; break;
28793 case V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
28794 case V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
28795 case V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
28796 case V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
28798 gcc_unreachable ();
28804 std::swap (in0
, in1
);
28807 out1
= gen_reg_rtx (d
->vmode
);
28809 std::swap (out0
, out1
);
28811 emit_insn (gen (out0
, in0
, in1
, out1
));
28815 /* Recognize patterns for the VREV insns. */
28818 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
28820 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
28821 rtx (*gen
)(rtx
, rtx
);
28823 if (!d
->one_vector_p
)
28832 case V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
28833 case V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
28841 case V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
28842 case V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
28843 case V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
28844 case V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
28845 case V8HFmode
: gen
= gen_neon_vrev64v8hf
; break;
28846 case V4HFmode
: gen
= gen_neon_vrev64v4hf
; break;
28854 case V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
28855 case V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
28856 case V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
28857 case V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
28858 case V4SImode
: gen
= gen_neon_vrev64v4si
; break;
28859 case V2SImode
: gen
= gen_neon_vrev64v2si
; break;
28860 case V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
28861 case V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
28870 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
28871 for (j
= 0; j
<= diff
; j
+= 1)
28873 /* This is guaranteed to be true as the value of diff
28874 is 7, 3, 1 and we should have enough elements in the
28875 queue to generate this. Getting a vector mask with a
28876 value of diff other than these values implies that
28877 something is wrong by the time we get here. */
28878 gcc_assert (i
+ j
< nelt
);
28879 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
28887 emit_insn (gen (d
->target
, d
->op0
));
28891 /* Recognize patterns for the VTRN insns. */
28894 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
28896 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
28897 rtx out0
, out1
, in0
, in1
;
28898 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28900 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28903 /* Note that these are little-endian tests. Adjust for big-endian later. */
28904 if (d
->perm
[0] == 0)
28906 else if (d
->perm
[0] == 1)
28910 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28912 for (i
= 0; i
< nelt
; i
+= 2)
28914 if (d
->perm
[i
] != i
+ odd
)
28916 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
28926 case V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
28927 case V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
28928 case V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
28929 case V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
28930 case V8HFmode
: gen
= gen_neon_vtrnv8hf_internal
; break;
28931 case V4HFmode
: gen
= gen_neon_vtrnv4hf_internal
; break;
28932 case V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
28933 case V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
28934 case V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
28935 case V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
28937 gcc_unreachable ();
28942 if (BYTES_BIG_ENDIAN
)
28944 std::swap (in0
, in1
);
28949 out1
= gen_reg_rtx (d
->vmode
);
28951 std::swap (out0
, out1
);
28953 emit_insn (gen (out0
, in0
, in1
, out1
));
28957 /* Recognize patterns for the VEXT insns. */
28960 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
28962 unsigned int i
, nelt
= d
->nelt
;
28963 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
28966 unsigned int location
;
28968 unsigned int next
= d
->perm
[0] + 1;
28970 /* TODO: Handle GCC's numbering of elements for big-endian. */
28971 if (BYTES_BIG_ENDIAN
)
28974 /* Check if the extracted indexes are increasing by one. */
28975 for (i
= 1; i
< nelt
; next
++, i
++)
28977 /* If we hit the most significant element of the 2nd vector in
28978 the previous iteration, no need to test further. */
28979 if (next
== 2 * nelt
)
28982 /* If we are operating on only one vector: it could be a
28983 rotation. If there are only two elements of size < 64, let
28984 arm_evpc_neon_vrev catch it. */
28985 if (d
->one_vector_p
&& (next
== nelt
))
28987 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
28993 if (d
->perm
[i
] != next
)
28997 location
= d
->perm
[0];
29001 case V16QImode
: gen
= gen_neon_vextv16qi
; break;
29002 case V8QImode
: gen
= gen_neon_vextv8qi
; break;
29003 case V4HImode
: gen
= gen_neon_vextv4hi
; break;
29004 case V8HImode
: gen
= gen_neon_vextv8hi
; break;
29005 case V2SImode
: gen
= gen_neon_vextv2si
; break;
29006 case V4SImode
: gen
= gen_neon_vextv4si
; break;
29007 case V4HFmode
: gen
= gen_neon_vextv4hf
; break;
29008 case V8HFmode
: gen
= gen_neon_vextv8hf
; break;
29009 case V2SFmode
: gen
= gen_neon_vextv2sf
; break;
29010 case V4SFmode
: gen
= gen_neon_vextv4sf
; break;
29011 case V2DImode
: gen
= gen_neon_vextv2di
; break;
29020 offset
= GEN_INT (location
);
29021 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
29025 /* The NEON VTBL instruction is a fully variable permuation that's even
29026 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29027 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29028 can do slightly better by expanding this as a constant where we don't
29029 have to apply a mask. */
29032 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
29034 rtx rperm
[MAX_VECT_LEN
], sel
;
29035 machine_mode vmode
= d
->vmode
;
29036 unsigned int i
, nelt
= d
->nelt
;
29038 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29039 numbering of elements for big-endian, we must reverse the order. */
29040 if (BYTES_BIG_ENDIAN
)
29046 /* Generic code will try constant permutation twice. Once with the
29047 original mode and again with the elements lowered to QImode.
29048 So wait and don't do the selector expansion ourselves. */
29049 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
29052 for (i
= 0; i
< nelt
; ++i
)
29053 rperm
[i
] = GEN_INT (d
->perm
[i
]);
29054 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
29055 sel
= force_reg (vmode
, sel
);
29057 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
29062 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
29064 /* Check if the input mask matches vext before reordering the
29067 if (arm_evpc_neon_vext (d
))
29070 /* The pattern matching functions above are written to look for a small
29071 number to begin the sequence (0, 1, N/2). If we begin with an index
29072 from the second operand, we can swap the operands. */
29073 if (d
->perm
[0] >= d
->nelt
)
29075 unsigned i
, nelt
= d
->nelt
;
29077 for (i
= 0; i
< nelt
; ++i
)
29078 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
29080 std::swap (d
->op0
, d
->op1
);
29085 if (arm_evpc_neon_vuzp (d
))
29087 if (arm_evpc_neon_vzip (d
))
29089 if (arm_evpc_neon_vrev (d
))
29091 if (arm_evpc_neon_vtrn (d
))
29093 return arm_evpc_neon_vtbl (d
);
29098 /* Expand a vec_perm_const pattern. */
29101 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
29103 struct expand_vec_perm_d d
;
29104 int i
, nelt
, which
;
29110 d
.vmode
= GET_MODE (target
);
29111 gcc_assert (VECTOR_MODE_P (d
.vmode
));
29112 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
29113 d
.testing_p
= false;
29115 for (i
= which
= 0; i
< nelt
; ++i
)
29117 rtx e
= XVECEXP (sel
, 0, i
);
29118 int ei
= INTVAL (e
) & (2 * nelt
- 1);
29119 which
|= (ei
< nelt
? 1 : 2);
29129 d
.one_vector_p
= false;
29130 if (!rtx_equal_p (op0
, op1
))
29133 /* The elements of PERM do not suggest that only the first operand
29134 is used, but both operands are identical. Allow easier matching
29135 of the permutation by folding the permutation into the single
29139 for (i
= 0; i
< nelt
; ++i
)
29140 d
.perm
[i
] &= nelt
- 1;
29142 d
.one_vector_p
= true;
29147 d
.one_vector_p
= true;
29151 return arm_expand_vec_perm_const_1 (&d
);
29154 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29157 arm_vectorize_vec_perm_const_ok (machine_mode vmode
,
29158 const unsigned char *sel
)
29160 struct expand_vec_perm_d d
;
29161 unsigned int i
, nelt
, which
;
29165 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
29166 d
.testing_p
= true;
29167 memcpy (d
.perm
, sel
, nelt
);
29169 /* Categorize the set of elements in the selector. */
29170 for (i
= which
= 0; i
< nelt
; ++i
)
29172 unsigned char e
= d
.perm
[i
];
29173 gcc_assert (e
< 2 * nelt
);
29174 which
|= (e
< nelt
? 1 : 2);
29177 /* For all elements from second vector, fold the elements to first. */
29179 for (i
= 0; i
< nelt
; ++i
)
29182 /* Check whether the mask can be applied to the vector type. */
29183 d
.one_vector_p
= (which
!= 3);
29185 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
29186 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
29187 if (!d
.one_vector_p
)
29188 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
29191 ret
= arm_expand_vec_perm_const_1 (&d
);
29198 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
29200 /* If we are soft float and we do not have ldrd
29201 then all auto increment forms are ok. */
29202 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
29207 /* Post increment and Pre Decrement are supported for all
29208 instruction forms except for vector forms. */
29211 if (VECTOR_MODE_P (mode
))
29213 if (code
!= ARM_PRE_DEC
)
29223 /* Without LDRD and mode size greater than
29224 word size, there is no point in auto-incrementing
29225 because ldm and stm will not have these forms. */
29226 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
29229 /* Vector and floating point modes do not support
29230 these auto increment forms. */
29231 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
29244 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29245 on ARM, since we know that shifts by negative amounts are no-ops.
29246 Additionally, the default expansion code is not available or suitable
29247 for post-reload insn splits (this can occur when the register allocator
29248 chooses not to do a shift in NEON).
29250 This function is used in both initial expand and post-reload splits, and
29251 handles all kinds of 64-bit shifts.
29253 Input requirements:
29254 - It is safe for the input and output to be the same register, but
29255 early-clobber rules apply for the shift amount and scratch registers.
29256 - Shift by register requires both scratch registers. In all other cases
29257 the scratch registers may be NULL.
29258 - Ashiftrt by a register also clobbers the CC register. */
29260 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
29261 rtx amount
, rtx scratch1
, rtx scratch2
)
29263 rtx out_high
= gen_highpart (SImode
, out
);
29264 rtx out_low
= gen_lowpart (SImode
, out
);
29265 rtx in_high
= gen_highpart (SImode
, in
);
29266 rtx in_low
= gen_lowpart (SImode
, in
);
29269 in = the register pair containing the input value.
29270 out = the destination register pair.
29271 up = the high- or low-part of each pair.
29272 down = the opposite part to "up".
29273 In a shift, we can consider bits to shift from "up"-stream to
29274 "down"-stream, so in a left-shift "up" is the low-part and "down"
29275 is the high-part of each register pair. */
29277 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
29278 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
29279 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
29280 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
29282 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
29284 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
29285 && GET_MODE (out
) == DImode
);
29287 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
29288 && GET_MODE (in
) == DImode
);
29290 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
29291 && GET_MODE (amount
) == SImode
)
29292 || CONST_INT_P (amount
)));
29293 gcc_assert (scratch1
== NULL
29294 || (GET_CODE (scratch1
) == SCRATCH
)
29295 || (GET_MODE (scratch1
) == SImode
29296 && REG_P (scratch1
)));
29297 gcc_assert (scratch2
== NULL
29298 || (GET_CODE (scratch2
) == SCRATCH
)
29299 || (GET_MODE (scratch2
) == SImode
29300 && REG_P (scratch2
)));
29301 gcc_assert (!REG_P (out
) || !REG_P (amount
)
29302 || !HARD_REGISTER_P (out
)
29303 || (REGNO (out
) != REGNO (amount
)
29304 && REGNO (out
) + 1 != REGNO (amount
)));
29306 /* Macros to make following code more readable. */
29307 #define SUB_32(DEST,SRC) \
29308 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29309 #define RSB_32(DEST,SRC) \
29310 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29311 #define SUB_S_32(DEST,SRC) \
29312 gen_addsi3_compare0 ((DEST), (SRC), \
29314 #define SET(DEST,SRC) \
29315 gen_rtx_SET ((DEST), (SRC))
29316 #define SHIFT(CODE,SRC,AMOUNT) \
29317 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29318 #define LSHIFT(CODE,SRC,AMOUNT) \
29319 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29320 SImode, (SRC), (AMOUNT))
29321 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29322 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29323 SImode, (SRC), (AMOUNT))
29325 gen_rtx_IOR (SImode, (A), (B))
29326 #define BRANCH(COND,LABEL) \
29327 gen_arm_cond_branch ((LABEL), \
29328 gen_rtx_ ## COND (CCmode, cc_reg, \
29332 /* Shifts by register and shifts by constant are handled separately. */
29333 if (CONST_INT_P (amount
))
29335 /* We have a shift-by-constant. */
29337 /* First, handle out-of-range shift amounts.
29338 In both cases we try to match the result an ARM instruction in a
29339 shift-by-register would give. This helps reduce execution
29340 differences between optimization levels, but it won't stop other
29341 parts of the compiler doing different things. This is "undefined
29342 behavior, in any case. */
29343 if (INTVAL (amount
) <= 0)
29344 emit_insn (gen_movdi (out
, in
));
29345 else if (INTVAL (amount
) >= 64)
29347 if (code
== ASHIFTRT
)
29349 rtx const31_rtx
= GEN_INT (31);
29350 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
29351 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
29354 emit_insn (gen_movdi (out
, const0_rtx
));
29357 /* Now handle valid shifts. */
29358 else if (INTVAL (amount
) < 32)
29360 /* Shifts by a constant less than 32. */
29361 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
29363 /* Clearing the out register in DImode first avoids lots
29364 of spilling and results in less stack usage.
29365 Later this redundant insn is completely removed.
29366 Do that only if "in" and "out" are different registers. */
29367 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29368 emit_insn (SET (out
, const0_rtx
));
29369 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29370 emit_insn (SET (out_down
,
29371 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
29373 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29377 /* Shifts by a constant greater than 31. */
29378 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
29380 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29381 emit_insn (SET (out
, const0_rtx
));
29382 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
29383 if (code
== ASHIFTRT
)
29384 emit_insn (gen_ashrsi3 (out_up
, in_up
,
29387 emit_insn (SET (out_up
, const0_rtx
));
29392 /* We have a shift-by-register. */
29393 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
29395 /* This alternative requires the scratch registers. */
29396 gcc_assert (scratch1
&& REG_P (scratch1
));
29397 gcc_assert (scratch2
&& REG_P (scratch2
));
29399 /* We will need the values "amount-32" and "32-amount" later.
29400 Swapping them around now allows the later code to be more general. */
29404 emit_insn (SUB_32 (scratch1
, amount
));
29405 emit_insn (RSB_32 (scratch2
, amount
));
29408 emit_insn (RSB_32 (scratch1
, amount
));
29409 /* Also set CC = amount > 32. */
29410 emit_insn (SUB_S_32 (scratch2
, amount
));
29413 emit_insn (RSB_32 (scratch1
, amount
));
29414 emit_insn (SUB_32 (scratch2
, amount
));
29417 gcc_unreachable ();
29420 /* Emit code like this:
29423 out_down = in_down << amount;
29424 out_down = (in_up << (amount - 32)) | out_down;
29425 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29426 out_up = in_up << amount;
29429 out_down = in_down >> amount;
29430 out_down = (in_up << (32 - amount)) | out_down;
29432 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29433 out_up = in_up << amount;
29436 out_down = in_down >> amount;
29437 out_down = (in_up << (32 - amount)) | out_down;
29439 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29440 out_up = in_up << amount;
29442 The ARM and Thumb2 variants are the same but implemented slightly
29443 differently. If this were only called during expand we could just
29444 use the Thumb2 case and let combine do the right thing, but this
29445 can also be called from post-reload splitters. */
29447 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29449 if (!TARGET_THUMB2
)
29451 /* Emit code for ARM mode. */
29452 emit_insn (SET (out_down
,
29453 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
29454 if (code
== ASHIFTRT
)
29456 rtx_code_label
*done_label
= gen_label_rtx ();
29457 emit_jump_insn (BRANCH (LT
, done_label
));
29458 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
29460 emit_label (done_label
);
29463 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
29468 /* Emit code for Thumb2 mode.
29469 Thumb2 can't do shift and or in one insn. */
29470 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
29471 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
29473 if (code
== ASHIFTRT
)
29475 rtx_code_label
*done_label
= gen_label_rtx ();
29476 emit_jump_insn (BRANCH (LT
, done_label
));
29477 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
29478 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
29479 emit_label (done_label
);
29483 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
29484 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
29488 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29502 /* Returns true if the pattern is a valid symbolic address, which is either a
29503 symbol_ref or (symbol_ref + addend).
29505 According to the ARM ELF ABI, the initial addend of REL-type relocations
29506 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29507 literal field of the instruction as a 16-bit signed value in the range
29508 -32768 <= A < 32768. */
29511 arm_valid_symbolic_address_p (rtx addr
)
29513 rtx xop0
, xop1
= NULL_RTX
;
29516 if (GET_CODE (tmp
) == SYMBOL_REF
|| GET_CODE (tmp
) == LABEL_REF
)
29519 /* (const (plus: symbol_ref const_int)) */
29520 if (GET_CODE (addr
) == CONST
)
29521 tmp
= XEXP (addr
, 0);
29523 if (GET_CODE (tmp
) == PLUS
)
29525 xop0
= XEXP (tmp
, 0);
29526 xop1
= XEXP (tmp
, 1);
29528 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
29529 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
29535 /* Returns true if a valid comparison operation and makes
29536 the operands in a form that is valid. */
29538 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
29540 enum rtx_code code
= GET_CODE (*comparison
);
29542 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
29543 ? GET_MODE (*op2
) : GET_MODE (*op1
);
29545 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
29547 if (code
== UNEQ
|| code
== LTGT
)
29550 code_int
= (int)code
;
29551 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
29552 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
29557 if (!arm_add_operand (*op1
, mode
))
29558 *op1
= force_reg (mode
, *op1
);
29559 if (!arm_add_operand (*op2
, mode
))
29560 *op2
= force_reg (mode
, *op2
);
29564 if (!cmpdi_operand (*op1
, mode
))
29565 *op1
= force_reg (mode
, *op1
);
29566 if (!cmpdi_operand (*op2
, mode
))
29567 *op2
= force_reg (mode
, *op2
);
29571 if (!TARGET_VFP_FP16INST
)
29573 /* FP16 comparisons are done in SF mode. */
29575 *op1
= convert_to_mode (mode
, *op1
, 1);
29576 *op2
= convert_to_mode (mode
, *op2
, 1);
29577 /* Fall through. */
29580 if (!vfp_compare_operand (*op1
, mode
))
29581 *op1
= force_reg (mode
, *op1
);
29582 if (!vfp_compare_operand (*op2
, mode
))
29583 *op2
= force_reg (mode
, *op2
);
29593 /* Maximum number of instructions to set block of memory. */
29595 arm_block_set_max_insns (void)
29597 if (optimize_function_for_size_p (cfun
))
29600 return current_tune
->max_insns_inline_memset
;
29603 /* Return TRUE if it's profitable to set block of memory for
29604 non-vectorized case. VAL is the value to set the memory
29605 with. LENGTH is the number of bytes to set. ALIGN is the
29606 alignment of the destination memory in bytes. UNALIGNED_P
29607 is TRUE if we can only set the memory with instructions
29608 meeting alignment requirements. USE_STRD_P is TRUE if we
29609 can use strd to set the memory. */
29611 arm_block_set_non_vect_profit_p (rtx val
,
29612 unsigned HOST_WIDE_INT length
,
29613 unsigned HOST_WIDE_INT align
,
29614 bool unaligned_p
, bool use_strd_p
)
29617 /* For leftovers in bytes of 0-7, we can set the memory block using
29618 strb/strh/str with minimum instruction number. */
29619 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29623 num
= arm_const_inline_cost (SET
, val
);
29624 num
+= length
/ align
+ length
% align
;
29626 else if (use_strd_p
)
29628 num
= arm_const_double_inline_cost (val
);
29629 num
+= (length
>> 3) + leftover
[length
& 7];
29633 num
= arm_const_inline_cost (SET
, val
);
29634 num
+= (length
>> 2) + leftover
[length
& 3];
29637 /* We may be able to combine last pair STRH/STRB into a single STR
29638 by shifting one byte back. */
29639 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
29642 return (num
<= arm_block_set_max_insns ());
29645 /* Return TRUE if it's profitable to set block of memory for
29646 vectorized case. LENGTH is the number of bytes to set.
29647 ALIGN is the alignment of destination memory in bytes.
29648 MODE is the vector mode used to set the memory. */
29650 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
29651 unsigned HOST_WIDE_INT align
,
29655 bool unaligned_p
= ((align
& 3) != 0);
29656 unsigned int nelt
= GET_MODE_NUNITS (mode
);
29658 /* Instruction loading constant value. */
29660 /* Instructions storing the memory. */
29661 num
+= (length
+ nelt
- 1) / nelt
;
29662 /* Instructions adjusting the address expression. Only need to
29663 adjust address expression if it's 4 bytes aligned and bytes
29664 leftover can only be stored by mis-aligned store instruction. */
29665 if (!unaligned_p
&& (length
& 3) != 0)
29668 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29669 if (!unaligned_p
&& mode
== V16QImode
)
29672 return (num
<= arm_block_set_max_insns ());
29675 /* Set a block of memory using vectorization instructions for the
29676 unaligned case. We fill the first LENGTH bytes of the memory
29677 area starting from DSTBASE with byte constant VALUE. ALIGN is
29678 the alignment requirement of memory. Return TRUE if succeeded. */
29680 arm_block_set_unaligned_vect (rtx dstbase
,
29681 unsigned HOST_WIDE_INT length
,
29682 unsigned HOST_WIDE_INT value
,
29683 unsigned HOST_WIDE_INT align
)
29685 unsigned int i
, j
, nelt_v16
, nelt_v8
, nelt_mode
;
29687 rtx val_elt
, val_vec
, reg
;
29688 rtx rval
[MAX_VECT_LEN
];
29689 rtx (*gen_func
) (rtx
, rtx
);
29691 unsigned HOST_WIDE_INT v
= value
;
29692 unsigned int offset
= 0;
29693 gcc_assert ((align
& 0x3) != 0);
29694 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29695 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29696 if (length
>= nelt_v16
)
29699 gen_func
= gen_movmisalignv16qi
;
29704 gen_func
= gen_movmisalignv8qi
;
29706 nelt_mode
= GET_MODE_NUNITS (mode
);
29707 gcc_assert (length
>= nelt_mode
);
29708 /* Skip if it isn't profitable. */
29709 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29712 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29713 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29715 v
= sext_hwi (v
, BITS_PER_WORD
);
29716 val_elt
= GEN_INT (v
);
29717 for (j
= 0; j
< nelt_mode
; j
++)
29720 reg
= gen_reg_rtx (mode
);
29721 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29722 /* Emit instruction loading the constant value. */
29723 emit_move_insn (reg
, val_vec
);
29725 /* Handle nelt_mode bytes in a vector. */
29726 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29728 emit_insn ((*gen_func
) (mem
, reg
));
29729 if (i
+ 2 * nelt_mode
<= length
)
29731 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
29732 offset
+= nelt_mode
;
29733 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29737 /* If there are not less than nelt_v8 bytes leftover, we must be in
29739 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
29741 /* Handle (8, 16) bytes leftover. */
29742 if (i
+ nelt_v8
< length
)
29744 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
29745 offset
+= length
- i
;
29746 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29748 /* We are shifting bytes back, set the alignment accordingly. */
29749 if ((length
& 1) != 0 && align
>= 2)
29750 set_mem_align (mem
, BITS_PER_UNIT
);
29752 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29754 /* Handle (0, 8] bytes leftover. */
29755 else if (i
< length
&& i
+ nelt_v8
>= length
)
29757 if (mode
== V16QImode
)
29758 reg
= gen_lowpart (V8QImode
, reg
);
29760 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
29761 + (nelt_mode
- nelt_v8
))));
29762 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
29763 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
29765 /* We are shifting bytes back, set the alignment accordingly. */
29766 if ((length
& 1) != 0 && align
>= 2)
29767 set_mem_align (mem
, BITS_PER_UNIT
);
29769 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29775 /* Set a block of memory using vectorization instructions for the
29776 aligned case. We fill the first LENGTH bytes of the memory area
29777 starting from DSTBASE with byte constant VALUE. ALIGN is the
29778 alignment requirement of memory. Return TRUE if succeeded. */
29780 arm_block_set_aligned_vect (rtx dstbase
,
29781 unsigned HOST_WIDE_INT length
,
29782 unsigned HOST_WIDE_INT value
,
29783 unsigned HOST_WIDE_INT align
)
29785 unsigned int i
, j
, nelt_v8
, nelt_v16
, nelt_mode
;
29786 rtx dst
, addr
, mem
;
29787 rtx val_elt
, val_vec
, reg
;
29788 rtx rval
[MAX_VECT_LEN
];
29790 unsigned HOST_WIDE_INT v
= value
;
29791 unsigned int offset
= 0;
29793 gcc_assert ((align
& 0x3) == 0);
29794 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29795 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29796 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
29801 nelt_mode
= GET_MODE_NUNITS (mode
);
29802 gcc_assert (length
>= nelt_mode
);
29803 /* Skip if it isn't profitable. */
29804 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29807 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29809 v
= sext_hwi (v
, BITS_PER_WORD
);
29810 val_elt
= GEN_INT (v
);
29811 for (j
= 0; j
< nelt_mode
; j
++)
29814 reg
= gen_reg_rtx (mode
);
29815 val_vec
= gen_rtx_CONST_VECTOR (mode
, gen_rtvec_v (nelt_mode
, rval
));
29816 /* Emit instruction loading the constant value. */
29817 emit_move_insn (reg
, val_vec
);
29820 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29821 if (mode
== V16QImode
)
29823 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29824 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29826 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29827 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
29829 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29830 offset
+= length
- nelt_mode
;
29831 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29832 /* We are shifting bytes back, set the alignment accordingly. */
29833 if ((length
& 0x3) == 0)
29834 set_mem_align (mem
, BITS_PER_UNIT
* 4);
29835 else if ((length
& 0x1) == 0)
29836 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29838 set_mem_align (mem
, BITS_PER_UNIT
);
29840 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29843 /* Fall through for bytes leftover. */
29845 nelt_mode
= GET_MODE_NUNITS (mode
);
29846 reg
= gen_lowpart (V8QImode
, reg
);
29849 /* Handle 8 bytes in a vector. */
29850 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29852 addr
= plus_constant (Pmode
, dst
, i
);
29853 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
29854 emit_move_insn (mem
, reg
);
29857 /* Handle single word leftover by shifting 4 bytes back. We can
29858 use aligned access for this case. */
29859 if (i
+ UNITS_PER_WORD
== length
)
29861 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
29862 offset
+= i
- UNITS_PER_WORD
;
29863 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
29864 /* We are shifting 4 bytes back, set the alignment accordingly. */
29865 if (align
> UNITS_PER_WORD
)
29866 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
29868 emit_move_insn (mem
, reg
);
29870 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29871 We have to use unaligned access for this case. */
29872 else if (i
< length
)
29874 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29875 offset
+= length
- nelt_mode
;
29876 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29877 /* We are shifting bytes back, set the alignment accordingly. */
29878 if ((length
& 1) == 0)
29879 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29881 set_mem_align (mem
, BITS_PER_UNIT
);
29883 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29889 /* Set a block of memory using plain strh/strb instructions, only
29890 using instructions allowed by ALIGN on processor. We fill the
29891 first LENGTH bytes of the memory area starting from DSTBASE
29892 with byte constant VALUE. ALIGN is the alignment requirement
29895 arm_block_set_unaligned_non_vect (rtx dstbase
,
29896 unsigned HOST_WIDE_INT length
,
29897 unsigned HOST_WIDE_INT value
,
29898 unsigned HOST_WIDE_INT align
)
29901 rtx dst
, addr
, mem
;
29902 rtx val_exp
, val_reg
, reg
;
29904 HOST_WIDE_INT v
= value
;
29906 gcc_assert (align
== 1 || align
== 2);
29909 v
|= (value
<< BITS_PER_UNIT
);
29911 v
= sext_hwi (v
, BITS_PER_WORD
);
29912 val_exp
= GEN_INT (v
);
29913 /* Skip if it isn't profitable. */
29914 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29915 align
, true, false))
29918 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29919 mode
= (align
== 2 ? HImode
: QImode
);
29920 val_reg
= force_reg (SImode
, val_exp
);
29921 reg
= gen_lowpart (mode
, val_reg
);
29923 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
29925 addr
= plus_constant (Pmode
, dst
, i
);
29926 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
29927 emit_move_insn (mem
, reg
);
29930 /* Handle single byte leftover. */
29931 if (i
+ 1 == length
)
29933 reg
= gen_lowpart (QImode
, val_reg
);
29934 addr
= plus_constant (Pmode
, dst
, i
);
29935 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
29936 emit_move_insn (mem
, reg
);
29940 gcc_assert (i
== length
);
29944 /* Set a block of memory using plain strd/str/strh/strb instructions,
29945 to permit unaligned copies on processors which support unaligned
29946 semantics for those instructions. We fill the first LENGTH bytes
29947 of the memory area starting from DSTBASE with byte constant VALUE.
29948 ALIGN is the alignment requirement of memory. */
29950 arm_block_set_aligned_non_vect (rtx dstbase
,
29951 unsigned HOST_WIDE_INT length
,
29952 unsigned HOST_WIDE_INT value
,
29953 unsigned HOST_WIDE_INT align
)
29956 rtx dst
, addr
, mem
;
29957 rtx val_exp
, val_reg
, reg
;
29958 unsigned HOST_WIDE_INT v
;
29961 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
29962 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
29964 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
29965 if (length
< UNITS_PER_WORD
)
29966 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
29969 v
|= (v
<< BITS_PER_WORD
);
29971 v
= sext_hwi (v
, BITS_PER_WORD
);
29973 val_exp
= GEN_INT (v
);
29974 /* Skip if it isn't profitable. */
29975 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29976 align
, false, use_strd_p
))
29981 /* Try without strd. */
29982 v
= (v
>> BITS_PER_WORD
);
29983 v
= sext_hwi (v
, BITS_PER_WORD
);
29984 val_exp
= GEN_INT (v
);
29985 use_strd_p
= false;
29986 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
29987 align
, false, use_strd_p
))
29992 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29993 /* Handle double words using strd if possible. */
29996 val_reg
= force_reg (DImode
, val_exp
);
29998 for (; (i
+ 8 <= length
); i
+= 8)
30000 addr
= plus_constant (Pmode
, dst
, i
);
30001 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
30002 emit_move_insn (mem
, reg
);
30006 val_reg
= force_reg (SImode
, val_exp
);
30008 /* Handle words. */
30009 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
30010 for (; (i
+ 4 <= length
); i
+= 4)
30012 addr
= plus_constant (Pmode
, dst
, i
);
30013 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
30014 if ((align
& 3) == 0)
30015 emit_move_insn (mem
, reg
);
30017 emit_insn (gen_unaligned_storesi (mem
, reg
));
30020 /* Merge last pair of STRH and STRB into a STR if possible. */
30021 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
30023 addr
= plus_constant (Pmode
, dst
, i
- 1);
30024 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
30025 /* We are shifting one byte back, set the alignment accordingly. */
30026 if ((align
& 1) == 0)
30027 set_mem_align (mem
, BITS_PER_UNIT
);
30029 /* Most likely this is an unaligned access, and we can't tell at
30030 compilation time. */
30031 emit_insn (gen_unaligned_storesi (mem
, reg
));
30035 /* Handle half word leftover. */
30036 if (i
+ 2 <= length
)
30038 reg
= gen_lowpart (HImode
, val_reg
);
30039 addr
= plus_constant (Pmode
, dst
, i
);
30040 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
30041 if ((align
& 1) == 0)
30042 emit_move_insn (mem
, reg
);
30044 emit_insn (gen_unaligned_storehi (mem
, reg
));
30049 /* Handle single byte leftover. */
30050 if (i
+ 1 == length
)
30052 reg
= gen_lowpart (QImode
, val_reg
);
30053 addr
= plus_constant (Pmode
, dst
, i
);
30054 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
30055 emit_move_insn (mem
, reg
);
30061 /* Set a block of memory using vectorization instructions for both
30062 aligned and unaligned cases. We fill the first LENGTH bytes of
30063 the memory area starting from DSTBASE with byte constant VALUE.
30064 ALIGN is the alignment requirement of memory. */
30066 arm_block_set_vect (rtx dstbase
,
30067 unsigned HOST_WIDE_INT length
,
30068 unsigned HOST_WIDE_INT value
,
30069 unsigned HOST_WIDE_INT align
)
30071 /* Check whether we need to use unaligned store instruction. */
30072 if (((align
& 3) != 0 || (length
& 3) != 0)
30073 /* Check whether unaligned store instruction is available. */
30074 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
30077 if ((align
& 3) == 0)
30078 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
30080 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
30083 /* Expand string store operation. Firstly we try to do that by using
30084 vectorization instructions, then try with ARM unaligned access and
30085 double-word store if profitable. OPERANDS[0] is the destination,
30086 OPERANDS[1] is the number of bytes, operands[2] is the value to
30087 initialize the memory, OPERANDS[3] is the known alignment of the
30090 arm_gen_setmem (rtx
*operands
)
30092 rtx dstbase
= operands
[0];
30093 unsigned HOST_WIDE_INT length
;
30094 unsigned HOST_WIDE_INT value
;
30095 unsigned HOST_WIDE_INT align
;
30097 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
30100 length
= UINTVAL (operands
[1]);
30104 value
= (UINTVAL (operands
[2]) & 0xFF);
30105 align
= UINTVAL (operands
[3]);
30106 if (TARGET_NEON
&& length
>= 8
30107 && current_tune
->string_ops_prefer_neon
30108 && arm_block_set_vect (dstbase
, length
, value
, align
))
30111 if (!unaligned_access
&& (align
& 3) != 0)
30112 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
30114 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
30119 arm_macro_fusion_p (void)
30121 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
30124 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30125 for MOVW / MOVT macro fusion. */
30128 arm_sets_movw_movt_fusible_p (rtx prev_set
, rtx curr_set
)
30130 /* We are trying to fuse
30131 movw imm / movt imm
30132 instructions as a group that gets scheduled together. */
30134 rtx set_dest
= SET_DEST (curr_set
);
30136 if (GET_MODE (set_dest
) != SImode
)
30139 /* We are trying to match:
30140 prev (movw) == (set (reg r0) (const_int imm16))
30141 curr (movt) == (set (zero_extract (reg r0)
30144 (const_int imm16_1))
30146 prev (movw) == (set (reg r1)
30147 (high (symbol_ref ("SYM"))))
30148 curr (movt) == (set (reg r0)
30150 (symbol_ref ("SYM")))) */
30152 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
30154 if (CONST_INT_P (SET_SRC (curr_set
))
30155 && CONST_INT_P (SET_SRC (prev_set
))
30156 && REG_P (XEXP (set_dest
, 0))
30157 && REG_P (SET_DEST (prev_set
))
30158 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
30162 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
30163 && REG_P (SET_DEST (curr_set
))
30164 && REG_P (SET_DEST (prev_set
))
30165 && GET_CODE (SET_SRC (prev_set
)) == HIGH
30166 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
30173 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
30175 rtx prev_set
= single_set (prev
);
30176 rtx curr_set
= single_set (curr
);
30182 if (any_condjump_p (curr
))
30185 if (!arm_macro_fusion_p ())
30188 if (current_tune
->fusible_ops
& tune_params::FUSE_AES_AESMC
30189 && aarch_crypto_can_dual_issue (prev
, curr
))
30192 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
30193 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
30199 /* Return true iff the instruction fusion described by OP is enabled. */
30201 arm_fusion_enabled_p (tune_params::fuse_ops op
)
30203 return current_tune
->fusible_ops
& op
;
30206 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30207 scheduled for speculative execution. Reject the long-running division
30208 and square-root instructions. */
30211 arm_sched_can_speculate_insn (rtx_insn
*insn
)
30213 switch (get_attr_type (insn
))
30221 case TYPE_NEON_FP_SQRT_S
:
30222 case TYPE_NEON_FP_SQRT_D
:
30223 case TYPE_NEON_FP_SQRT_S_Q
:
30224 case TYPE_NEON_FP_SQRT_D_Q
:
30225 case TYPE_NEON_FP_DIV_S
:
30226 case TYPE_NEON_FP_DIV_D
:
30227 case TYPE_NEON_FP_DIV_S_Q
:
30228 case TYPE_NEON_FP_DIV_D_Q
:
30235 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30237 static unsigned HOST_WIDE_INT
30238 arm_asan_shadow_offset (void)
30240 return HOST_WIDE_INT_1U
<< 29;
30244 /* This is a temporary fix for PR60655. Ideally we need
30245 to handle most of these cases in the generic part but
30246 currently we reject minus (..) (sym_ref). We try to
30247 ameliorate the case with minus (sym_ref1) (sym_ref2)
30248 where they are in the same section. */
30251 arm_const_not_ok_for_debug_p (rtx p
)
30253 tree decl_op0
= NULL
;
30254 tree decl_op1
= NULL
;
30256 if (GET_CODE (p
) == MINUS
)
30258 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
30260 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
30262 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
30263 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
30265 if ((VAR_P (decl_op1
)
30266 || TREE_CODE (decl_op1
) == CONST_DECL
)
30267 && (VAR_P (decl_op0
)
30268 || TREE_CODE (decl_op0
) == CONST_DECL
))
30269 return (get_variable_section (decl_op1
, false)
30270 != get_variable_section (decl_op0
, false));
30272 if (TREE_CODE (decl_op1
) == LABEL_DECL
30273 && TREE_CODE (decl_op0
) == LABEL_DECL
)
30274 return (DECL_CONTEXT (decl_op1
)
30275 != DECL_CONTEXT (decl_op0
));
30285 /* return TRUE if x is a reference to a value in a constant pool */
30287 arm_is_constant_pool_ref (rtx x
)
30290 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
30291 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
30294 /* Remember the last target of arm_set_current_function. */
30295 static GTY(()) tree arm_previous_fndecl
;
30297 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30300 save_restore_target_globals (tree new_tree
)
30302 /* If we have a previous state, use it. */
30303 if (TREE_TARGET_GLOBALS (new_tree
))
30304 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
30305 else if (new_tree
== target_option_default_node
)
30306 restore_target_globals (&default_target_globals
);
30309 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30310 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
30313 arm_option_params_internal ();
30316 /* Invalidate arm_previous_fndecl. */
30319 arm_reset_previous_fndecl (void)
30321 arm_previous_fndecl
= NULL_TREE
;
30324 /* Establish appropriate back-end context for processing the function
30325 FNDECL. The argument might be NULL to indicate processing at top
30326 level, outside of any function scope. */
30329 arm_set_current_function (tree fndecl
)
30331 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
30334 tree old_tree
= (arm_previous_fndecl
30335 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
30338 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30340 /* If current function has no attributes but previous one did,
30341 use the default node. */
30342 if (! new_tree
&& old_tree
)
30343 new_tree
= target_option_default_node
;
30345 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30346 the default have been handled by save_restore_target_globals from
30347 arm_pragma_target_parse. */
30348 if (old_tree
== new_tree
)
30351 arm_previous_fndecl
= fndecl
;
30353 /* First set the target options. */
30354 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
30356 save_restore_target_globals (new_tree
);
30359 /* Implement TARGET_OPTION_PRINT. */
30362 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
30364 int flags
= ptr
->x_target_flags
;
30365 const char *fpu_name
;
30367 fpu_name
= (ptr
->x_arm_fpu_index
== TARGET_FPU_auto
30368 ? "auto" : all_fpus
[ptr
->x_arm_fpu_index
].name
);
30370 fprintf (file
, "%*sselected arch %s\n", indent
, "",
30371 TARGET_THUMB2_P (flags
) ? "thumb2" :
30372 TARGET_THUMB_P (flags
) ? "thumb1" :
30375 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_name
);
30378 /* Hook to determine if one function can safely inline another. */
30381 arm_can_inline_p (tree caller
, tree callee
)
30383 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
30384 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
30385 bool can_inline
= true;
30387 struct cl_target_option
*caller_opts
30388 = TREE_TARGET_OPTION (caller_tree
? caller_tree
30389 : target_option_default_node
);
30391 struct cl_target_option
*callee_opts
30392 = TREE_TARGET_OPTION (callee_tree
? callee_tree
30393 : target_option_default_node
);
30395 if (callee_opts
== caller_opts
)
30398 /* Callee's ISA features should be a subset of the caller's. */
30399 struct arm_build_target caller_target
;
30400 struct arm_build_target callee_target
;
30401 caller_target
.isa
= sbitmap_alloc (isa_num_bits
);
30402 callee_target
.isa
= sbitmap_alloc (isa_num_bits
);
30404 arm_configure_build_target (&caller_target
, caller_opts
, &global_options_set
,
30406 arm_configure_build_target (&callee_target
, callee_opts
, &global_options_set
,
30408 if (!bitmap_subset_p (callee_target
.isa
, caller_target
.isa
))
30409 can_inline
= false;
30411 sbitmap_free (caller_target
.isa
);
30412 sbitmap_free (callee_target
.isa
);
30414 /* OK to inline between different modes.
30415 Function with mode specific instructions, e.g using asm,
30416 must be explicitly protected with noinline. */
30420 /* Hook to fix function's alignment affected by target attribute. */
30423 arm_relayout_function (tree fndecl
)
30425 if (DECL_USER_ALIGN (fndecl
))
30428 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30431 callee_tree
= target_option_default_node
;
30433 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
30436 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts
->x_target_flags
)));
30439 /* Inner function to process the attribute((target(...))), take an argument and
30440 set the current options from the argument. If we have a list, recursively
30441 go over the list. */
30444 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
30446 if (TREE_CODE (args
) == TREE_LIST
)
30450 for (; args
; args
= TREE_CHAIN (args
))
30451 if (TREE_VALUE (args
)
30452 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
30457 else if (TREE_CODE (args
) != STRING_CST
)
30459 error ("attribute %<target%> argument not a string");
30463 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
30466 while ((q
= strtok (argstr
, ",")) != NULL
)
30468 while (ISSPACE (*q
)) ++q
;
30471 if (!strncmp (q
, "thumb", 5))
30472 opts
->x_target_flags
|= MASK_THUMB
;
30474 else if (!strncmp (q
, "arm", 3))
30475 opts
->x_target_flags
&= ~MASK_THUMB
;
30477 else if (!strncmp (q
, "fpu=", 4))
30480 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+4,
30481 &fpu_index
, CL_TARGET
))
30483 error ("invalid fpu for attribute(target(\"%s\"))", q
);
30486 if (fpu_index
== TARGET_FPU_auto
)
30488 /* This doesn't really make sense until we support
30489 general dynamic selection of the architecture and all
30491 sorry ("auto fpu selection not currently permitted here");
30494 opts
->x_arm_fpu_index
= (enum fpu_type
) fpu_index
;
30498 error ("attribute(target(\"%s\")) is unknown", q
);
30506 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30509 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
30510 struct gcc_options
*opts_set
)
30512 struct cl_target_option cl_opts
;
30514 if (!arm_valid_target_attribute_rec (args
, opts
))
30517 cl_target_option_save (&cl_opts
, opts
);
30518 arm_configure_build_target (&arm_active_target
, &cl_opts
, opts_set
, false);
30519 arm_option_check_internal (opts
);
30520 /* Do any overrides, such as global options arch=xxx. */
30521 arm_option_override_internal (opts
, opts_set
);
30523 return build_target_option_node (opts
);
30527 add_attribute (const char * mode
, tree
*attributes
)
30529 size_t len
= strlen (mode
);
30530 tree value
= build_string (len
, mode
);
30532 TREE_TYPE (value
) = build_array_type (char_type_node
,
30533 build_index_type (size_int (len
)));
30535 *attributes
= tree_cons (get_identifier ("target"),
30536 build_tree_list (NULL_TREE
, value
),
30540 /* For testing. Insert thumb or arm modes alternatively on functions. */
30543 arm_insert_attributes (tree fndecl
, tree
* attributes
)
30547 if (! TARGET_FLIP_THUMB
)
30550 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
30551 || DECL_BUILT_IN (fndecl
) || DECL_ARTIFICIAL (fndecl
))
30554 /* Nested definitions must inherit mode. */
30555 if (current_function_decl
)
30557 mode
= TARGET_THUMB
? "thumb" : "arm";
30558 add_attribute (mode
, attributes
);
30562 /* If there is already a setting don't change it. */
30563 if (lookup_attribute ("target", *attributes
) != NULL
)
30566 mode
= thumb_flipper
? "thumb" : "arm";
30567 add_attribute (mode
, attributes
);
30569 thumb_flipper
= !thumb_flipper
;
30572 /* Hook to validate attribute((target("string"))). */
30575 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
30576 tree args
, int ARG_UNUSED (flags
))
30579 struct gcc_options func_options
;
30580 tree cur_tree
, new_optimize
;
30581 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
30583 /* Get the optimization options of the current function. */
30584 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
30586 /* If the function changed the optimization levels as well as setting target
30587 options, start with the optimizations specified. */
30588 if (!func_optimize
)
30589 func_optimize
= optimization_default_node
;
30591 /* Init func_options. */
30592 memset (&func_options
, 0, sizeof (func_options
));
30593 init_options_struct (&func_options
, NULL
);
30594 lang_hooks
.init_options_struct (&func_options
);
30596 /* Initialize func_options to the defaults. */
30597 cl_optimization_restore (&func_options
,
30598 TREE_OPTIMIZATION (func_optimize
));
30600 cl_target_option_restore (&func_options
,
30601 TREE_TARGET_OPTION (target_option_default_node
));
30603 /* Set func_options flags with new target mode. */
30604 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
30605 &global_options_set
);
30607 if (cur_tree
== NULL_TREE
)
30610 new_optimize
= build_optimization_node (&func_options
);
30612 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
30614 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
30616 finalize_options_struct (&func_options
);
30621 /* Match an ISA feature bitmap to a named FPU. We always use the
30622 first entry that exactly matches the feature set, so that we
30623 effectively canonicalize the FPU name for the assembler. */
30625 arm_identify_fpu_from_isa (sbitmap isa
)
30627 auto_sbitmap
fpubits (isa_num_bits
);
30628 auto_sbitmap
cand_fpubits (isa_num_bits
);
30630 bitmap_and (fpubits
, isa
, isa_all_fpubits
);
30632 /* If there are no ISA feature bits relating to the FPU, we must be
30633 doing soft-float. */
30634 if (bitmap_empty_p (fpubits
))
30637 for (unsigned int i
= 0; i
< ARRAY_SIZE (all_fpus
); i
++)
30639 arm_initialize_isa (cand_fpubits
, all_fpus
[i
].isa_bits
);
30640 if (bitmap_equal_p (fpubits
, cand_fpubits
))
30641 return all_fpus
[i
].name
;
30643 /* We must find an entry, or things have gone wrong. */
30644 gcc_unreachable ();
30648 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
30651 fprintf (stream
, "\t.syntax unified\n");
30655 if (is_called_in_ARM_mode (decl
)
30656 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
30657 && cfun
->is_thunk
))
30658 fprintf (stream
, "\t.code 32\n");
30659 else if (TARGET_THUMB1
)
30660 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
30662 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
30665 fprintf (stream
, "\t.arm\n");
30667 asm_fprintf (asm_out_file
, "\t.fpu %s\n",
30670 : arm_identify_fpu_from_isa (arm_active_target
.isa
)));
30672 if (TARGET_POKE_FUNCTION_NAME
)
30673 arm_poke_function_name (stream
, (const char *) name
);
30676 /* If MEM is in the form of [base+offset], extract the two parts
30677 of address and set to BASE and OFFSET, otherwise return false
30678 after clearing BASE and OFFSET. */
30681 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
30685 gcc_assert (MEM_P (mem
));
30687 addr
= XEXP (mem
, 0);
30689 /* Strip off const from addresses like (const (addr)). */
30690 if (GET_CODE (addr
) == CONST
)
30691 addr
= XEXP (addr
, 0);
30693 if (GET_CODE (addr
) == REG
)
30696 *offset
= const0_rtx
;
30700 if (GET_CODE (addr
) == PLUS
30701 && GET_CODE (XEXP (addr
, 0)) == REG
30702 && CONST_INT_P (XEXP (addr
, 1)))
30704 *base
= XEXP (addr
, 0);
30705 *offset
= XEXP (addr
, 1);
30710 *offset
= NULL_RTX
;
30715 /* If INSN is a load or store of address in the form of [base+offset],
30716 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
30717 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30718 otherwise return FALSE. */
30721 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
30725 gcc_assert (INSN_P (insn
));
30726 x
= PATTERN (insn
);
30727 if (GET_CODE (x
) != SET
)
30731 dest
= SET_DEST (x
);
30732 if (GET_CODE (src
) == REG
&& GET_CODE (dest
) == MEM
)
30735 extract_base_offset_in_addr (dest
, base
, offset
);
30737 else if (GET_CODE (src
) == MEM
&& GET_CODE (dest
) == REG
)
30740 extract_base_offset_in_addr (src
, base
, offset
);
30745 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
30748 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30750 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
30751 and PRI are only calculated for these instructions. For other instruction,
30752 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
30753 instruction fusion can be supported by returning different priorities.
30755 It's important that irrelevant instructions get the largest FUSION_PRI. */
30758 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
30759 int *fusion_pri
, int *pri
)
30765 gcc_assert (INSN_P (insn
));
30768 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
30775 /* Load goes first. */
30777 *fusion_pri
= tmp
- 1;
30779 *fusion_pri
= tmp
- 2;
30783 /* INSN with smaller base register goes first. */
30784 tmp
-= ((REGNO (base
) & 0xff) << 20);
30786 /* INSN with smaller offset goes first. */
30787 off_val
= (int)(INTVAL (offset
));
30789 tmp
-= (off_val
& 0xfffff);
30791 tmp
+= ((- off_val
) & 0xfffff);
30798 /* Construct and return a PARALLEL RTX vector with elements numbering the
30799 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30800 the vector - from the perspective of the architecture. This does not
30801 line up with GCC's perspective on lane numbers, so we end up with
30802 different masks depending on our target endian-ness. The diagram
30803 below may help. We must draw the distinction when building masks
30804 which select one half of the vector. An instruction selecting
30805 architectural low-lanes for a big-endian target, must be described using
30806 a mask selecting GCC high-lanes.
30808 Big-Endian Little-Endian
30810 GCC 0 1 2 3 3 2 1 0
30811 | x | x | x | x | | x | x | x | x |
30812 Architecture 3 2 1 0 3 2 1 0
30814 Low Mask: { 2, 3 } { 0, 1 }
30815 High Mask: { 0, 1 } { 2, 3 }
30819 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
30821 int nunits
= GET_MODE_NUNITS (mode
);
30822 rtvec v
= rtvec_alloc (nunits
/ 2);
30823 int high_base
= nunits
/ 2;
30829 if (BYTES_BIG_ENDIAN
)
30830 base
= high
? low_base
: high_base
;
30832 base
= high
? high_base
: low_base
;
30834 for (i
= 0; i
< nunits
/ 2; i
++)
30835 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
30837 t1
= gen_rtx_PARALLEL (mode
, v
);
30841 /* Check OP for validity as a PARALLEL RTX vector with elements
30842 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30843 from the perspective of the architecture. See the diagram above
30844 arm_simd_vect_par_cnst_half_p for more details. */
30847 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
30850 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
30851 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
30852 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
30855 if (!VECTOR_MODE_P (mode
))
30858 if (count_op
!= count_ideal
)
30861 for (i
= 0; i
< count_ideal
; i
++)
30863 rtx elt_op
= XVECEXP (op
, 0, i
);
30864 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
30866 if (!CONST_INT_P (elt_op
)
30867 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
30873 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30876 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
30879 /* For now, we punt and not handle this for TARGET_THUMB1. */
30880 if (vcall_offset
&& TARGET_THUMB1
)
30883 /* Otherwise ok. */
30887 /* Generate RTL for a conditional branch with rtx comparison CODE in
30888 mode CC_MODE. The destination of the unlikely conditional branch
30892 arm_gen_unlikely_cbranch (enum rtx_code code
, machine_mode cc_mode
,
30896 x
= gen_rtx_fmt_ee (code
, VOIDmode
,
30897 gen_rtx_REG (cc_mode
, CC_REGNUM
),
30900 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
30901 gen_rtx_LABEL_REF (VOIDmode
, label_ref
),
30903 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
30906 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30908 For pure-code sections there is no letter code for this attribute, so
30909 output all the section flags numerically when this is needed. */
30912 arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
)
30915 if (flags
& SECTION_ARM_PURECODE
)
30919 if (!(flags
& SECTION_DEBUG
))
30921 if (flags
& SECTION_EXCLUDE
)
30922 *num
|= 0x80000000;
30923 if (flags
& SECTION_WRITE
)
30925 if (flags
& SECTION_CODE
)
30927 if (flags
& SECTION_MERGE
)
30929 if (flags
& SECTION_STRINGS
)
30931 if (flags
& SECTION_TLS
)
30933 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
30942 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
30944 If pure-code is passed as an option, make sure all functions are in
30945 sections that have the SHF_ARM_PURECODE attribute. */
30948 arm_function_section (tree decl
, enum node_frequency freq
,
30949 bool startup
, bool exit
)
30951 const char * section_name
;
30954 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
30955 return default_function_section (decl
, freq
, startup
, exit
);
30957 if (!target_pure_code
)
30958 return default_function_section (decl
, freq
, startup
, exit
);
30961 section_name
= DECL_SECTION_NAME (decl
);
30963 /* If a function is not in a named section then it falls under the 'default'
30964 text section, also known as '.text'. We can preserve previous behavior as
30965 the default text section already has the SHF_ARM_PURECODE section
30969 section
*default_sec
= default_function_section (decl
, freq
, startup
,
30972 /* If default_sec is not null, then it must be a special section like for
30973 example .text.startup. We set the pure-code attribute and return the
30974 same section to preserve existing behavior. */
30976 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
30977 return default_sec
;
30980 /* Otherwise look whether a section has already been created with
30982 sec
= get_named_section (decl
, section_name
, 0);
30984 /* If that is not the case passing NULL as the section's name to
30985 'get_named_section' will create a section with the declaration's
30987 sec
= get_named_section (decl
, NULL
, 0);
30989 /* Set the SHF_ARM_PURECODE attribute. */
30990 sec
->common
.flags
|= SECTION_ARM_PURECODE
;
30995 /* Implements the TARGET_SECTION_FLAGS hook.
30997 If DECL is a function declaration and pure-code is passed as an option
30998 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
30999 section's name and RELOC indicates whether the declarations initializer may
31000 contain runtime relocations. */
31002 static unsigned int
31003 arm_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
31005 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
31007 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
&& target_pure_code
)
31008 flags
|= SECTION_ARM_PURECODE
;
31013 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31016 arm_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
,
31018 rtx
*quot_p
, rtx
*rem_p
)
31020 if (mode
== SImode
)
31021 gcc_assert (!TARGET_IDIV
);
31023 machine_mode libval_mode
= smallest_mode_for_size (2 * GET_MODE_BITSIZE (mode
),
31026 rtx libval
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
31028 op0
, GET_MODE (op0
),
31029 op1
, GET_MODE (op1
));
31031 rtx quotient
= simplify_gen_subreg (mode
, libval
, libval_mode
, 0);
31032 rtx remainder
= simplify_gen_subreg (mode
, libval
, libval_mode
,
31033 GET_MODE_SIZE (mode
));
31035 gcc_assert (quotient
);
31036 gcc_assert (remainder
);
31038 *quot_p
= quotient
;
31039 *rem_p
= remainder
;
31042 /* This function checks for the availability of the coprocessor builtin passed
31043 in BUILTIN for the current target. Returns true if it is available and
31044 false otherwise. If a BUILTIN is passed for which this function has not
31045 been implemented it will cause an exception. */
31048 arm_coproc_builtin_available (enum unspecv builtin
)
31050 /* None of these builtins are available in Thumb mode if the target only
31051 supports Thumb-1. */
31069 case VUNSPEC_LDC2L
:
31071 case VUNSPEC_STC2L
:
31074 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31081 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31083 if (arm_arch6
|| arm_arch5te
)
31086 case VUNSPEC_MCRR2
:
31087 case VUNSPEC_MRRC2
:
31092 gcc_unreachable ();
31097 /* This function returns true if OP is a valid memory operand for the ldc and
31098 stc coprocessor instructions and false otherwise. */
31101 arm_coproc_ldc_stc_legitimate_address (rtx op
)
31103 HOST_WIDE_INT range
;
31104 /* Has to be a memory operand. */
31110 /* We accept registers. */
31114 switch GET_CODE (op
)
31118 /* Or registers with an offset. */
31119 if (!REG_P (XEXP (op
, 0)))
31124 /* The offset must be an immediate though. */
31125 if (!CONST_INT_P (op
))
31128 range
= INTVAL (op
);
31130 /* Within the range of [-1020,1020]. */
31131 if (!IN_RANGE (range
, -1020, 1020))
31134 /* And a multiple of 4. */
31135 return (range
% 4) == 0;
31141 return REG_P (XEXP (op
, 0));
31143 gcc_unreachable ();
31147 #include "gt-arm.h"