/* Output routines for GCC for ARM.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
23 #define IN_TARGET_CODE 1
26 #define INCLUDE_STRING
28 #include "coretypes.h"
38 #include "stringpool.h"
45 #include "diagnostic-core.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
52 #include "insn-attr.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
65 #include "target-globals.h"
67 #include "tm-constrs.h"
69 #include "optabs-libfuncs.h"
73 #include "tree-vectorizer.h"
76 /* This file should be included last. */
77 #include "target-def.h"
/* Forward definitions of types.  */

/* Nodes and fixups of the minipool (constant-pool) machinery; the
   struct definitions appear later in this file.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* Function-pointer hook for emitting language-specific object
   attributes.  A zero-initialized global, so it is NULL unless a
   front end assigns it elsewhere.  */
void (*arm_lang_output_object_attributes_hook)(void);
90 /* Forward function declarations. */
91 static bool arm_const_not_ok_for_debug_p (rtx
);
92 static int arm_needs_doubleword_align (machine_mode
, const_tree
);
93 static int arm_compute_static_chain_stack_bytes (void);
94 static arm_stack_offsets
*arm_get_frame_offsets (void);
95 static void arm_compute_frame_layout (void);
96 static void arm_add_gc_roots (void);
97 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
98 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
99 static unsigned bit_count (unsigned long);
100 static unsigned bitmap_popcount (const sbitmap
);
101 static int arm_address_register_rtx_p (rtx
, int);
102 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
103 static bool is_called_in_ARM_mode (tree
);
104 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
105 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
106 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
107 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
108 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
109 inline static int thumb1_index_register_rtx_p (rtx
, int);
110 static int thumb_far_jump_used_p (void);
111 static bool thumb_force_lr_save (void);
112 static unsigned arm_size_return_regs (void);
113 static bool arm_assemble_integer (rtx
, unsigned int, int);
114 static void arm_print_operand (FILE *, rtx
, int);
115 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
116 static bool arm_print_operand_punct_valid_p (unsigned char code
);
117 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
118 static arm_cc
get_arm_condition_code (rtx
);
119 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
120 static const char *output_multi_immediate (rtx
*, const char *, const char *,
122 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
123 static struct machine_function
*arm_init_machine_status (void);
124 static void thumb_exit (FILE *, int);
125 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
126 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
127 static Mnode
*add_minipool_forward_ref (Mfix
*);
128 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
129 static Mnode
*add_minipool_backward_ref (Mfix
*);
130 static void assign_minipool_offsets (Mfix
*);
131 static void arm_print_value (FILE *, rtx
);
132 static void dump_minipool (rtx_insn
*);
133 static int arm_barrier_cost (rtx_insn
*);
134 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
135 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
136 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
138 static void arm_reorg (void);
139 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
140 static unsigned long arm_compute_save_reg0_reg12_mask (void);
141 static unsigned long arm_compute_save_core_reg_mask (void);
142 static unsigned long arm_isr_value (tree
);
143 static unsigned long arm_compute_func_type (void);
144 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
145 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
146 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
147 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
148 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
150 static tree
arm_handle_cmse_nonsecure_entry (tree
*, tree
, tree
, int, bool *);
151 static tree
arm_handle_cmse_nonsecure_call (tree
*, tree
, tree
, int, bool *);
152 static void arm_output_function_epilogue (FILE *);
153 static void arm_output_function_prologue (FILE *);
154 static int arm_comp_type_attributes (const_tree
, const_tree
);
155 static void arm_set_default_type_attributes (tree
);
156 static int arm_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
157 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
158 static int optimal_immediate_sequence (enum rtx_code code
,
159 unsigned HOST_WIDE_INT val
,
160 struct four_ints
*return_sequence
);
161 static int optimal_immediate_sequence_1 (enum rtx_code code
,
162 unsigned HOST_WIDE_INT val
,
163 struct four_ints
*return_sequence
,
165 static int arm_get_strip_length (int);
166 static bool arm_function_ok_for_sibcall (tree
, tree
);
167 static machine_mode
arm_promote_function_mode (const_tree
,
170 static bool arm_return_in_memory (const_tree
, const_tree
);
171 static rtx
arm_function_value (const_tree
, const_tree
, bool);
172 static rtx
arm_libcall_value_1 (machine_mode
);
173 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
174 static bool arm_function_value_regno_p (const unsigned int);
175 static void arm_internal_label (FILE *, const char *, unsigned long);
176 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
178 static bool arm_have_conditional_execution (void);
179 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
180 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
181 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
182 static int arm_insn_cost (rtx_insn
*, bool);
183 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
184 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
185 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
186 static void emit_constant_insn (rtx cond
, rtx pattern
);
187 static rtx_insn
*emit_set_insn (rtx
, rtx
);
188 static void arm_add_cfa_adjust_cfa_note (rtx
, int, rtx
, rtx
);
189 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
190 static void arm_emit_multi_reg_pop (unsigned long);
191 static int vfp_emit_fstmd (int, int);
192 static void arm_emit_vfp_multi_reg_pop (int, int, rtx
);
193 static int arm_arg_partial_bytes (cumulative_args_t
,
194 const function_arg_info
&);
195 static rtx
arm_function_arg (cumulative_args_t
, const function_arg_info
&);
196 static void arm_function_arg_advance (cumulative_args_t
,
197 const function_arg_info
&);
198 static pad_direction
arm_function_arg_padding (machine_mode
, const_tree
);
199 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
200 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
202 static rtx
aapcs_libcall_value (machine_mode
);
203 static int aapcs_select_return_coproc (const_tree
, const_tree
);
205 #ifdef OBJECT_FORMAT_ELF
206 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
207 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
210 static void arm_encode_section_info (tree
, rtx
, int);
213 static void arm_file_end (void);
214 static void arm_file_start (void);
215 static void arm_insert_attributes (tree
, tree
*);
217 static void arm_setup_incoming_varargs (cumulative_args_t
,
218 const function_arg_info
&, int *, int);
219 static bool arm_pass_by_reference (cumulative_args_t
,
220 const function_arg_info
&);
221 static bool arm_promote_prototypes (const_tree
);
222 static bool arm_default_short_enums (void);
223 static bool arm_align_anon_bitfield (void);
224 static bool arm_return_in_msb (const_tree
);
225 static bool arm_must_pass_in_stack (const function_arg_info
&);
226 static bool arm_return_in_memory (const_tree
, const_tree
);
228 static void arm_unwind_emit (FILE *, rtx_insn
*);
229 static bool arm_output_ttype (rtx
);
230 static void arm_asm_emit_except_personality (rtx
);
232 static void arm_asm_init_sections (void);
233 static rtx
arm_dwarf_register_span (rtx
);
235 static tree
arm_cxx_guard_type (void);
236 static bool arm_cxx_guard_mask_bit (void);
237 static tree
arm_get_cookie_size (tree
);
238 static bool arm_cookie_has_size (void);
239 static bool arm_cxx_cdtor_returns_this (void);
240 static bool arm_cxx_key_method_may_be_inline (void);
241 static void arm_cxx_determine_class_data_visibility (tree
);
242 static bool arm_cxx_class_data_always_comdat (void);
243 static bool arm_cxx_use_aeabi_atexit (void);
244 static void arm_init_libfuncs (void);
245 static tree
arm_build_builtin_va_list (void);
246 static void arm_expand_builtin_va_start (tree
, rtx
);
247 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
248 static void arm_option_override (void);
249 static void arm_option_restore (struct gcc_options
*, struct gcc_options
*,
250 struct cl_target_option
*);
251 static void arm_override_options_after_change (void);
252 static void arm_option_print (FILE *, int, struct cl_target_option
*);
253 static void arm_set_current_function (tree
);
254 static bool arm_can_inline_p (tree
, tree
);
255 static void arm_relayout_function (tree
);
256 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
257 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
258 static bool arm_sched_can_speculate_insn (rtx_insn
*);
259 static bool arm_macro_fusion_p (void);
260 static bool arm_cannot_copy_insn_p (rtx_insn
*);
261 static int arm_issue_rate (void);
262 static int arm_sched_variable_issue (FILE *, int, rtx_insn
*, int);
263 static int arm_first_cycle_multipass_dfa_lookahead (void);
264 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
265 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
266 static bool arm_output_addr_const_extra (FILE *, rtx
);
267 static bool arm_allocate_stack_slots_for_args (void);
268 static bool arm_warn_func_return (tree
);
269 static tree
arm_promoted_type (const_tree t
);
270 static bool arm_scalar_mode_supported_p (scalar_mode
);
271 static bool arm_frame_pointer_required (void);
272 static bool arm_can_eliminate (const int, const int);
273 static void arm_asm_trampoline_template (FILE *);
274 static void arm_trampoline_init (rtx
, tree
, rtx
);
275 static rtx
arm_trampoline_adjust_address (rtx
);
276 static rtx_insn
*arm_pic_static_addr (rtx orig
, rtx reg
);
277 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
278 static bool xscale_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
279 static bool fa726te_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
280 static bool arm_array_mode_supported_p (machine_mode
,
281 unsigned HOST_WIDE_INT
);
282 static machine_mode
arm_preferred_simd_mode (scalar_mode
);
283 static bool arm_class_likely_spilled_p (reg_class_t
);
284 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
285 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
286 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
290 static void arm_conditional_register_usage (void);
291 static enum flt_eval_method
arm_excess_precision (enum excess_precision_type
);
292 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
293 static unsigned int arm_autovectorize_vector_modes (vector_modes
*, bool);
294 static int arm_default_branch_cost (bool, bool);
295 static int arm_cortex_a5_branch_cost (bool, bool);
296 static int arm_cortex_m_branch_cost (bool, bool);
297 static int arm_cortex_m7_branch_cost (bool, bool);
299 static bool arm_vectorize_vec_perm_const (machine_mode
, machine_mode
, rtx
, rtx
,
300 rtx
, const vec_perm_indices
&);
302 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
304 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
306 int misalign ATTRIBUTE_UNUSED
);
308 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
309 bool op0_preserve_value
);
310 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
312 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
313 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
315 static section
*arm_function_section (tree
, enum node_frequency
, bool, bool);
316 static bool arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
);
317 static unsigned int arm_elf_section_type_flags (tree decl
, const char *name
,
319 static void arm_expand_divmod_libfunc (rtx
, machine_mode
, rtx
, rtx
, rtx
*, rtx
*);
320 static opt_scalar_float_mode
arm_floatn_mode (int, bool);
321 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode
);
322 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode
);
323 static bool arm_modes_tieable_p (machine_mode
, machine_mode
);
324 static HOST_WIDE_INT
arm_constant_alignment (const_tree
, HOST_WIDE_INT
);
325 static rtx_insn
*thumb1_md_asm_adjust (vec
<rtx
> &, vec
<rtx
> &,
327 vec
<const char *> &, vec
<rtx
> &,
328 HARD_REG_SET
&, location_t
);
329 static const char *arm_identify_fpu_from_isa (sbitmap
);
331 /* Table of machine attributes. */
332 static const struct attribute_spec arm_attribute_table
[] =
334 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
335 affects_type_identity, handler, exclude } */
336 /* Function calls made to this symbol must be done indirectly, because
337 it may lie outside of the 26 bit addressing range of a normal function
339 { "long_call", 0, 0, false, true, true, false, NULL
, NULL
},
340 /* Whereas these functions are always known to reside within the 26 bit
342 { "short_call", 0, 0, false, true, true, false, NULL
, NULL
},
343 /* Specify the procedure call conventions for a function. */
344 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute
,
346 /* Interrupt Service Routines have special prologue and epilogue requirements. */
347 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute
,
349 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute
,
351 { "naked", 0, 0, true, false, false, false,
352 arm_handle_fndecl_attribute
, NULL
},
354 /* ARM/PE has three new attributes:
356 dllexport - for exporting a function/variable that will live in a dll
357 dllimport - for importing a function/variable from a dll
359 Microsoft allows multiple declspecs in one __declspec, separating
360 them with spaces. We do NOT support this. Instead, use __declspec
363 { "dllimport", 0, 0, true, false, false, false, NULL
, NULL
},
364 { "dllexport", 0, 0, true, false, false, false, NULL
, NULL
},
365 { "interfacearm", 0, 0, true, false, false, false,
366 arm_handle_fndecl_attribute
, NULL
},
367 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
368 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute
,
370 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute
,
372 { "notshared", 0, 0, false, true, false, false,
373 arm_handle_notshared_attribute
, NULL
},
375 /* ARMv8-M Security Extensions support. */
376 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
377 arm_handle_cmse_nonsecure_entry
, NULL
},
378 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
379 arm_handle_cmse_nonsecure_call
, NULL
},
380 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL
, NULL
},
381 { NULL
, 0, 0, false, false, false, false, NULL
, NULL
}
384 /* Initialize the GCC target structure. */
385 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
386 #undef TARGET_MERGE_DECL_ATTRIBUTES
387 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
390 #undef TARGET_CHECK_BUILTIN_CALL
391 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
393 #undef TARGET_LEGITIMIZE_ADDRESS
394 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
396 #undef TARGET_ATTRIBUTE_TABLE
397 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
399 #undef TARGET_INSERT_ATTRIBUTES
400 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
402 #undef TARGET_ASM_FILE_START
403 #define TARGET_ASM_FILE_START arm_file_start
404 #undef TARGET_ASM_FILE_END
405 #define TARGET_ASM_FILE_END arm_file_end
407 #undef TARGET_ASM_ALIGNED_SI_OP
408 #define TARGET_ASM_ALIGNED_SI_OP NULL
409 #undef TARGET_ASM_INTEGER
410 #define TARGET_ASM_INTEGER arm_assemble_integer
412 #undef TARGET_PRINT_OPERAND
413 #define TARGET_PRINT_OPERAND arm_print_operand
414 #undef TARGET_PRINT_OPERAND_ADDRESS
415 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
416 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
417 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
419 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
420 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
422 #undef TARGET_ASM_FUNCTION_PROLOGUE
423 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
425 #undef TARGET_ASM_FUNCTION_EPILOGUE
426 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
428 #undef TARGET_CAN_INLINE_P
429 #define TARGET_CAN_INLINE_P arm_can_inline_p
431 #undef TARGET_RELAYOUT_FUNCTION
432 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
434 #undef TARGET_OPTION_OVERRIDE
435 #define TARGET_OPTION_OVERRIDE arm_option_override
437 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
438 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
440 #undef TARGET_OPTION_RESTORE
441 #define TARGET_OPTION_RESTORE arm_option_restore
443 #undef TARGET_OPTION_PRINT
444 #define TARGET_OPTION_PRINT arm_option_print
446 #undef TARGET_COMP_TYPE_ATTRIBUTES
447 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
449 #undef TARGET_SCHED_CAN_SPECULATE_INSN
450 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
452 #undef TARGET_SCHED_MACRO_FUSION_P
453 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
455 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
456 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
458 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
459 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
461 #undef TARGET_SCHED_ADJUST_COST
462 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
464 #undef TARGET_SET_CURRENT_FUNCTION
465 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
467 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
468 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
470 #undef TARGET_SCHED_REORDER
471 #define TARGET_SCHED_REORDER arm_sched_reorder
473 #undef TARGET_REGISTER_MOVE_COST
474 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
476 #undef TARGET_MEMORY_MOVE_COST
477 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
479 #undef TARGET_ENCODE_SECTION_INFO
481 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
483 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
486 #undef TARGET_STRIP_NAME_ENCODING
487 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
489 #undef TARGET_ASM_INTERNAL_LABEL
490 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
492 #undef TARGET_FLOATN_MODE
493 #define TARGET_FLOATN_MODE arm_floatn_mode
495 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
496 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
498 #undef TARGET_FUNCTION_VALUE
499 #define TARGET_FUNCTION_VALUE arm_function_value
501 #undef TARGET_LIBCALL_VALUE
502 #define TARGET_LIBCALL_VALUE arm_libcall_value
504 #undef TARGET_FUNCTION_VALUE_REGNO_P
505 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
507 #undef TARGET_ASM_OUTPUT_MI_THUNK
508 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
509 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
510 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
512 #undef TARGET_RTX_COSTS
513 #define TARGET_RTX_COSTS arm_rtx_costs
514 #undef TARGET_ADDRESS_COST
515 #define TARGET_ADDRESS_COST arm_address_cost
516 #undef TARGET_INSN_COST
517 #define TARGET_INSN_COST arm_insn_cost
519 #undef TARGET_SHIFT_TRUNCATION_MASK
520 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
521 #undef TARGET_VECTOR_MODE_SUPPORTED_P
522 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
523 #undef TARGET_ARRAY_MODE_SUPPORTED_P
524 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
525 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
526 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
527 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
528 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
529 arm_autovectorize_vector_modes
531 #undef TARGET_MACHINE_DEPENDENT_REORG
532 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
534 #undef TARGET_INIT_BUILTINS
535 #define TARGET_INIT_BUILTINS arm_init_builtins
536 #undef TARGET_EXPAND_BUILTIN
537 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
538 #undef TARGET_BUILTIN_DECL
539 #define TARGET_BUILTIN_DECL arm_builtin_decl
541 #undef TARGET_INIT_LIBFUNCS
542 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
544 #undef TARGET_PROMOTE_FUNCTION_MODE
545 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
546 #undef TARGET_PROMOTE_PROTOTYPES
547 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
548 #undef TARGET_PASS_BY_REFERENCE
549 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
550 #undef TARGET_ARG_PARTIAL_BYTES
551 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
552 #undef TARGET_FUNCTION_ARG
553 #define TARGET_FUNCTION_ARG arm_function_arg
554 #undef TARGET_FUNCTION_ARG_ADVANCE
555 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
556 #undef TARGET_FUNCTION_ARG_PADDING
557 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
558 #undef TARGET_FUNCTION_ARG_BOUNDARY
559 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
561 #undef TARGET_SETUP_INCOMING_VARARGS
562 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
564 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
565 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
567 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
568 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
569 #undef TARGET_TRAMPOLINE_INIT
570 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
571 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
572 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
574 #undef TARGET_WARN_FUNC_RETURN
575 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
577 #undef TARGET_DEFAULT_SHORT_ENUMS
578 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
580 #undef TARGET_ALIGN_ANON_BITFIELD
581 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
583 #undef TARGET_NARROW_VOLATILE_BITFIELD
584 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
586 #undef TARGET_CXX_GUARD_TYPE
587 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
589 #undef TARGET_CXX_GUARD_MASK_BIT
590 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
592 #undef TARGET_CXX_GET_COOKIE_SIZE
593 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
595 #undef TARGET_CXX_COOKIE_HAS_SIZE
596 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
598 #undef TARGET_CXX_CDTOR_RETURNS_THIS
599 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
601 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
602 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
604 #undef TARGET_CXX_USE_AEABI_ATEXIT
605 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
607 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
608 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
609 arm_cxx_determine_class_data_visibility
611 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
612 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
614 #undef TARGET_RETURN_IN_MSB
615 #define TARGET_RETURN_IN_MSB arm_return_in_msb
617 #undef TARGET_RETURN_IN_MEMORY
618 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
620 #undef TARGET_MUST_PASS_IN_STACK
621 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
624 #undef TARGET_ASM_UNWIND_EMIT
625 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
627 /* EABI unwinding tables use a different format for the typeinfo tables. */
628 #undef TARGET_ASM_TTYPE
629 #define TARGET_ASM_TTYPE arm_output_ttype
631 #undef TARGET_ARM_EABI_UNWINDER
632 #define TARGET_ARM_EABI_UNWINDER true
634 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
635 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
637 #endif /* ARM_UNWIND_INFO */
639 #undef TARGET_ASM_INIT_SECTIONS
640 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
642 #undef TARGET_DWARF_REGISTER_SPAN
643 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
645 #undef TARGET_CANNOT_COPY_INSN_P
646 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
649 #undef TARGET_HAVE_TLS
650 #define TARGET_HAVE_TLS true
653 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
654 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
656 #undef TARGET_LEGITIMATE_CONSTANT_P
657 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
659 #undef TARGET_CANNOT_FORCE_CONST_MEM
660 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
662 #undef TARGET_MAX_ANCHOR_OFFSET
663 #define TARGET_MAX_ANCHOR_OFFSET 4095
665 /* The minimum is set such that the total size of the block
666 for a particular anchor is -4088 + 1 + 4095 bytes, which is
667 divisible by eight, ensuring natural spacing of anchors. */
668 #undef TARGET_MIN_ANCHOR_OFFSET
669 #define TARGET_MIN_ANCHOR_OFFSET -4088
671 #undef TARGET_SCHED_ISSUE_RATE
672 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
674 #undef TARGET_SCHED_VARIABLE_ISSUE
675 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
677 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
678 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
679 arm_first_cycle_multipass_dfa_lookahead
681 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
682 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
683 arm_first_cycle_multipass_dfa_lookahead_guard
685 #undef TARGET_MANGLE_TYPE
686 #define TARGET_MANGLE_TYPE arm_mangle_type
688 #undef TARGET_INVALID_CONVERSION
689 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
691 #undef TARGET_INVALID_UNARY_OP
692 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
694 #undef TARGET_INVALID_BINARY_OP
695 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
697 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
698 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
700 #undef TARGET_BUILD_BUILTIN_VA_LIST
701 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
702 #undef TARGET_EXPAND_BUILTIN_VA_START
703 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
704 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
705 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
708 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
709 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
712 #undef TARGET_LEGITIMATE_ADDRESS_P
713 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
715 #undef TARGET_PREFERRED_RELOAD_CLASS
716 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
718 #undef TARGET_PROMOTED_TYPE
719 #define TARGET_PROMOTED_TYPE arm_promoted_type
721 #undef TARGET_SCALAR_MODE_SUPPORTED_P
722 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
724 #undef TARGET_COMPUTE_FRAME_LAYOUT
725 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
727 #undef TARGET_FRAME_POINTER_REQUIRED
728 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
730 #undef TARGET_CAN_ELIMINATE
731 #define TARGET_CAN_ELIMINATE arm_can_eliminate
733 #undef TARGET_CONDITIONAL_REGISTER_USAGE
734 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
736 #undef TARGET_CLASS_LIKELY_SPILLED_P
737 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
739 #undef TARGET_VECTORIZE_BUILTINS
740 #define TARGET_VECTORIZE_BUILTINS
742 #undef TARGET_VECTOR_ALIGNMENT
743 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
745 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
746 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
747 arm_vector_alignment_reachable
749 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
750 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
751 arm_builtin_support_vector_misalignment
753 #undef TARGET_PREFERRED_RENAME_CLASS
754 #define TARGET_PREFERRED_RENAME_CLASS \
755 arm_preferred_rename_class
757 #undef TARGET_VECTORIZE_VEC_PERM_CONST
758 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
760 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
761 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
762 arm_builtin_vectorization_cost
764 #undef TARGET_CANONICALIZE_COMPARISON
/* Target hook overrides: map the generic TARGET_* hooks onto their
   ARM-specific implementations.  (Reconstructed from extraction-damaged
   text: fused line numbers removed, directive layout restored.)  */

#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard

#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
835 /* Obstack for minipool constant handling. */
836 static struct obstack minipool_obstack
;
837 static char * minipool_startobj
;
839 /* The maximum number of insns skipped which
840 will be conditionalised if possible. */
841 static int max_insns_skipped
= 5;
843 /* True if we are currently building a constant table. */
844 int making_const_table
;
846 /* The processor for which instructions should be scheduled. */
847 enum processor_type arm_tune
= TARGET_CPU_arm_none
;
849 /* The current tuning set. */
850 const struct tune_params
*current_tune
;
852 /* Which floating point hardware to schedule for. */
855 /* Used for Thumb call_via trampolines. */
856 rtx thumb_call_via_label
[14];
857 static int thumb_call_reg_needed
;
859 /* The bits in this mask specify which instruction scheduling options should
861 unsigned int tune_flags
= 0;
863 /* The highest ARM architecture version supported by the
865 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
867 /* Active target architecture and tuning. */
869 struct arm_build_target arm_active_target
;
871 /* The following are used in the arm.md file as equivalents to bits
872 in the above two flag variables. */
874 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
877 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
880 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
883 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
886 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
889 /* Nonzero if this chip supports the ARM 6K extensions. */
892 /* Nonzero if this chip supports the ARM 6KZ extensions. */
895 /* Nonzero if instructions present in ARMv6-M can be used. */
898 /* Nonzero if this chip supports the ARM 7 extensions. */
901 /* Nonzero if this chip supports the Large Physical Address Extension. */
902 int arm_arch_lpae
= 0;
904 /* Nonzero if instructions not present in the 'M' profile can be used. */
905 int arm_arch_notm
= 0;
907 /* Nonzero if instructions present in ARMv7E-M can be used. */
910 /* Nonzero if instructions present in ARMv8 can be used. */
913 /* Nonzero if this chip supports the ARMv8.1 extensions. */
916 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
919 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
922 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
924 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
926 int arm_arch8_1m_main
= 0;
928 /* Nonzero if this chip supports the FP16 instructions extension of ARM
930 int arm_fp16_inst
= 0;
932 /* Nonzero if this chip can benefit from load scheduling. */
933 int arm_ld_sched
= 0;
935 /* Nonzero if this chip is a StrongARM. */
936 int arm_tune_strongarm
= 0;
938 /* Nonzero if this chip supports Intel Wireless MMX technology. */
939 int arm_arch_iwmmxt
= 0;
941 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
942 int arm_arch_iwmmxt2
= 0;
944 /* Nonzero if this chip is an XScale. */
945 int arm_arch_xscale
= 0;
947 /* Nonzero if tuning for XScale */
948 int arm_tune_xscale
= 0;
950 /* Nonzero if we want to tune for stores that access the write-buffer.
951 This typically means an ARM6 or ARM7 with MMU or MPU. */
952 int arm_tune_wbuf
= 0;
954 /* Nonzero if tuning for Cortex-A9. */
955 int arm_tune_cortex_a9
= 0;
957 /* Nonzero if we should define __THUMB_INTERWORK__ in the
959 XXX This is a bit of a hack, it's intended to help work around
960 problems in GLD which doesn't understand that armv5t code is
961 interworking clean. */
962 int arm_cpp_interwork
= 0;
964 /* Nonzero if chip supports Thumb 1. */
967 /* Nonzero if chip supports Thumb 2. */
970 /* Nonzero if chip supports integer division instruction. */
971 int arm_arch_arm_hwdiv
;
972 int arm_arch_thumb_hwdiv
;
974 /* Nonzero if chip disallows volatile memory access in IT block. */
975 int arm_arch_no_volatile_ce
;
977 /* Nonzero if we shouldn't use literal pools. */
978 bool arm_disable_literal_pool
= false;
980 /* The register number to be used for the PIC offset register. */
981 unsigned arm_pic_register
= INVALID_REGNUM
;
983 enum arm_pcs arm_pcs_default
;
985 /* For an explanation of these variables, see final_prescan_insn below. */
987 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
988 enum arm_cond_code arm_current_cc
;
991 int arm_target_label
;
992 /* The number of conditionally executed insns, including the current insn. */
993 int arm_condexec_count
= 0;
994 /* A bitmask specifying the patterns for the IT block.
995 Zero means do not output an IT block before this insn. */
996 int arm_condexec_mask
= 0;
997 /* The number of bits used in arm_condexec_mask. */
998 int arm_condexec_masklen
= 0;
1000 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1001 int arm_arch_crc
= 0;
1003 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1004 int arm_arch_dotprod
= 0;
1006 /* Nonzero if chip supports the ARMv8-M security extensions. */
1007 int arm_arch_cmse
= 0;
1009 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1010 int arm_m_profile_small_mul
= 0;
1012 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1013 int arm_arch_i8mm
= 0;
1015 /* Nonzero if chip supports the BFloat16 instructions. */
1016 int arm_arch_bf16
= 0;
1018 /* Nonzero if chip supports the Custom Datapath Extension. */
1019 int arm_arch_cde
= 0;
1020 int arm_arch_cde_coproc
= 0;
1021 const int arm_arch_cde_coproc_bits
[] = {
1022 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
1038 #define DEF_FP_SYSREG(reg) #reg,
1039 const char *fp_sysreg_names
[NB_FP_SYSREGS
] = {
1042 #undef DEF_FP_SYSREG
/* Canonical shift name used when emitting assembly.  */
#define ARM_LSL_NAME "lsl"
/* True iff two C strings compare equal.  */
#define streq(string1, string2) (strcmp (string1, string2) == 0)

/* Mask of the low registers usable as Thumb-2 work registers: r0-r7
   minus the frame pointer, stack pointer, program counter and (when in
   use) the PIC register.  (Final ": 0)))" tail reconstructed — the
   closing line was lost in extraction.)  */
#define THUMB2_WORK_REGS					\
  (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM)		\
	    | (1 << SP_REGNUM)					\
	    | (1 << PC_REGNUM)					\
	    | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM	\
	       ? (1 << PIC_OFFSET_TABLE_REGNUM)			\
	       : 0)))
1055 /* Initialization code. */
1059 enum processor_type scheduler
;
1060 unsigned int tune_flags
;
1061 const struct tune_params
*tune
;
/* Prefetch tuning triple meaning "prefetching is not beneficial":
   zero slots, unknown L1 size, unknown L1 line size.  */
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1065 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1072 /* arm generic vectorizer costs. */
1074 struct cpu_vec_costs arm_default_vec_cost
= {
1075 1, /* scalar_stmt_cost. */
1076 1, /* scalar load_cost. */
1077 1, /* scalar_store_cost. */
1078 1, /* vec_stmt_cost. */
1079 1, /* vec_to_scalar_cost. */
1080 1, /* scalar_to_vec_cost. */
1081 1, /* vec_align_load_cost. */
1082 1, /* vec_unalign_load_cost. */
1083 1, /* vec_unalign_store_cost. */
1084 1, /* vec_store_cost. */
1085 3, /* cond_taken_branch_cost. */
1086 1, /* cond_not_taken_branch_cost. */
1089 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1090 #include "aarch-cost-tables.h"
1094 const struct cpu_cost_table cortexa9_extra_costs
=
1101 COSTS_N_INSNS (1), /* shift_reg. */
1102 COSTS_N_INSNS (1), /* arith_shift. */
1103 COSTS_N_INSNS (2), /* arith_shift_reg. */
1105 COSTS_N_INSNS (1), /* log_shift_reg. */
1106 COSTS_N_INSNS (1), /* extend. */
1107 COSTS_N_INSNS (2), /* extend_arith. */
1108 COSTS_N_INSNS (1), /* bfi. */
1109 COSTS_N_INSNS (1), /* bfx. */
1113 true /* non_exec_costs_exec. */
1118 COSTS_N_INSNS (3), /* simple. */
1119 COSTS_N_INSNS (3), /* flag_setting. */
1120 COSTS_N_INSNS (2), /* extend. */
1121 COSTS_N_INSNS (3), /* add. */
1122 COSTS_N_INSNS (2), /* extend_add. */
1123 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1127 0, /* simple (N/A). */
1128 0, /* flag_setting (N/A). */
1129 COSTS_N_INSNS (4), /* extend. */
1131 COSTS_N_INSNS (4), /* extend_add. */
1137 COSTS_N_INSNS (2), /* load. */
1138 COSTS_N_INSNS (2), /* load_sign_extend. */
1139 COSTS_N_INSNS (2), /* ldrd. */
1140 COSTS_N_INSNS (2), /* ldm_1st. */
1141 1, /* ldm_regs_per_insn_1st. */
1142 2, /* ldm_regs_per_insn_subsequent. */
1143 COSTS_N_INSNS (5), /* loadf. */
1144 COSTS_N_INSNS (5), /* loadd. */
1145 COSTS_N_INSNS (1), /* load_unaligned. */
1146 COSTS_N_INSNS (2), /* store. */
1147 COSTS_N_INSNS (2), /* strd. */
1148 COSTS_N_INSNS (2), /* stm_1st. */
1149 1, /* stm_regs_per_insn_1st. */
1150 2, /* stm_regs_per_insn_subsequent. */
1151 COSTS_N_INSNS (1), /* storef. */
1152 COSTS_N_INSNS (1), /* stored. */
1153 COSTS_N_INSNS (1), /* store_unaligned. */
1154 COSTS_N_INSNS (1), /* loadv. */
1155 COSTS_N_INSNS (1) /* storev. */
1160 COSTS_N_INSNS (14), /* div. */
1161 COSTS_N_INSNS (4), /* mult. */
1162 COSTS_N_INSNS (7), /* mult_addsub. */
1163 COSTS_N_INSNS (30), /* fma. */
1164 COSTS_N_INSNS (3), /* addsub. */
1165 COSTS_N_INSNS (1), /* fpconst. */
1166 COSTS_N_INSNS (1), /* neg. */
1167 COSTS_N_INSNS (3), /* compare. */
1168 COSTS_N_INSNS (3), /* widen. */
1169 COSTS_N_INSNS (3), /* narrow. */
1170 COSTS_N_INSNS (3), /* toint. */
1171 COSTS_N_INSNS (3), /* fromint. */
1172 COSTS_N_INSNS (3) /* roundint. */
1176 COSTS_N_INSNS (24), /* div. */
1177 COSTS_N_INSNS (5), /* mult. */
1178 COSTS_N_INSNS (8), /* mult_addsub. */
1179 COSTS_N_INSNS (30), /* fma. */
1180 COSTS_N_INSNS (3), /* addsub. */
1181 COSTS_N_INSNS (1), /* fpconst. */
1182 COSTS_N_INSNS (1), /* neg. */
1183 COSTS_N_INSNS (3), /* compare. */
1184 COSTS_N_INSNS (3), /* widen. */
1185 COSTS_N_INSNS (3), /* narrow. */
1186 COSTS_N_INSNS (3), /* toint. */
1187 COSTS_N_INSNS (3), /* fromint. */
1188 COSTS_N_INSNS (3) /* roundint. */
1193 COSTS_N_INSNS (1), /* alu. */
1194 COSTS_N_INSNS (4), /* mult. */
1195 COSTS_N_INSNS (1), /* movi. */
1196 COSTS_N_INSNS (2), /* dup. */
1197 COSTS_N_INSNS (2) /* extract. */
1201 const struct cpu_cost_table cortexa8_extra_costs
=
1207 COSTS_N_INSNS (1), /* shift. */
1209 COSTS_N_INSNS (1), /* arith_shift. */
1210 0, /* arith_shift_reg. */
1211 COSTS_N_INSNS (1), /* log_shift. */
1212 0, /* log_shift_reg. */
1214 0, /* extend_arith. */
1220 true /* non_exec_costs_exec. */
1225 COSTS_N_INSNS (1), /* simple. */
1226 COSTS_N_INSNS (1), /* flag_setting. */
1227 COSTS_N_INSNS (1), /* extend. */
1228 COSTS_N_INSNS (1), /* add. */
1229 COSTS_N_INSNS (1), /* extend_add. */
1230 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1234 0, /* simple (N/A). */
1235 0, /* flag_setting (N/A). */
1236 COSTS_N_INSNS (2), /* extend. */
1238 COSTS_N_INSNS (2), /* extend_add. */
1244 COSTS_N_INSNS (1), /* load. */
1245 COSTS_N_INSNS (1), /* load_sign_extend. */
1246 COSTS_N_INSNS (1), /* ldrd. */
1247 COSTS_N_INSNS (1), /* ldm_1st. */
1248 1, /* ldm_regs_per_insn_1st. */
1249 2, /* ldm_regs_per_insn_subsequent. */
1250 COSTS_N_INSNS (1), /* loadf. */
1251 COSTS_N_INSNS (1), /* loadd. */
1252 COSTS_N_INSNS (1), /* load_unaligned. */
1253 COSTS_N_INSNS (1), /* store. */
1254 COSTS_N_INSNS (1), /* strd. */
1255 COSTS_N_INSNS (1), /* stm_1st. */
1256 1, /* stm_regs_per_insn_1st. */
1257 2, /* stm_regs_per_insn_subsequent. */
1258 COSTS_N_INSNS (1), /* storef. */
1259 COSTS_N_INSNS (1), /* stored. */
1260 COSTS_N_INSNS (1), /* store_unaligned. */
1261 COSTS_N_INSNS (1), /* loadv. */
1262 COSTS_N_INSNS (1) /* storev. */
1267 COSTS_N_INSNS (36), /* div. */
1268 COSTS_N_INSNS (11), /* mult. */
1269 COSTS_N_INSNS (20), /* mult_addsub. */
1270 COSTS_N_INSNS (30), /* fma. */
1271 COSTS_N_INSNS (9), /* addsub. */
1272 COSTS_N_INSNS (3), /* fpconst. */
1273 COSTS_N_INSNS (3), /* neg. */
1274 COSTS_N_INSNS (6), /* compare. */
1275 COSTS_N_INSNS (4), /* widen. */
1276 COSTS_N_INSNS (4), /* narrow. */
1277 COSTS_N_INSNS (8), /* toint. */
1278 COSTS_N_INSNS (8), /* fromint. */
1279 COSTS_N_INSNS (8) /* roundint. */
1283 COSTS_N_INSNS (64), /* div. */
1284 COSTS_N_INSNS (16), /* mult. */
1285 COSTS_N_INSNS (25), /* mult_addsub. */
1286 COSTS_N_INSNS (30), /* fma. */
1287 COSTS_N_INSNS (9), /* addsub. */
1288 COSTS_N_INSNS (3), /* fpconst. */
1289 COSTS_N_INSNS (3), /* neg. */
1290 COSTS_N_INSNS (6), /* compare. */
1291 COSTS_N_INSNS (6), /* widen. */
1292 COSTS_N_INSNS (6), /* narrow. */
1293 COSTS_N_INSNS (8), /* toint. */
1294 COSTS_N_INSNS (8), /* fromint. */
1295 COSTS_N_INSNS (8) /* roundint. */
1300 COSTS_N_INSNS (1), /* alu. */
1301 COSTS_N_INSNS (4), /* mult. */
1302 COSTS_N_INSNS (1), /* movi. */
1303 COSTS_N_INSNS (2), /* dup. */
1304 COSTS_N_INSNS (2) /* extract. */
1308 const struct cpu_cost_table cortexa5_extra_costs
=
1314 COSTS_N_INSNS (1), /* shift. */
1315 COSTS_N_INSNS (1), /* shift_reg. */
1316 COSTS_N_INSNS (1), /* arith_shift. */
1317 COSTS_N_INSNS (1), /* arith_shift_reg. */
1318 COSTS_N_INSNS (1), /* log_shift. */
1319 COSTS_N_INSNS (1), /* log_shift_reg. */
1320 COSTS_N_INSNS (1), /* extend. */
1321 COSTS_N_INSNS (1), /* extend_arith. */
1322 COSTS_N_INSNS (1), /* bfi. */
1323 COSTS_N_INSNS (1), /* bfx. */
1324 COSTS_N_INSNS (1), /* clz. */
1325 COSTS_N_INSNS (1), /* rev. */
1327 true /* non_exec_costs_exec. */
1334 COSTS_N_INSNS (1), /* flag_setting. */
1335 COSTS_N_INSNS (1), /* extend. */
1336 COSTS_N_INSNS (1), /* add. */
1337 COSTS_N_INSNS (1), /* extend_add. */
1338 COSTS_N_INSNS (7) /* idiv. */
1342 0, /* simple (N/A). */
1343 0, /* flag_setting (N/A). */
1344 COSTS_N_INSNS (1), /* extend. */
1346 COSTS_N_INSNS (2), /* extend_add. */
1352 COSTS_N_INSNS (1), /* load. */
1353 COSTS_N_INSNS (1), /* load_sign_extend. */
1354 COSTS_N_INSNS (6), /* ldrd. */
1355 COSTS_N_INSNS (1), /* ldm_1st. */
1356 1, /* ldm_regs_per_insn_1st. */
1357 2, /* ldm_regs_per_insn_subsequent. */
1358 COSTS_N_INSNS (2), /* loadf. */
1359 COSTS_N_INSNS (4), /* loadd. */
1360 COSTS_N_INSNS (1), /* load_unaligned. */
1361 COSTS_N_INSNS (1), /* store. */
1362 COSTS_N_INSNS (3), /* strd. */
1363 COSTS_N_INSNS (1), /* stm_1st. */
1364 1, /* stm_regs_per_insn_1st. */
1365 2, /* stm_regs_per_insn_subsequent. */
1366 COSTS_N_INSNS (2), /* storef. */
1367 COSTS_N_INSNS (2), /* stored. */
1368 COSTS_N_INSNS (1), /* store_unaligned. */
1369 COSTS_N_INSNS (1), /* loadv. */
1370 COSTS_N_INSNS (1) /* storev. */
1375 COSTS_N_INSNS (15), /* div. */
1376 COSTS_N_INSNS (3), /* mult. */
1377 COSTS_N_INSNS (7), /* mult_addsub. */
1378 COSTS_N_INSNS (7), /* fma. */
1379 COSTS_N_INSNS (3), /* addsub. */
1380 COSTS_N_INSNS (3), /* fpconst. */
1381 COSTS_N_INSNS (3), /* neg. */
1382 COSTS_N_INSNS (3), /* compare. */
1383 COSTS_N_INSNS (3), /* widen. */
1384 COSTS_N_INSNS (3), /* narrow. */
1385 COSTS_N_INSNS (3), /* toint. */
1386 COSTS_N_INSNS (3), /* fromint. */
1387 COSTS_N_INSNS (3) /* roundint. */
1391 COSTS_N_INSNS (30), /* div. */
1392 COSTS_N_INSNS (6), /* mult. */
1393 COSTS_N_INSNS (10), /* mult_addsub. */
1394 COSTS_N_INSNS (7), /* fma. */
1395 COSTS_N_INSNS (3), /* addsub. */
1396 COSTS_N_INSNS (3), /* fpconst. */
1397 COSTS_N_INSNS (3), /* neg. */
1398 COSTS_N_INSNS (3), /* compare. */
1399 COSTS_N_INSNS (3), /* widen. */
1400 COSTS_N_INSNS (3), /* narrow. */
1401 COSTS_N_INSNS (3), /* toint. */
1402 COSTS_N_INSNS (3), /* fromint. */
1403 COSTS_N_INSNS (3) /* roundint. */
1408 COSTS_N_INSNS (1), /* alu. */
1409 COSTS_N_INSNS (4), /* mult. */
1410 COSTS_N_INSNS (1), /* movi. */
1411 COSTS_N_INSNS (2), /* dup. */
1412 COSTS_N_INSNS (2) /* extract. */
1417 const struct cpu_cost_table cortexa7_extra_costs
=
1423 COSTS_N_INSNS (1), /* shift. */
1424 COSTS_N_INSNS (1), /* shift_reg. */
1425 COSTS_N_INSNS (1), /* arith_shift. */
1426 COSTS_N_INSNS (1), /* arith_shift_reg. */
1427 COSTS_N_INSNS (1), /* log_shift. */
1428 COSTS_N_INSNS (1), /* log_shift_reg. */
1429 COSTS_N_INSNS (1), /* extend. */
1430 COSTS_N_INSNS (1), /* extend_arith. */
1431 COSTS_N_INSNS (1), /* bfi. */
1432 COSTS_N_INSNS (1), /* bfx. */
1433 COSTS_N_INSNS (1), /* clz. */
1434 COSTS_N_INSNS (1), /* rev. */
1436 true /* non_exec_costs_exec. */
1443 COSTS_N_INSNS (1), /* flag_setting. */
1444 COSTS_N_INSNS (1), /* extend. */
1445 COSTS_N_INSNS (1), /* add. */
1446 COSTS_N_INSNS (1), /* extend_add. */
1447 COSTS_N_INSNS (7) /* idiv. */
1451 0, /* simple (N/A). */
1452 0, /* flag_setting (N/A). */
1453 COSTS_N_INSNS (1), /* extend. */
1455 COSTS_N_INSNS (2), /* extend_add. */
1461 COSTS_N_INSNS (1), /* load. */
1462 COSTS_N_INSNS (1), /* load_sign_extend. */
1463 COSTS_N_INSNS (3), /* ldrd. */
1464 COSTS_N_INSNS (1), /* ldm_1st. */
1465 1, /* ldm_regs_per_insn_1st. */
1466 2, /* ldm_regs_per_insn_subsequent. */
1467 COSTS_N_INSNS (2), /* loadf. */
1468 COSTS_N_INSNS (2), /* loadd. */
1469 COSTS_N_INSNS (1), /* load_unaligned. */
1470 COSTS_N_INSNS (1), /* store. */
1471 COSTS_N_INSNS (3), /* strd. */
1472 COSTS_N_INSNS (1), /* stm_1st. */
1473 1, /* stm_regs_per_insn_1st. */
1474 2, /* stm_regs_per_insn_subsequent. */
1475 COSTS_N_INSNS (2), /* storef. */
1476 COSTS_N_INSNS (2), /* stored. */
1477 COSTS_N_INSNS (1), /* store_unaligned. */
1478 COSTS_N_INSNS (1), /* loadv. */
1479 COSTS_N_INSNS (1) /* storev. */
1484 COSTS_N_INSNS (15), /* div. */
1485 COSTS_N_INSNS (3), /* mult. */
1486 COSTS_N_INSNS (7), /* mult_addsub. */
1487 COSTS_N_INSNS (7), /* fma. */
1488 COSTS_N_INSNS (3), /* addsub. */
1489 COSTS_N_INSNS (3), /* fpconst. */
1490 COSTS_N_INSNS (3), /* neg. */
1491 COSTS_N_INSNS (3), /* compare. */
1492 COSTS_N_INSNS (3), /* widen. */
1493 COSTS_N_INSNS (3), /* narrow. */
1494 COSTS_N_INSNS (3), /* toint. */
1495 COSTS_N_INSNS (3), /* fromint. */
1496 COSTS_N_INSNS (3) /* roundint. */
1500 COSTS_N_INSNS (30), /* div. */
1501 COSTS_N_INSNS (6), /* mult. */
1502 COSTS_N_INSNS (10), /* mult_addsub. */
1503 COSTS_N_INSNS (7), /* fma. */
1504 COSTS_N_INSNS (3), /* addsub. */
1505 COSTS_N_INSNS (3), /* fpconst. */
1506 COSTS_N_INSNS (3), /* neg. */
1507 COSTS_N_INSNS (3), /* compare. */
1508 COSTS_N_INSNS (3), /* widen. */
1509 COSTS_N_INSNS (3), /* narrow. */
1510 COSTS_N_INSNS (3), /* toint. */
1511 COSTS_N_INSNS (3), /* fromint. */
1512 COSTS_N_INSNS (3) /* roundint. */
1517 COSTS_N_INSNS (1), /* alu. */
1518 COSTS_N_INSNS (4), /* mult. */
1519 COSTS_N_INSNS (1), /* movi. */
1520 COSTS_N_INSNS (2), /* dup. */
1521 COSTS_N_INSNS (2) /* extract. */
1525 const struct cpu_cost_table cortexa12_extra_costs
=
1532 COSTS_N_INSNS (1), /* shift_reg. */
1533 COSTS_N_INSNS (1), /* arith_shift. */
1534 COSTS_N_INSNS (1), /* arith_shift_reg. */
1535 COSTS_N_INSNS (1), /* log_shift. */
1536 COSTS_N_INSNS (1), /* log_shift_reg. */
1538 COSTS_N_INSNS (1), /* extend_arith. */
1540 COSTS_N_INSNS (1), /* bfx. */
1541 COSTS_N_INSNS (1), /* clz. */
1542 COSTS_N_INSNS (1), /* rev. */
1544 true /* non_exec_costs_exec. */
1549 COSTS_N_INSNS (2), /* simple. */
1550 COSTS_N_INSNS (3), /* flag_setting. */
1551 COSTS_N_INSNS (2), /* extend. */
1552 COSTS_N_INSNS (3), /* add. */
1553 COSTS_N_INSNS (2), /* extend_add. */
1554 COSTS_N_INSNS (18) /* idiv. */
1558 0, /* simple (N/A). */
1559 0, /* flag_setting (N/A). */
1560 COSTS_N_INSNS (3), /* extend. */
1562 COSTS_N_INSNS (3), /* extend_add. */
1568 COSTS_N_INSNS (3), /* load. */
1569 COSTS_N_INSNS (3), /* load_sign_extend. */
1570 COSTS_N_INSNS (3), /* ldrd. */
1571 COSTS_N_INSNS (3), /* ldm_1st. */
1572 1, /* ldm_regs_per_insn_1st. */
1573 2, /* ldm_regs_per_insn_subsequent. */
1574 COSTS_N_INSNS (3), /* loadf. */
1575 COSTS_N_INSNS (3), /* loadd. */
1576 0, /* load_unaligned. */
1580 1, /* stm_regs_per_insn_1st. */
1581 2, /* stm_regs_per_insn_subsequent. */
1582 COSTS_N_INSNS (2), /* storef. */
1583 COSTS_N_INSNS (2), /* stored. */
1584 0, /* store_unaligned. */
1585 COSTS_N_INSNS (1), /* loadv. */
1586 COSTS_N_INSNS (1) /* storev. */
1591 COSTS_N_INSNS (17), /* div. */
1592 COSTS_N_INSNS (4), /* mult. */
1593 COSTS_N_INSNS (8), /* mult_addsub. */
1594 COSTS_N_INSNS (8), /* fma. */
1595 COSTS_N_INSNS (4), /* addsub. */
1596 COSTS_N_INSNS (2), /* fpconst. */
1597 COSTS_N_INSNS (2), /* neg. */
1598 COSTS_N_INSNS (2), /* compare. */
1599 COSTS_N_INSNS (4), /* widen. */
1600 COSTS_N_INSNS (4), /* narrow. */
1601 COSTS_N_INSNS (4), /* toint. */
1602 COSTS_N_INSNS (4), /* fromint. */
1603 COSTS_N_INSNS (4) /* roundint. */
1607 COSTS_N_INSNS (31), /* div. */
1608 COSTS_N_INSNS (4), /* mult. */
1609 COSTS_N_INSNS (8), /* mult_addsub. */
1610 COSTS_N_INSNS (8), /* fma. */
1611 COSTS_N_INSNS (4), /* addsub. */
1612 COSTS_N_INSNS (2), /* fpconst. */
1613 COSTS_N_INSNS (2), /* neg. */
1614 COSTS_N_INSNS (2), /* compare. */
1615 COSTS_N_INSNS (4), /* widen. */
1616 COSTS_N_INSNS (4), /* narrow. */
1617 COSTS_N_INSNS (4), /* toint. */
1618 COSTS_N_INSNS (4), /* fromint. */
1619 COSTS_N_INSNS (4) /* roundint. */
1624 COSTS_N_INSNS (1), /* alu. */
1625 COSTS_N_INSNS (4), /* mult. */
1626 COSTS_N_INSNS (1), /* movi. */
1627 COSTS_N_INSNS (2), /* dup. */
1628 COSTS_N_INSNS (2) /* extract. */
1632 const struct cpu_cost_table cortexa15_extra_costs
=
1640 COSTS_N_INSNS (1), /* arith_shift. */
1641 COSTS_N_INSNS (1), /* arith_shift_reg. */
1642 COSTS_N_INSNS (1), /* log_shift. */
1643 COSTS_N_INSNS (1), /* log_shift_reg. */
1645 COSTS_N_INSNS (1), /* extend_arith. */
1646 COSTS_N_INSNS (1), /* bfi. */
1651 true /* non_exec_costs_exec. */
1656 COSTS_N_INSNS (2), /* simple. */
1657 COSTS_N_INSNS (3), /* flag_setting. */
1658 COSTS_N_INSNS (2), /* extend. */
1659 COSTS_N_INSNS (2), /* add. */
1660 COSTS_N_INSNS (2), /* extend_add. */
1661 COSTS_N_INSNS (18) /* idiv. */
1665 0, /* simple (N/A). */
1666 0, /* flag_setting (N/A). */
1667 COSTS_N_INSNS (3), /* extend. */
1669 COSTS_N_INSNS (3), /* extend_add. */
1675 COSTS_N_INSNS (3), /* load. */
1676 COSTS_N_INSNS (3), /* load_sign_extend. */
1677 COSTS_N_INSNS (3), /* ldrd. */
1678 COSTS_N_INSNS (4), /* ldm_1st. */
1679 1, /* ldm_regs_per_insn_1st. */
1680 2, /* ldm_regs_per_insn_subsequent. */
1681 COSTS_N_INSNS (4), /* loadf. */
1682 COSTS_N_INSNS (4), /* loadd. */
1683 0, /* load_unaligned. */
1686 COSTS_N_INSNS (1), /* stm_1st. */
1687 1, /* stm_regs_per_insn_1st. */
1688 2, /* stm_regs_per_insn_subsequent. */
1691 0, /* store_unaligned. */
1692 COSTS_N_INSNS (1), /* loadv. */
1693 COSTS_N_INSNS (1) /* storev. */
1698 COSTS_N_INSNS (17), /* div. */
1699 COSTS_N_INSNS (4), /* mult. */
1700 COSTS_N_INSNS (8), /* mult_addsub. */
1701 COSTS_N_INSNS (8), /* fma. */
1702 COSTS_N_INSNS (4), /* addsub. */
1703 COSTS_N_INSNS (2), /* fpconst. */
1704 COSTS_N_INSNS (2), /* neg. */
1705 COSTS_N_INSNS (5), /* compare. */
1706 COSTS_N_INSNS (4), /* widen. */
1707 COSTS_N_INSNS (4), /* narrow. */
1708 COSTS_N_INSNS (4), /* toint. */
1709 COSTS_N_INSNS (4), /* fromint. */
1710 COSTS_N_INSNS (4) /* roundint. */
1714 COSTS_N_INSNS (31), /* div. */
1715 COSTS_N_INSNS (4), /* mult. */
1716 COSTS_N_INSNS (8), /* mult_addsub. */
1717 COSTS_N_INSNS (8), /* fma. */
1718 COSTS_N_INSNS (4), /* addsub. */
1719 COSTS_N_INSNS (2), /* fpconst. */
1720 COSTS_N_INSNS (2), /* neg. */
1721 COSTS_N_INSNS (2), /* compare. */
1722 COSTS_N_INSNS (4), /* widen. */
1723 COSTS_N_INSNS (4), /* narrow. */
1724 COSTS_N_INSNS (4), /* toint. */
1725 COSTS_N_INSNS (4), /* fromint. */
1726 COSTS_N_INSNS (4) /* roundint. */
1731 COSTS_N_INSNS (1), /* alu. */
1732 COSTS_N_INSNS (4), /* mult. */
1733 COSTS_N_INSNS (1), /* movi. */
1734 COSTS_N_INSNS (2), /* dup. */
1735 COSTS_N_INSNS (2) /* extract. */
1739 const struct cpu_cost_table v7m_extra_costs
=
1747 0, /* arith_shift. */
1748 COSTS_N_INSNS (1), /* arith_shift_reg. */
1750 COSTS_N_INSNS (1), /* log_shift_reg. */
1752 COSTS_N_INSNS (1), /* extend_arith. */
1757 COSTS_N_INSNS (1), /* non_exec. */
1758 false /* non_exec_costs_exec. */
1763 COSTS_N_INSNS (1), /* simple. */
1764 COSTS_N_INSNS (1), /* flag_setting. */
1765 COSTS_N_INSNS (2), /* extend. */
1766 COSTS_N_INSNS (1), /* add. */
1767 COSTS_N_INSNS (3), /* extend_add. */
1768 COSTS_N_INSNS (8) /* idiv. */
1772 0, /* simple (N/A). */
1773 0, /* flag_setting (N/A). */
1774 COSTS_N_INSNS (2), /* extend. */
1776 COSTS_N_INSNS (3), /* extend_add. */
1782 COSTS_N_INSNS (2), /* load. */
1783 0, /* load_sign_extend. */
1784 COSTS_N_INSNS (3), /* ldrd. */
1785 COSTS_N_INSNS (2), /* ldm_1st. */
1786 1, /* ldm_regs_per_insn_1st. */
1787 1, /* ldm_regs_per_insn_subsequent. */
1788 COSTS_N_INSNS (2), /* loadf. */
1789 COSTS_N_INSNS (3), /* loadd. */
1790 COSTS_N_INSNS (1), /* load_unaligned. */
1791 COSTS_N_INSNS (2), /* store. */
1792 COSTS_N_INSNS (3), /* strd. */
1793 COSTS_N_INSNS (2), /* stm_1st. */
1794 1, /* stm_regs_per_insn_1st. */
1795 1, /* stm_regs_per_insn_subsequent. */
1796 COSTS_N_INSNS (2), /* storef. */
1797 COSTS_N_INSNS (3), /* stored. */
1798 COSTS_N_INSNS (1), /* store_unaligned. */
1799 COSTS_N_INSNS (1), /* loadv. */
1800 COSTS_N_INSNS (1) /* storev. */
1805 COSTS_N_INSNS (7), /* div. */
1806 COSTS_N_INSNS (2), /* mult. */
1807 COSTS_N_INSNS (5), /* mult_addsub. */
1808 COSTS_N_INSNS (3), /* fma. */
1809 COSTS_N_INSNS (1), /* addsub. */
1821 COSTS_N_INSNS (15), /* div. */
1822 COSTS_N_INSNS (5), /* mult. */
1823 COSTS_N_INSNS (7), /* mult_addsub. */
1824 COSTS_N_INSNS (7), /* fma. */
1825 COSTS_N_INSNS (3), /* addsub. */
1838 COSTS_N_INSNS (1), /* alu. */
1839 COSTS_N_INSNS (4), /* mult. */
1840 COSTS_N_INSNS (1), /* movi. */
1841 COSTS_N_INSNS (2), /* dup. */
1842 COSTS_N_INSNS (2) /* extract. */
1846 const struct addr_mode_cost_table generic_addr_mode_costs
=
1850 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1851 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1852 COSTS_N_INSNS (0) /* AMO_WB. */
1856 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1857 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1858 COSTS_N_INSNS (0) /* AMO_WB. */
1862 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1863 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1864 COSTS_N_INSNS (0) /* AMO_WB. */
1868 const struct tune_params arm_slowmul_tune
=
1870 &generic_extra_costs
, /* Insn extra costs. */
1871 &generic_addr_mode_costs
, /* Addressing mode costs. */
1872 NULL
, /* Sched adj cost. */
1873 arm_default_branch_cost
,
1874 &arm_default_vec_cost
,
1875 3, /* Constant limit. */
1876 5, /* Max cond insns. */
1877 8, /* Memset max inline. */
1878 1, /* Issue rate. */
1879 ARM_PREFETCH_NOT_BENEFICIAL
,
1880 tune_params::PREF_CONST_POOL_TRUE
,
1881 tune_params::PREF_LDRD_FALSE
,
1882 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1884 tune_params::DISPARAGE_FLAGS_NEITHER
,
1885 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1886 tune_params::FUSE_NOTHING
,
1887 tune_params::SCHED_AUTOPREF_OFF
1890 const struct tune_params arm_fastmul_tune
=
1892 &generic_extra_costs
, /* Insn extra costs. */
1893 &generic_addr_mode_costs
, /* Addressing mode costs. */
1894 NULL
, /* Sched adj cost. */
1895 arm_default_branch_cost
,
1896 &arm_default_vec_cost
,
1897 1, /* Constant limit. */
1898 5, /* Max cond insns. */
1899 8, /* Memset max inline. */
1900 1, /* Issue rate. */
1901 ARM_PREFETCH_NOT_BENEFICIAL
,
1902 tune_params::PREF_CONST_POOL_TRUE
,
1903 tune_params::PREF_LDRD_FALSE
,
1904 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1905 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1906 tune_params::DISPARAGE_FLAGS_NEITHER
,
1907 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1908 tune_params::FUSE_NOTHING
,
1909 tune_params::SCHED_AUTOPREF_OFF
1912 /* StrongARM has early execution of branches, so a sequence that is worth
1913 skipping is shorter. Set max_insns_skipped to a lower value. */
1915 const struct tune_params arm_strongarm_tune
=
1917 &generic_extra_costs
, /* Insn extra costs. */
1918 &generic_addr_mode_costs
, /* Addressing mode costs. */
1919 NULL
, /* Sched adj cost. */
1920 arm_default_branch_cost
,
1921 &arm_default_vec_cost
,
1922 1, /* Constant limit. */
1923 3, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 1, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL
,
1927 tune_params::PREF_CONST_POOL_TRUE
,
1928 tune_params::PREF_LDRD_FALSE
,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_NEITHER
,
1932 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1933 tune_params::FUSE_NOTHING
,
1934 tune_params::SCHED_AUTOPREF_OFF
1937 const struct tune_params arm_xscale_tune
=
1939 &generic_extra_costs
, /* Insn extra costs. */
1940 &generic_addr_mode_costs
, /* Addressing mode costs. */
1941 xscale_sched_adjust_cost
,
1942 arm_default_branch_cost
,
1943 &arm_default_vec_cost
,
1944 2, /* Constant limit. */
1945 3, /* Max cond insns. */
1946 8, /* Memset max inline. */
1947 1, /* Issue rate. */
1948 ARM_PREFETCH_NOT_BENEFICIAL
,
1949 tune_params::PREF_CONST_POOL_TRUE
,
1950 tune_params::PREF_LDRD_FALSE
,
1951 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1953 tune_params::DISPARAGE_FLAGS_NEITHER
,
1954 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1955 tune_params::FUSE_NOTHING
,
1956 tune_params::SCHED_AUTOPREF_OFF
1959 const struct tune_params arm_9e_tune
=
1961 &generic_extra_costs
, /* Insn extra costs. */
1962 &generic_addr_mode_costs
, /* Addressing mode costs. */
1963 NULL
, /* Sched adj cost. */
1964 arm_default_branch_cost
,
1965 &arm_default_vec_cost
,
1966 1, /* Constant limit. */
1967 5, /* Max cond insns. */
1968 8, /* Memset max inline. */
1969 1, /* Issue rate. */
1970 ARM_PREFETCH_NOT_BENEFICIAL
,
1971 tune_params::PREF_CONST_POOL_TRUE
,
1972 tune_params::PREF_LDRD_FALSE
,
1973 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1975 tune_params::DISPARAGE_FLAGS_NEITHER
,
1976 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1977 tune_params::FUSE_NOTHING
,
1978 tune_params::SCHED_AUTOPREF_OFF
1981 const struct tune_params arm_marvell_pj4_tune
=
1983 &generic_extra_costs
, /* Insn extra costs. */
1984 &generic_addr_mode_costs
, /* Addressing mode costs. */
1985 NULL
, /* Sched adj cost. */
1986 arm_default_branch_cost
,
1987 &arm_default_vec_cost
,
1988 1, /* Constant limit. */
1989 5, /* Max cond insns. */
1990 8, /* Memset max inline. */
1991 2, /* Issue rate. */
1992 ARM_PREFETCH_NOT_BENEFICIAL
,
1993 tune_params::PREF_CONST_POOL_TRUE
,
1994 tune_params::PREF_LDRD_FALSE
,
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1997 tune_params::DISPARAGE_FLAGS_NEITHER
,
1998 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1999 tune_params::FUSE_NOTHING
,
2000 tune_params::SCHED_AUTOPREF_OFF
2003 const struct tune_params arm_v6t2_tune
=
2005 &generic_extra_costs
, /* Insn extra costs. */
2006 &generic_addr_mode_costs
, /* Addressing mode costs. */
2007 NULL
, /* Sched adj cost. */
2008 arm_default_branch_cost
,
2009 &arm_default_vec_cost
,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 1, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL
,
2015 tune_params::PREF_CONST_POOL_FALSE
,
2016 tune_params::PREF_LDRD_FALSE
,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER
,
2020 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2021 tune_params::FUSE_NOTHING
,
2022 tune_params::SCHED_AUTOPREF_OFF
2026 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2027 const struct tune_params arm_cortex_tune
=
2029 &generic_extra_costs
,
2030 &generic_addr_mode_costs
, /* Addressing mode costs. */
2031 NULL
, /* Sched adj cost. */
2032 arm_default_branch_cost
,
2033 &arm_default_vec_cost
,
2034 1, /* Constant limit. */
2035 5, /* Max cond insns. */
2036 8, /* Memset max inline. */
2037 2, /* Issue rate. */
2038 ARM_PREFETCH_NOT_BENEFICIAL
,
2039 tune_params::PREF_CONST_POOL_FALSE
,
2040 tune_params::PREF_LDRD_FALSE
,
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2042 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2043 tune_params::DISPARAGE_FLAGS_NEITHER
,
2044 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2045 tune_params::FUSE_NOTHING
,
2046 tune_params::SCHED_AUTOPREF_OFF
2049 const struct tune_params arm_cortex_a8_tune
=
2051 &cortexa8_extra_costs
,
2052 &generic_addr_mode_costs
, /* Addressing mode costs. */
2053 NULL
, /* Sched adj cost. */
2054 arm_default_branch_cost
,
2055 &arm_default_vec_cost
,
2056 1, /* Constant limit. */
2057 5, /* Max cond insns. */
2058 8, /* Memset max inline. */
2059 2, /* Issue rate. */
2060 ARM_PREFETCH_NOT_BENEFICIAL
,
2061 tune_params::PREF_CONST_POOL_FALSE
,
2062 tune_params::PREF_LDRD_FALSE
,
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2064 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2065 tune_params::DISPARAGE_FLAGS_NEITHER
,
2066 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2067 tune_params::FUSE_NOTHING
,
2068 tune_params::SCHED_AUTOPREF_OFF
2071 const struct tune_params arm_cortex_a7_tune
=
2073 &cortexa7_extra_costs
,
2074 &generic_addr_mode_costs
, /* Addressing mode costs. */
2075 NULL
, /* Sched adj cost. */
2076 arm_default_branch_cost
,
2077 &arm_default_vec_cost
,
2078 1, /* Constant limit. */
2079 5, /* Max cond insns. */
2080 8, /* Memset max inline. */
2081 2, /* Issue rate. */
2082 ARM_PREFETCH_NOT_BENEFICIAL
,
2083 tune_params::PREF_CONST_POOL_FALSE
,
2084 tune_params::PREF_LDRD_FALSE
,
2085 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2087 tune_params::DISPARAGE_FLAGS_NEITHER
,
2088 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2089 tune_params::FUSE_NOTHING
,
2090 tune_params::SCHED_AUTOPREF_OFF
2093 const struct tune_params arm_cortex_a15_tune
=
2095 &cortexa15_extra_costs
,
2096 &generic_addr_mode_costs
, /* Addressing mode costs. */
2097 NULL
, /* Sched adj cost. */
2098 arm_default_branch_cost
,
2099 &arm_default_vec_cost
,
2100 1, /* Constant limit. */
2101 2, /* Max cond insns. */
2102 8, /* Memset max inline. */
2103 3, /* Issue rate. */
2104 ARM_PREFETCH_NOT_BENEFICIAL
,
2105 tune_params::PREF_CONST_POOL_FALSE
,
2106 tune_params::PREF_LDRD_TRUE
,
2107 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2108 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2109 tune_params::DISPARAGE_FLAGS_ALL
,
2110 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2111 tune_params::FUSE_NOTHING
,
2112 tune_params::SCHED_AUTOPREF_FULL
2115 const struct tune_params arm_cortex_a35_tune
=
2117 &cortexa53_extra_costs
,
2118 &generic_addr_mode_costs
, /* Addressing mode costs. */
2119 NULL
, /* Sched adj cost. */
2120 arm_default_branch_cost
,
2121 &arm_default_vec_cost
,
2122 1, /* Constant limit. */
2123 5, /* Max cond insns. */
2124 8, /* Memset max inline. */
2125 1, /* Issue rate. */
2126 ARM_PREFETCH_NOT_BENEFICIAL
,
2127 tune_params::PREF_CONST_POOL_FALSE
,
2128 tune_params::PREF_LDRD_FALSE
,
2129 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2130 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2131 tune_params::DISPARAGE_FLAGS_NEITHER
,
2132 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2133 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2134 tune_params::SCHED_AUTOPREF_OFF
2137 const struct tune_params arm_cortex_a53_tune
=
2139 &cortexa53_extra_costs
,
2140 &generic_addr_mode_costs
, /* Addressing mode costs. */
2141 NULL
, /* Sched adj cost. */
2142 arm_default_branch_cost
,
2143 &arm_default_vec_cost
,
2144 1, /* Constant limit. */
2145 5, /* Max cond insns. */
2146 8, /* Memset max inline. */
2147 2, /* Issue rate. */
2148 ARM_PREFETCH_NOT_BENEFICIAL
,
2149 tune_params::PREF_CONST_POOL_FALSE
,
2150 tune_params::PREF_LDRD_FALSE
,
2151 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2152 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2153 tune_params::DISPARAGE_FLAGS_NEITHER
,
2154 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2155 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2156 tune_params::SCHED_AUTOPREF_OFF
2159 const struct tune_params arm_cortex_a57_tune
=
2161 &cortexa57_extra_costs
,
2162 &generic_addr_mode_costs
, /* addressing mode costs */
2163 NULL
, /* Sched adj cost. */
2164 arm_default_branch_cost
,
2165 &arm_default_vec_cost
,
2166 1, /* Constant limit. */
2167 2, /* Max cond insns. */
2168 8, /* Memset max inline. */
2169 3, /* Issue rate. */
2170 ARM_PREFETCH_NOT_BENEFICIAL
,
2171 tune_params::PREF_CONST_POOL_FALSE
,
2172 tune_params::PREF_LDRD_TRUE
,
2173 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2174 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2175 tune_params::DISPARAGE_FLAGS_ALL
,
2176 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2177 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2178 tune_params::SCHED_AUTOPREF_FULL
2181 const struct tune_params arm_exynosm1_tune
=
2183 &exynosm1_extra_costs
,
2184 &generic_addr_mode_costs
, /* Addressing mode costs. */
2185 NULL
, /* Sched adj cost. */
2186 arm_default_branch_cost
,
2187 &arm_default_vec_cost
,
2188 1, /* Constant limit. */
2189 2, /* Max cond insns. */
2190 8, /* Memset max inline. */
2191 3, /* Issue rate. */
2192 ARM_PREFETCH_NOT_BENEFICIAL
,
2193 tune_params::PREF_CONST_POOL_FALSE
,
2194 tune_params::PREF_LDRD_TRUE
,
2195 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2196 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2197 tune_params::DISPARAGE_FLAGS_ALL
,
2198 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2199 tune_params::FUSE_NOTHING
,
2200 tune_params::SCHED_AUTOPREF_OFF
2203 const struct tune_params arm_xgene1_tune
=
2205 &xgene1_extra_costs
,
2206 &generic_addr_mode_costs
, /* Addressing mode costs. */
2207 NULL
, /* Sched adj cost. */
2208 arm_default_branch_cost
,
2209 &arm_default_vec_cost
,
2210 1, /* Constant limit. */
2211 2, /* Max cond insns. */
2212 32, /* Memset max inline. */
2213 4, /* Issue rate. */
2214 ARM_PREFETCH_NOT_BENEFICIAL
,
2215 tune_params::PREF_CONST_POOL_FALSE
,
2216 tune_params::PREF_LDRD_TRUE
,
2217 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2218 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2219 tune_params::DISPARAGE_FLAGS_ALL
,
2220 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2221 tune_params::FUSE_NOTHING
,
2222 tune_params::SCHED_AUTOPREF_OFF
2225 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2226 less appealing. Set max_insns_skipped to a low value. */
2228 const struct tune_params arm_cortex_a5_tune
=
2230 &cortexa5_extra_costs
,
2231 &generic_addr_mode_costs
, /* Addressing mode costs. */
2232 NULL
, /* Sched adj cost. */
2233 arm_cortex_a5_branch_cost
,
2234 &arm_default_vec_cost
,
2235 1, /* Constant limit. */
2236 1, /* Max cond insns. */
2237 8, /* Memset max inline. */
2238 2, /* Issue rate. */
2239 ARM_PREFETCH_NOT_BENEFICIAL
,
2240 tune_params::PREF_CONST_POOL_FALSE
,
2241 tune_params::PREF_LDRD_FALSE
,
2242 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2243 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2244 tune_params::DISPARAGE_FLAGS_NEITHER
,
2245 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2246 tune_params::FUSE_NOTHING
,
2247 tune_params::SCHED_AUTOPREF_OFF
2250 const struct tune_params arm_cortex_a9_tune
=
2252 &cortexa9_extra_costs
,
2253 &generic_addr_mode_costs
, /* Addressing mode costs. */
2254 cortex_a9_sched_adjust_cost
,
2255 arm_default_branch_cost
,
2256 &arm_default_vec_cost
,
2257 1, /* Constant limit. */
2258 5, /* Max cond insns. */
2259 8, /* Memset max inline. */
2260 2, /* Issue rate. */
2261 ARM_PREFETCH_BENEFICIAL(4,32,32),
2262 tune_params::PREF_CONST_POOL_FALSE
,
2263 tune_params::PREF_LDRD_FALSE
,
2264 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2265 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2266 tune_params::DISPARAGE_FLAGS_NEITHER
,
2267 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2268 tune_params::FUSE_NOTHING
,
2269 tune_params::SCHED_AUTOPREF_OFF
2272 const struct tune_params arm_cortex_a12_tune
=
2274 &cortexa12_extra_costs
,
2275 &generic_addr_mode_costs
, /* Addressing mode costs. */
2276 NULL
, /* Sched adj cost. */
2277 arm_default_branch_cost
,
2278 &arm_default_vec_cost
, /* Vectorizer costs. */
2279 1, /* Constant limit. */
2280 2, /* Max cond insns. */
2281 8, /* Memset max inline. */
2282 2, /* Issue rate. */
2283 ARM_PREFETCH_NOT_BENEFICIAL
,
2284 tune_params::PREF_CONST_POOL_FALSE
,
2285 tune_params::PREF_LDRD_TRUE
,
2286 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2287 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2288 tune_params::DISPARAGE_FLAGS_ALL
,
2289 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2290 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2291 tune_params::SCHED_AUTOPREF_OFF
2294 const struct tune_params arm_cortex_a73_tune
=
2296 &cortexa57_extra_costs
,
2297 &generic_addr_mode_costs
, /* Addressing mode costs. */
2298 NULL
, /* Sched adj cost. */
2299 arm_default_branch_cost
,
2300 &arm_default_vec_cost
, /* Vectorizer costs. */
2301 1, /* Constant limit. */
2302 2, /* Max cond insns. */
2303 8, /* Memset max inline. */
2304 2, /* Issue rate. */
2305 ARM_PREFETCH_NOT_BENEFICIAL
,
2306 tune_params::PREF_CONST_POOL_FALSE
,
2307 tune_params::PREF_LDRD_TRUE
,
2308 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2309 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2310 tune_params::DISPARAGE_FLAGS_ALL
,
2311 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2312 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2313 tune_params::SCHED_AUTOPREF_FULL
2316 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2317 cycle to execute each. An LDR from the constant pool also takes two cycles
2318 to execute, but mildly increases pipelining opportunity (consecutive
2319 loads/stores can be pipelined together, saving one cycle), and may also
2320 improve icache utilisation. Hence we prefer the constant pool for such
2323 const struct tune_params arm_v7m_tune
=
2326 &generic_addr_mode_costs
, /* Addressing mode costs. */
2327 NULL
, /* Sched adj cost. */
2328 arm_cortex_m_branch_cost
,
2329 &arm_default_vec_cost
,
2330 1, /* Constant limit. */
2331 2, /* Max cond insns. */
2332 8, /* Memset max inline. */
2333 1, /* Issue rate. */
2334 ARM_PREFETCH_NOT_BENEFICIAL
,
2335 tune_params::PREF_CONST_POOL_TRUE
,
2336 tune_params::PREF_LDRD_FALSE
,
2337 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2338 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2339 tune_params::DISPARAGE_FLAGS_NEITHER
,
2340 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2341 tune_params::FUSE_NOTHING
,
2342 tune_params::SCHED_AUTOPREF_OFF
2345 /* Cortex-M7 tuning. */
2347 const struct tune_params arm_cortex_m7_tune
=
2350 &generic_addr_mode_costs
, /* Addressing mode costs. */
2351 NULL
, /* Sched adj cost. */
2352 arm_cortex_m7_branch_cost
,
2353 &arm_default_vec_cost
,
2354 0, /* Constant limit. */
2355 1, /* Max cond insns. */
2356 8, /* Memset max inline. */
2357 2, /* Issue rate. */
2358 ARM_PREFETCH_NOT_BENEFICIAL
,
2359 tune_params::PREF_CONST_POOL_TRUE
,
2360 tune_params::PREF_LDRD_FALSE
,
2361 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2362 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2363 tune_params::DISPARAGE_FLAGS_NEITHER
,
2364 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2365 tune_params::FUSE_NOTHING
,
2366 tune_params::SCHED_AUTOPREF_OFF
2369 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2370 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2372 const struct tune_params arm_v6m_tune
=
2374 &generic_extra_costs
, /* Insn extra costs. */
2375 &generic_addr_mode_costs
, /* Addressing mode costs. */
2376 NULL
, /* Sched adj cost. */
2377 arm_default_branch_cost
,
2378 &arm_default_vec_cost
, /* Vectorizer costs. */
2379 1, /* Constant limit. */
2380 5, /* Max cond insns. */
2381 8, /* Memset max inline. */
2382 1, /* Issue rate. */
2383 ARM_PREFETCH_NOT_BENEFICIAL
,
2384 tune_params::PREF_CONST_POOL_FALSE
,
2385 tune_params::PREF_LDRD_FALSE
,
2386 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2387 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2388 tune_params::DISPARAGE_FLAGS_NEITHER
,
2389 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2390 tune_params::FUSE_NOTHING
,
2391 tune_params::SCHED_AUTOPREF_OFF
2394 const struct tune_params arm_fa726te_tune
=
2396 &generic_extra_costs
, /* Insn extra costs. */
2397 &generic_addr_mode_costs
, /* Addressing mode costs. */
2398 fa726te_sched_adjust_cost
,
2399 arm_default_branch_cost
,
2400 &arm_default_vec_cost
,
2401 1, /* Constant limit. */
2402 5, /* Max cond insns. */
2403 8, /* Memset max inline. */
2404 2, /* Issue rate. */
2405 ARM_PREFETCH_NOT_BENEFICIAL
,
2406 tune_params::PREF_CONST_POOL_TRUE
,
2407 tune_params::PREF_LDRD_FALSE
,
2408 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2409 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2410 tune_params::DISPARAGE_FLAGS_NEITHER
,
2411 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2412 tune_params::FUSE_NOTHING
,
2413 tune_params::SCHED_AUTOPREF_OFF
2416 /* Auto-generated CPU, FPU and architecture tables. */
2417 #include "arm-cpu-data.h"
2419 /* The name of the preprocessor macro to define for this architecture. PROFILE
2420 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2421 is thus chosen to be big enough to hold the longest architecture name. */
2423 char arm_arch_name
[] = "__ARM_ARCH_PROFILE__";
2425 /* Supported TLS relocations. */
2436 TLS_DESCSEQ
/* GNU scheme */
2439 /* The maximum number of insns to be used when loading a constant. */
2441 arm_constant_limit (bool size_p
)
2443 return size_p
? 1 : current_tune
->constant_limit
;
2446 /* Emit an insn that's a simple single-set. Both the operands must be known
2448 inline static rtx_insn
*
2449 emit_set_insn (rtx x
, rtx y
)
2451 return emit_insn (gen_rtx_SET (x
, y
));
/* Return the number of bits set in VALUE, using Kernighan's trick of
   repeatedly clearing the least-significant set bit.  */
static int
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
2469 /* Return the number of bits set in BMAP. */
2471 bitmap_popcount (const sbitmap bmap
)
2473 unsigned int count
= 0;
2475 sbitmap_iterator sbi
;
2477 EXECUTE_IF_SET_IN_BITMAP (bmap
, 0, n
, sbi
)
2486 } arm_fixed_mode_set
;
2488 /* A small helper for setting fixed-point library libfuncs. */
2491 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2492 const char *funcname
, const char *modename
,
2497 if (num_suffix
== 0)
2498 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2500 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2502 set_optab_libfunc (optable
, mode
, buffer
);
2506 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2507 machine_mode from
, const char *funcname
,
2508 const char *toname
, const char *fromname
)
2511 const char *maybe_suffix_2
= "";
2513 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2514 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2515 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2516 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2517 maybe_suffix_2
= "2";
2519 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2522 set_conv_libfunc (optable
, to
, from
, buffer
);
2525 static GTY(()) rtx speculation_barrier_libfunc
;
2527 /* Record that we have no arithmetic or comparison libfuncs for
2528 machine mode MODE. */
2531 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode
)
2534 set_optab_libfunc (add_optab
, mode
, NULL
);
2535 set_optab_libfunc (sdiv_optab
, mode
, NULL
);
2536 set_optab_libfunc (smul_optab
, mode
, NULL
);
2537 set_optab_libfunc (neg_optab
, mode
, NULL
);
2538 set_optab_libfunc (sub_optab
, mode
, NULL
);
2541 set_optab_libfunc (eq_optab
, mode
, NULL
);
2542 set_optab_libfunc (ne_optab
, mode
, NULL
);
2543 set_optab_libfunc (lt_optab
, mode
, NULL
);
2544 set_optab_libfunc (le_optab
, mode
, NULL
);
2545 set_optab_libfunc (ge_optab
, mode
, NULL
);
2546 set_optab_libfunc (gt_optab
, mode
, NULL
);
2547 set_optab_libfunc (unord_optab
, mode
, NULL
);
2550 /* Set up library functions unique to ARM. */
2552 arm_init_libfuncs (void)
2554 machine_mode mode_iter
;
2556 /* For Linux, we have access to kernel support for atomic operations. */
2557 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2558 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2560 /* There are no special library functions unless we are using the
2565 /* The functions below are described in Section 4 of the "Run-Time
2566 ABI for the ARM architecture", Version 1.0. */
2568 /* Double-precision floating-point arithmetic. Table 2. */
2569 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2570 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2571 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2572 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2573 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2575 /* Double-precision comparisons. Table 3. */
2576 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2577 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2578 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2579 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2580 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2581 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2582 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2584 /* Single-precision floating-point arithmetic. Table 4. */
2585 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2586 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2587 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2588 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2589 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2591 /* Single-precision comparisons. Table 5. */
2592 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2593 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2594 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2595 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2596 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2597 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2598 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2600 /* Floating-point to integer conversions. Table 6. */
2601 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2602 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2603 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2604 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2605 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2606 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2607 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2608 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2610 /* Conversions between floating types. Table 7. */
2611 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2612 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2614 /* Integer to floating-point conversions. Table 8. */
2615 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2616 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2617 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2618 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2619 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2620 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2621 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2622 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2624 /* Long long. Table 9. */
2625 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2626 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2627 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2628 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2629 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2630 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2631 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2632 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2634 /* Integer (32/32->32) division. \S 4.3.1. */
2635 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2636 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2638 /* The divmod functions are designed so that they can be used for
2639 plain division, even though they return both the quotient and the
2640 remainder. The quotient is returned in the usual location (i.e.,
2641 r0 for SImode, {r0, r1} for DImode), just as would be expected
2642 for an ordinary division routine. Because the AAPCS calling
2643 conventions specify that all of { r0, r1, r2, r3 } are
2644 callee-saved registers, there is no need to tell the compiler
2645 explicitly that those registers are clobbered by these
2647 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2648 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2650 /* For SImode division the ABI provides div-without-mod routines,
2651 which are faster. */
2652 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2653 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2655 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2656 divmod libcalls instead. */
2657 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2658 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2659 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2660 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2662 /* Half-precision float operations. The compiler handles all operations
2663 with NULL libfuncs by converting the SFmode. */
2664 switch (arm_fp16_format
)
2666 case ARM_FP16_FORMAT_IEEE
:
2667 case ARM_FP16_FORMAT_ALTERNATIVE
:
2670 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2671 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2673 : "__gnu_f2h_alternative"));
2674 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2675 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2677 : "__gnu_h2f_alternative"));
2679 set_conv_libfunc (trunc_optab
, HFmode
, DFmode
,
2680 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2682 : "__gnu_d2h_alternative"));
2684 arm_block_arith_comp_libfuncs_for_mode (HFmode
);
2691 /* For all possible libcalls in BFmode, record NULL. */
2692 FOR_EACH_MODE_IN_CLASS (mode_iter
, MODE_FLOAT
)
2694 set_conv_libfunc (trunc_optab
, BFmode
, mode_iter
, NULL
);
2695 set_conv_libfunc (trunc_optab
, mode_iter
, BFmode
, NULL
);
2696 set_conv_libfunc (sext_optab
, mode_iter
, BFmode
, NULL
);
2697 set_conv_libfunc (sext_optab
, BFmode
, mode_iter
, NULL
);
2699 arm_block_arith_comp_libfuncs_for_mode (BFmode
);
2701 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2703 const arm_fixed_mode_set fixed_arith_modes
[] =
2706 { E_UQQmode
, "uqq" },
2708 { E_UHQmode
, "uhq" },
2710 { E_USQmode
, "usq" },
2712 { E_UDQmode
, "udq" },
2714 { E_UTQmode
, "utq" },
2716 { E_UHAmode
, "uha" },
2718 { E_USAmode
, "usa" },
2720 { E_UDAmode
, "uda" },
2722 { E_UTAmode
, "uta" }
2724 const arm_fixed_mode_set fixed_conv_modes
[] =
2727 { E_UQQmode
, "uqq" },
2729 { E_UHQmode
, "uhq" },
2731 { E_USQmode
, "usq" },
2733 { E_UDQmode
, "udq" },
2735 { E_UTQmode
, "utq" },
2737 { E_UHAmode
, "uha" },
2739 { E_USAmode
, "usa" },
2741 { E_UDAmode
, "uda" },
2743 { E_UTAmode
, "uta" },
2754 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2756 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2757 "add", fixed_arith_modes
[i
].name
, 3);
2758 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2759 "ssadd", fixed_arith_modes
[i
].name
, 3);
2760 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2761 "usadd", fixed_arith_modes
[i
].name
, 3);
2762 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2763 "sub", fixed_arith_modes
[i
].name
, 3);
2764 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2765 "sssub", fixed_arith_modes
[i
].name
, 3);
2766 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2767 "ussub", fixed_arith_modes
[i
].name
, 3);
2768 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2769 "mul", fixed_arith_modes
[i
].name
, 3);
2770 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2771 "ssmul", fixed_arith_modes
[i
].name
, 3);
2772 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2773 "usmul", fixed_arith_modes
[i
].name
, 3);
2774 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2775 "div", fixed_arith_modes
[i
].name
, 3);
2776 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2777 "udiv", fixed_arith_modes
[i
].name
, 3);
2778 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2779 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2780 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2781 "usdiv", fixed_arith_modes
[i
].name
, 3);
2782 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2783 "neg", fixed_arith_modes
[i
].name
, 2);
2784 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2785 "ssneg", fixed_arith_modes
[i
].name
, 2);
2786 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2787 "usneg", fixed_arith_modes
[i
].name
, 2);
2788 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2789 "ashl", fixed_arith_modes
[i
].name
, 3);
2790 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2791 "ashr", fixed_arith_modes
[i
].name
, 3);
2792 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2793 "lshr", fixed_arith_modes
[i
].name
, 3);
2794 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2795 "ssashl", fixed_arith_modes
[i
].name
, 3);
2796 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2797 "usashl", fixed_arith_modes
[i
].name
, 3);
2798 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2799 "cmp", fixed_arith_modes
[i
].name
, 2);
2802 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2803 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2806 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2807 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2810 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2811 fixed_conv_modes
[j
].mode
, "fract",
2812 fixed_conv_modes
[i
].name
,
2813 fixed_conv_modes
[j
].name
);
2814 arm_set_fixed_conv_libfunc (satfract_optab
,
2815 fixed_conv_modes
[i
].mode
,
2816 fixed_conv_modes
[j
].mode
, "satfract",
2817 fixed_conv_modes
[i
].name
,
2818 fixed_conv_modes
[j
].name
);
2819 arm_set_fixed_conv_libfunc (fractuns_optab
,
2820 fixed_conv_modes
[i
].mode
,
2821 fixed_conv_modes
[j
].mode
, "fractuns",
2822 fixed_conv_modes
[i
].name
,
2823 fixed_conv_modes
[j
].name
);
2824 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2825 fixed_conv_modes
[i
].mode
,
2826 fixed_conv_modes
[j
].mode
, "satfractuns",
2827 fixed_conv_modes
[i
].name
,
2828 fixed_conv_modes
[j
].name
);
2832 if (TARGET_AAPCS_BASED
)
2833 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2835 speculation_barrier_libfunc
= init_one_libfunc ("__speculation_barrier");
2838 /* On AAPCS systems, this is the "struct __va_list". */
2839 static GTY(()) tree va_list_type
;
2841 /* Return the type to use as __builtin_va_list. */
2843 arm_build_builtin_va_list (void)
2848 if (!TARGET_AAPCS_BASED
)
2849 return std_build_builtin_va_list ();
2851 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2859 The C Library ABI further reinforces this definition in \S
2862 We must follow this definition exactly. The structure tag
2863 name is visible in C++ mangled names, and thus forms a part
2864 of the ABI. The field name may be used by people who
2865 #include <stdarg.h>. */
2866 /* Create the type. */
2867 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2868 /* Give it the required name. */
2869 va_list_name
= build_decl (BUILTINS_LOCATION
,
2871 get_identifier ("__va_list"),
2873 DECL_ARTIFICIAL (va_list_name
) = 1;
2874 TYPE_NAME (va_list_type
) = va_list_name
;
2875 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2876 /* Create the __ap field. */
2877 ap_field
= build_decl (BUILTINS_LOCATION
,
2879 get_identifier ("__ap"),
2881 DECL_ARTIFICIAL (ap_field
) = 1;
2882 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2883 TYPE_FIELDS (va_list_type
) = ap_field
;
2884 /* Compute its layout. */
2885 layout_type (va_list_type
);
2887 return va_list_type
;
2890 /* Return an expression of type "void *" pointing to the next
2891 available argument in a variable-argument list. VALIST is the
2892 user-level va_list object, of type __builtin_va_list. */
2894 arm_extract_valist_ptr (tree valist
)
2896 if (TREE_TYPE (valist
) == error_mark_node
)
2897 return error_mark_node
;
2899 /* On an AAPCS target, the pointer is stored within "struct
2901 if (TARGET_AAPCS_BASED
)
2903 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2904 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2905 valist
, ap_field
, NULL_TREE
);
2911 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2913 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2915 valist
= arm_extract_valist_ptr (valist
);
2916 std_expand_builtin_va_start (valist
, nextarg
);
2919 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2921 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2924 valist
= arm_extract_valist_ptr (valist
);
2925 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2928 /* Check any incompatible options that the user has specified. */
2930 arm_option_check_internal (struct gcc_options
*opts
)
2932 int flags
= opts
->x_target_flags
;
2934 /* iWMMXt and NEON are incompatible. */
2936 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_neon
))
2937 error ("iWMMXt and NEON are incompatible");
2939 /* Make sure that the processor choice does not conflict with any of the
2940 other command line choices. */
2941 if (TARGET_ARM_P (flags
)
2942 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
))
2943 error ("target CPU does not support ARM mode");
2945 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2946 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2947 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2949 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2950 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2952 /* If this target is normally configured to use APCS frames, warn if they
2953 are turned off and debugging is turned on. */
2954 if (TARGET_ARM_P (flags
)
2955 && write_symbols
!= NO_DEBUG
2956 && !TARGET_APCS_FRAME
2957 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2958 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2961 /* iWMMXt unsupported under Thumb mode. */
2962 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2963 error ("iWMMXt unsupported under Thumb mode");
2965 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2966 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2968 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2970 error ("RTP PIC is incompatible with Thumb");
2974 if (target_pure_code
|| target_slow_flash_data
)
2976 const char *flag
= (target_pure_code
? "-mpure-code" :
2977 "-mslow-flash-data");
2978 bool common_unsupported_modes
= arm_arch_notm
|| flag_pic
|| TARGET_NEON
;
2980 /* We only support -mslow-flash-data on M-profile targets with
2982 if (target_slow_flash_data
&& (!TARGET_HAVE_MOVT
|| common_unsupported_modes
))
2983 error ("%s only supports non-pic code on M-profile targets with the "
2984 "MOVT instruction", flag
);
2986 /* We only support -mpure-code on M-profile targets. */
2987 if (target_pure_code
&& common_unsupported_modes
)
2988 error ("%s only supports non-pic code on M-profile targets", flag
);
2990 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2991 -mword-relocations forbids relocation of MOVT/MOVW. */
2992 if (target_word_relocations
)
2993 error ("%s incompatible with %<-mword-relocations%>", flag
);
2997 /* Recompute the global settings depending on target attribute options. */
3000 arm_option_params_internal (void)
3002 /* If we are not using the default (ARM mode) section anchor offset
3003 ranges, then set the correct ranges now. */
3006 /* Thumb-1 LDR instructions cannot have negative offsets.
3007 Permissible positive offset ranges are 5-bit (for byte loads),
3008 6-bit (for halfword loads), or 7-bit (for word loads).
3009 Empirical results suggest a 7-bit anchor range gives the best
3010 overall code size. */
3011 targetm
.min_anchor_offset
= 0;
3012 targetm
.max_anchor_offset
= 127;
3014 else if (TARGET_THUMB2
)
3016 /* The minimum is set such that the total size of the block
3017 for a particular anchor is 248 + 1 + 4095 bytes, which is
3018 divisible by eight, ensuring natural spacing of anchors. */
3019 targetm
.min_anchor_offset
= -248;
3020 targetm
.max_anchor_offset
= 4095;
3024 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
3025 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
3028 /* Increase the number of conditional instructions with -Os. */
3029 max_insns_skipped
= optimize_size
? 4 : current_tune
->max_insns_skipped
;
3031 /* For THUMB2, we limit the conditional sequence to one IT block. */
3033 max_insns_skipped
= MIN (max_insns_skipped
, MAX_INSN_PER_IT_BLOCK
);
3036 targetm
.md_asm_adjust
= thumb1_md_asm_adjust
;
3038 targetm
.md_asm_adjust
= arm_md_asm_adjust
;
3041 /* True if -mflip-thumb should next add an attribute for the default
3042 mode, false if it should next add an attribute for the opposite mode. */
3043 static GTY(()) bool thumb_flipper
;
3045 /* Options after initial target override. */
3046 static GTY(()) tree init_optimize
;
3049 arm_override_options_after_change_1 (struct gcc_options
*opts
,
3050 struct gcc_options
*opts_set
)
3052 /* -falign-functions without argument: supply one. */
3053 if (opts
->x_flag_align_functions
&& !opts_set
->x_str_align_functions
)
3054 opts
->x_str_align_functions
= TARGET_THUMB_P (opts
->x_target_flags
)
3055 && opts
->x_optimize_size
? "2" : "4";
3058 /* Implement targetm.override_options_after_change. */
3061 arm_override_options_after_change (void)
3063 arm_override_options_after_change_1 (&global_options
, &global_options_set
);
3066 /* Implement TARGET_OPTION_RESTORE. */
3068 arm_option_restore (struct gcc_options */
* opts */
,
3069 struct gcc_options */
* opts_set */
,
3070 struct cl_target_option
*ptr
)
3072 arm_configure_build_target (&arm_active_target
, ptr
, false);
3073 arm_option_reconfigure_globals ();
3076 /* Reset options between modes that the user has specified. */
3078 arm_option_override_internal (struct gcc_options
*opts
,
3079 struct gcc_options
*opts_set
)
3081 arm_override_options_after_change_1 (opts
, opts_set
);
3083 if (TARGET_INTERWORK
&& !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3085 /* The default is to enable interworking, so this warning message would
3086 be confusing to users who have just compiled with
3087 eg, -march=armv4. */
3088 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3089 opts
->x_target_flags
&= ~MASK_INTERWORK
;
3092 if (TARGET_THUMB_P (opts
->x_target_flags
)
3093 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3095 warning (0, "target CPU does not support THUMB instructions");
3096 opts
->x_target_flags
&= ~MASK_THUMB
;
3099 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
3101 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3102 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
3105 /* Callee super interworking implies thumb interworking. Adding
3106 this to the flags here simplifies the logic elsewhere. */
3107 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
3108 opts
->x_target_flags
|= MASK_INTERWORK
;
3110 /* need to remember initial values so combinaisons of options like
3111 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3112 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
3114 if (! opts_set
->x_arm_restrict_it
)
3115 opts
->x_arm_restrict_it
= arm_arch8
;
3117 /* ARM execution state and M profile don't have [restrict] IT. */
3118 if (!TARGET_THUMB2_P (opts
->x_target_flags
) || !arm_arch_notm
)
3119 opts
->x_arm_restrict_it
= 0;
3121 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3122 if (!opts_set
->x_arm_restrict_it
3123 && (opts_set
->x_arm_cpu_string
|| opts_set
->x_arm_tune_string
))
3124 opts
->x_arm_restrict_it
= 0;
3126 /* Enable -munaligned-access by default for
3127 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3128 i.e. Thumb2 and ARM state only.
3129 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3130 - ARMv8 architecture-base processors.
3132 Disable -munaligned-access by default for
3133 - all pre-ARMv6 architecture-based processors
3134 - ARMv6-M architecture-based processors
3135 - ARMv8-M Baseline processors. */
3137 if (! opts_set
->x_unaligned_access
)
3139 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
3140 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
3142 else if (opts
->x_unaligned_access
== 1
3143 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
3145 warning (0, "target CPU does not support unaligned accesses");
3146 opts
->x_unaligned_access
= 0;
3149 /* Don't warn since it's on by default in -O2. */
3150 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3151 opts
->x_flag_schedule_insns
= 0;
3153 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
3155 /* Disable shrink-wrap when optimizing function for size, since it tends to
3156 generate additional returns. */
3157 if (optimize_function_for_size_p (cfun
)
3158 && TARGET_THUMB2_P (opts
->x_target_flags
))
3159 opts
->x_flag_shrink_wrap
= false;
3161 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
3163 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3164 - epilogue_insns - does not accurately model the corresponding insns
3165 emitted in the asm file. In particular, see the comment in thumb_exit
3166 'Find out how many of the (return) argument registers we can corrupt'.
3167 As a consequence, the epilogue may clobber registers without fipa-ra
3168 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3169 TODO: Accurately model clobbers for epilogue_insns and reenable
3171 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3172 opts
->x_flag_ipa_ra
= 0;
3174 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
3176 /* Thumb2 inline assembly code should always use unified syntax.
3177 This will apply to ARM and Thumb1 eventually. */
3178 if (TARGET_THUMB2_P (opts
->x_target_flags
))
3179 opts
->x_inline_asm_unified
= true;
3181 if (arm_stack_protector_guard
== SSP_GLOBAL
3182 && opts
->x_arm_stack_protector_guard_offset_str
)
3184 error ("incompatible options %<-mstack-protector-guard=global%> and "
3185 "%<-mstack-protector-guard-offset=%s%>",
3186 arm_stack_protector_guard_offset_str
);
3189 if (opts
->x_arm_stack_protector_guard_offset_str
)
3192 const char *str
= arm_stack_protector_guard_offset_str
;
3194 long offs
= strtol (arm_stack_protector_guard_offset_str
, &end
, 0);
3195 if (!*str
|| *end
|| errno
)
3196 error ("%qs is not a valid offset in %qs", str
,
3197 "-mstack-protector-guard-offset=");
3198 arm_stack_protector_guard_offset
= offs
;
3201 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3202 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
3206 static sbitmap isa_all_fpubits_internal
;
3207 static sbitmap isa_all_fpbits
;
3208 static sbitmap isa_quirkbits
;
3210 /* Configure a build target TARGET from the user-specified options OPTS and
3211 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3212 architecture have been specified, but the two are not identical. */
3214 arm_configure_build_target (struct arm_build_target
*target
,
3215 struct cl_target_option
*opts
,
3216 bool warn_compatible
)
3218 const cpu_option
*arm_selected_tune
= NULL
;
3219 const arch_option
*arm_selected_arch
= NULL
;
3220 const cpu_option
*arm_selected_cpu
= NULL
;
3221 const arm_fpu_desc
*arm_selected_fpu
= NULL
;
3222 const char *tune_opts
= NULL
;
3223 const char *arch_opts
= NULL
;
3224 const char *cpu_opts
= NULL
;
3226 bitmap_clear (target
->isa
);
3227 target
->core_name
= NULL
;
3228 target
->arch_name
= NULL
;
3230 if (opts
->x_arm_arch_string
)
3232 arm_selected_arch
= arm_parse_arch_option_name (all_architectures
,
3234 opts
->x_arm_arch_string
);
3235 arch_opts
= strchr (opts
->x_arm_arch_string
, '+');
3238 if (opts
->x_arm_cpu_string
)
3240 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "-mcpu",
3241 opts
->x_arm_cpu_string
);
3242 cpu_opts
= strchr (opts
->x_arm_cpu_string
, '+');
3243 arm_selected_tune
= arm_selected_cpu
;
3244 /* If taking the tuning from -mcpu, we don't need to rescan the
3245 options for tuning. */
3248 if (opts
->x_arm_tune_string
)
3250 arm_selected_tune
= arm_parse_cpu_option_name (all_cores
, "-mtune",
3251 opts
->x_arm_tune_string
);
3252 tune_opts
= strchr (opts
->x_arm_tune_string
, '+');
3255 if (arm_selected_arch
)
3257 arm_initialize_isa (target
->isa
, arm_selected_arch
->common
.isa_bits
);
3258 arm_parse_option_features (target
->isa
, &arm_selected_arch
->common
,
3261 if (arm_selected_cpu
)
3263 auto_sbitmap
cpu_isa (isa_num_bits
);
3264 auto_sbitmap
isa_delta (isa_num_bits
);
3266 arm_initialize_isa (cpu_isa
, arm_selected_cpu
->common
.isa_bits
);
3267 arm_parse_option_features (cpu_isa
, &arm_selected_cpu
->common
,
3269 bitmap_xor (isa_delta
, cpu_isa
, target
->isa
);
3270 /* Ignore any bits that are quirk bits. */
3271 bitmap_and_compl (isa_delta
, isa_delta
, isa_quirkbits
);
3272 /* If the user (or the default configuration) has specified a
3273 specific FPU, then ignore any bits that depend on the FPU
3274 configuration. Do similarly if using the soft-float
3276 if (opts
->x_arm_fpu_index
!= TARGET_FPU_auto
3277 || arm_float_abi
== ARM_FLOAT_ABI_SOFT
)
3278 bitmap_and_compl (isa_delta
, isa_delta
, isa_all_fpbits
);
3280 if (!bitmap_empty_p (isa_delta
))
3282 if (warn_compatible
)
3283 warning (0, "switch %<-mcpu=%s%> conflicts "
3284 "with switch %<-march=%s%>",
3285 opts
->x_arm_cpu_string
,
3286 opts
->x_arm_arch_string
);
3288 /* -march wins for code generation.
3289 -mcpu wins for default tuning. */
3290 if (!arm_selected_tune
)
3291 arm_selected_tune
= arm_selected_cpu
;
3293 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3294 target
->arch_name
= arm_selected_arch
->common
.name
;
3298 /* Architecture and CPU are essentially the same.
3299 Prefer the CPU setting. */
3300 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3301 target
->core_name
= arm_selected_cpu
->common
.name
;
3302 /* Copy the CPU's capabilities, so that we inherit the
3303 appropriate extensions and quirks. */
3304 bitmap_copy (target
->isa
, cpu_isa
);
3309 /* Pick a CPU based on the architecture. */
3310 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3311 target
->arch_name
= arm_selected_arch
->common
.name
;
3312 /* Note: target->core_name is left unset in this path. */
3315 else if (arm_selected_cpu
)
3317 target
->core_name
= arm_selected_cpu
->common
.name
;
3318 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3319 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3321 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3323 /* If the user did not specify a processor or architecture, choose
3327 const cpu_option
*sel
;
3328 auto_sbitmap
sought_isa (isa_num_bits
);
3329 bitmap_clear (sought_isa
);
3330 auto_sbitmap
default_isa (isa_num_bits
);
3332 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "default CPU",
3333 TARGET_CPU_DEFAULT
);
3334 cpu_opts
= strchr (TARGET_CPU_DEFAULT
, '+');
3335 gcc_assert (arm_selected_cpu
->common
.name
);
3337 /* RWE: All of the selection logic below (to the end of this
3338 'if' clause) looks somewhat suspect. It appears to be mostly
3339 there to support forcing thumb support when the default CPU
3340 does not have thumb (somewhat dubious in terms of what the
3341 user might be expecting). I think it should be removed once
3342 support for the pre-thumb era cores is removed. */
3343 sel
= arm_selected_cpu
;
3344 arm_initialize_isa (default_isa
, sel
->common
.isa_bits
);
3345 arm_parse_option_features (default_isa
, &arm_selected_cpu
->common
,
3348 /* Now check to see if the user has specified any command line
3349 switches that require certain abilities from the cpu. */
3351 if (TARGET_INTERWORK
|| TARGET_THUMB
)
3352 bitmap_set_bit (sought_isa
, isa_bit_thumb
);
3354 /* If there are such requirements and the default CPU does not
3355 satisfy them, we need to run over the complete list of
3356 cores looking for one that is satisfactory. */
3357 if (!bitmap_empty_p (sought_isa
)
3358 && !bitmap_subset_p (sought_isa
, default_isa
))
3360 auto_sbitmap
candidate_isa (isa_num_bits
);
3361 /* We're only interested in a CPU with at least the
3362 capabilities of the default CPU and the required
3363 additional features. */
3364 bitmap_ior (default_isa
, default_isa
, sought_isa
);
3366 /* Try to locate a CPU type that supports all of the abilities
3367 of the default CPU, plus the extra abilities requested by
3369 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3371 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3372 /* An exact match? */
3373 if (bitmap_equal_p (default_isa
, candidate_isa
))
3377 if (sel
->common
.name
== NULL
)
3379 unsigned current_bit_count
= isa_num_bits
;
3380 const cpu_option
*best_fit
= NULL
;
3382 /* Ideally we would like to issue an error message here
3383 saying that it was not possible to find a CPU compatible
3384 with the default CPU, but which also supports the command
3385 line options specified by the programmer, and so they
3386 ought to use the -mcpu=<name> command line option to
3387 override the default CPU type.
3389 If we cannot find a CPU that has exactly the
3390 characteristics of the default CPU and the given
3391 command line options we scan the array again looking
3392 for a best match. The best match must have at least
3393 the capabilities of the perfect match. */
3394 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3396 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3398 if (bitmap_subset_p (default_isa
, candidate_isa
))
3402 bitmap_and_compl (candidate_isa
, candidate_isa
,
3404 count
= bitmap_popcount (candidate_isa
);
3406 if (count
< current_bit_count
)
3409 current_bit_count
= count
;
3413 gcc_assert (best_fit
);
3417 arm_selected_cpu
= sel
;
3420 /* Now we know the CPU, we can finally initialize the target
3422 target
->core_name
= arm_selected_cpu
->common
.name
;
3423 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3424 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3426 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3429 gcc_assert (arm_selected_cpu
);
3430 gcc_assert (arm_selected_arch
);
3432 if (opts
->x_arm_fpu_index
!= TARGET_FPU_auto
)
3434 arm_selected_fpu
= &all_fpus
[opts
->x_arm_fpu_index
];
3435 auto_sbitmap
fpu_bits (isa_num_bits
);
3437 arm_initialize_isa (fpu_bits
, arm_selected_fpu
->isa_bits
);
3438 /* This should clear out ALL bits relating to the FPU/simd
3439 extensions, to avoid potentially invalid combinations later on
3440 that we can't match. At present we only clear out those bits
3441 that can be set by -mfpu. This should be fixed in GCC-12. */
3442 bitmap_and_compl (target
->isa
, target
->isa
, isa_all_fpubits_internal
);
3443 bitmap_ior (target
->isa
, target
->isa
, fpu_bits
);
3446 /* If we have the soft-float ABI, clear any feature bits relating to use of
3447 floating-point operations. They'll just confuse things later on. */
3448 if (arm_float_abi
== ARM_FLOAT_ABI_SOFT
)
3449 bitmap_and_compl (target
->isa
, target
->isa
, isa_all_fpbits
);
3451 /* There may be implied bits which we still need to enable. These are
3452 non-named features which are needed to complete other sets of features,
3453 but cannot be enabled from arm-cpus.in due to being shared between
3454 multiple fgroups. Each entry in all_implied_fbits is of the form
3455 ante -> cons, meaning that if the feature "ante" is enabled, we should
3456 implicitly enable "cons". */
3457 const struct fbit_implication
*impl
= all_implied_fbits
;
3460 if (bitmap_bit_p (target
->isa
, impl
->ante
))
3461 bitmap_set_bit (target
->isa
, impl
->cons
);
3465 if (!arm_selected_tune
)
3466 arm_selected_tune
= arm_selected_cpu
;
3467 else /* Validate the features passed to -mtune. */
3468 arm_parse_option_features (NULL
, &arm_selected_tune
->common
, tune_opts
);
3470 const cpu_tune
*tune_data
= &all_tunes
[arm_selected_tune
- all_cores
];
3472 /* Finish initializing the target structure. */
3473 if (!target
->arch_name
)
3474 target
->arch_name
= arm_selected_arch
->common
.name
;
3475 target
->arch_pp_name
= arm_selected_arch
->arch
;
3476 target
->base_arch
= arm_selected_arch
->base_arch
;
3477 target
->profile
= arm_selected_arch
->profile
;
3479 target
->tune_flags
= tune_data
->tune_flags
;
3480 target
->tune
= tune_data
->tune
;
3481 target
->tune_core
= tune_data
->scheduler
;
3484 /* Fix up any incompatible options that the user has specified. */
3486 arm_option_override (void)
3488 static const enum isa_feature fpu_bitlist_internal
[]
3489 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
3490 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3491 static const enum isa_feature fp_bitlist
[]
3492 = { ISA_ALL_FP
, isa_bit_mve_float
, isa_nobit
};
3493 static const enum isa_feature quirk_bitlist
[] = { ISA_ALL_QUIRKS
, isa_nobit
};
3494 cl_target_option opts
;
3496 isa_quirkbits
= sbitmap_alloc (isa_num_bits
);
3497 arm_initialize_isa (isa_quirkbits
, quirk_bitlist
);
3499 isa_all_fpubits_internal
= sbitmap_alloc (isa_num_bits
);
3500 isa_all_fpbits
= sbitmap_alloc (isa_num_bits
);
3501 arm_initialize_isa (isa_all_fpubits_internal
, fpu_bitlist_internal
);
3502 arm_initialize_isa (isa_all_fpbits
, fp_bitlist
);
3504 arm_active_target
.isa
= sbitmap_alloc (isa_num_bits
);
3506 if (!OPTION_SET_P (arm_fpu_index
))
3511 ok
= opt_enum_arg_to_value (OPT_mfpu_
, FPUTYPE_AUTO
, &fpu_index
,
3514 arm_fpu_index
= (enum fpu_type
) fpu_index
;
3517 cl_target_option_save (&opts
, &global_options
, &global_options_set
);
3518 arm_configure_build_target (&arm_active_target
, &opts
, true);
3520 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3521 SUBTARGET_OVERRIDE_OPTIONS
;
3524 /* Initialize boolean versions of the architectural flags, for use
3525 in the arm.md file and for enabling feature flags. */
3526 arm_option_reconfigure_globals ();
3528 arm_tune
= arm_active_target
.tune_core
;
3529 tune_flags
= arm_active_target
.tune_flags
;
3530 current_tune
= arm_active_target
.tune
;
3532 /* TBD: Dwarf info for apcs frame is not handled yet. */
3533 if (TARGET_APCS_FRAME
)
3534 flag_shrink_wrap
= false;
3536 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3538 warning (0, "%<-mapcs-stack-check%> incompatible with "
3539 "%<-mno-apcs-frame%>");
3540 target_flags
|= MASK_APCS_FRAME
;
3543 if (TARGET_POKE_FUNCTION_NAME
)
3544 target_flags
|= MASK_APCS_FRAME
;
3546 if (TARGET_APCS_REENT
&& flag_pic
)
3547 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3549 if (TARGET_APCS_REENT
)
3550 warning (0, "APCS reentrant code not supported. Ignored");
3552 /* Set up some tuning parameters. */
3553 arm_ld_sched
= (tune_flags
& TF_LDSCHED
) != 0;
3554 arm_tune_strongarm
= (tune_flags
& TF_STRONG
) != 0;
3555 arm_tune_wbuf
= (tune_flags
& TF_WBUF
) != 0;
3556 arm_tune_xscale
= (tune_flags
& TF_XSCALE
) != 0;
3557 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3558 arm_m_profile_small_mul
= (tune_flags
& TF_SMALLMUL
) != 0;
3560 /* For arm2/3 there is no need to do any scheduling if we are doing
3561 software floating-point. */
3562 if (TARGET_SOFT_FLOAT
&& (tune_flags
& TF_NO_MODE32
))
3563 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3565 /* Override the default structure alignment for AAPCS ABI. */
3566 if (!OPTION_SET_P (arm_structure_size_boundary
))
3568 if (TARGET_AAPCS_BASED
)
3569 arm_structure_size_boundary
= 8;
3573 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3575 if (arm_structure_size_boundary
!= 8
3576 && arm_structure_size_boundary
!= 32
3577 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3579 if (ARM_DOUBLEWORD_ALIGN
)
3581 "structure size boundary can only be set to 8, 32 or 64");
3583 warning (0, "structure size boundary can only be set to 8 or 32");
3584 arm_structure_size_boundary
3585 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3589 if (TARGET_VXWORKS_RTP
)
3591 if (!OPTION_SET_P (arm_pic_data_is_text_relative
))
3592 arm_pic_data_is_text_relative
= 0;
3595 && !arm_pic_data_is_text_relative
3596 && !(OPTION_SET_P (target_flags
) & MASK_SINGLE_PIC_BASE
))
3597 /* When text & data segments don't have a fixed displacement, the
3598 intended use is with a single, read only, pic base register.
3599 Unless the user explicitly requested not to do that, set
3601 target_flags
|= MASK_SINGLE_PIC_BASE
;
3603 /* If stack checking is disabled, we can use r10 as the PIC register,
3604 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3605 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3607 if (TARGET_VXWORKS_RTP
)
3608 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3609 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3612 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3613 arm_pic_register
= 9;
3615 /* If in FDPIC mode then force arm_pic_register to be r9. */
3618 arm_pic_register
= FDPIC_REGNUM
;
3620 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3623 if (arm_pic_register_string
!= NULL
)
3625 int pic_register
= decode_reg_name (arm_pic_register_string
);
3628 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3630 /* Prevent the user from choosing an obviously stupid PIC register. */
3631 else if (pic_register
< 0 || call_used_or_fixed_reg_p (pic_register
)
3632 || pic_register
== HARD_FRAME_POINTER_REGNUM
3633 || pic_register
== STACK_POINTER_REGNUM
3634 || pic_register
>= PC_REGNUM
3635 || (TARGET_VXWORKS_RTP
3636 && (unsigned int) pic_register
!= arm_pic_register
))
3637 error ("unable to use %qs for PIC register", arm_pic_register_string
);
3639 arm_pic_register
= pic_register
;
3643 target_word_relocations
= 1;
3645 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3646 if (fix_cm3_ldrd
== 2)
3648 if (bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_cm3_ldrd
))
3654 /* Enable fix_vlldm by default if required. */
3657 if (bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_vlldm
))
3663 /* Enable fix_aes by default if required. */
3664 if (fix_aes_erratum_1742098
== 2)
3666 if (bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_aes_1742098
))
3667 fix_aes_erratum_1742098
= 1;
3669 fix_aes_erratum_1742098
= 0;
3672 /* Hot/Cold partitioning is not currently supported, since we can't
3673 handle literal pool placement in that case. */
3674 if (flag_reorder_blocks_and_partition
)
3676 inform (input_location
,
3677 "%<-freorder-blocks-and-partition%> not supported "
3678 "on this architecture");
3679 flag_reorder_blocks_and_partition
= 0;
3680 flag_reorder_blocks
= 1;
3684 /* Hoisting PIC address calculations more aggressively provides a small,
3685 but measurable, size reduction for PIC code. Therefore, we decrease
3686 the bar for unrestricted expression hoisting to the cost of PIC address
3687 calculation, which is 2 instructions. */
3688 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
3689 param_gcse_unrestricted_cost
, 2);
3691 /* ARM EABI defaults to strict volatile bitfields. */
3692 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3693 && abi_version_at_least(2))
3694 flag_strict_volatile_bitfields
= 1;
3696 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3697 have deemed it beneficial (signified by setting
3698 prefetch.num_slots to 1 or more). */
3699 if (flag_prefetch_loop_arrays
< 0
3702 && current_tune
->prefetch
.num_slots
> 0)
3703 flag_prefetch_loop_arrays
= 1;
3705 /* Set up parameters to be used in prefetching algorithm. Do not
3706 override the defaults unless we are tuning for a core we have
3707 researched values for. */
3708 if (current_tune
->prefetch
.num_slots
> 0)
3709 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
3710 param_simultaneous_prefetches
,
3711 current_tune
->prefetch
.num_slots
);
3712 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3713 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
3714 param_l1_cache_line_size
,
3715 current_tune
->prefetch
.l1_cache_line_size
);
3716 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3718 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
3719 param_destruct_interfere_size
,
3720 current_tune
->prefetch
.l1_cache_line_size
);
3721 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
3722 param_construct_interfere_size
,
3723 current_tune
->prefetch
.l1_cache_line_size
);
3727 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3728 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3730 /* More recent Cortex chips have a 64-byte cache line, but are marked
3731 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3732 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
3733 param_destruct_interfere_size
, 64);
3734 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
3735 param_construct_interfere_size
, 64);
3738 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3739 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
3740 param_l1_cache_size
,
3741 current_tune
->prefetch
.l1_cache_size
);
3743 /* Look through ready list and all of queue for instructions
3744 relevant for L2 auto-prefetcher. */
3745 int sched_autopref_queue_depth
;
3747 switch (current_tune
->sched_autopref
)
3749 case tune_params::SCHED_AUTOPREF_OFF
:
3750 sched_autopref_queue_depth
= -1;
3753 case tune_params::SCHED_AUTOPREF_RANK
:
3754 sched_autopref_queue_depth
= 0;
3757 case tune_params::SCHED_AUTOPREF_FULL
:
3758 sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3765 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
3766 param_sched_autopref_queue_depth
,
3767 sched_autopref_queue_depth
);
3769 /* Currently, for slow flash data, we just disable literal pools. We also
3770 disable it for pure-code. */
3771 if (target_slow_flash_data
|| target_pure_code
)
3772 arm_disable_literal_pool
= true;
3774 /* Disable scheduling fusion by default if it's not armv7 processor
3775 or doesn't prefer ldrd/strd. */
3776 if (flag_schedule_fusion
== 2
3777 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3778 flag_schedule_fusion
= 0;
3780 /* Need to remember initial options before they are overriden. */
3781 init_optimize
= build_optimization_node (&global_options
,
3782 &global_options_set
);
3784 arm_options_perform_arch_sanity_checks ();
3785 arm_option_override_internal (&global_options
, &global_options_set
);
3786 arm_option_check_internal (&global_options
);
3787 arm_option_params_internal ();
3789 /* Create the default target_options structure. */
3790 target_option_default_node
= target_option_current_node
3791 = build_target_option_node (&global_options
, &global_options_set
);
3793 /* Register global variables with the garbage collector. */
3794 arm_add_gc_roots ();
3796 /* Init initial mode for testing. */
3797 thumb_flipper
= TARGET_THUMB
;
3801 /* Reconfigure global status flags from the active_target.isa. */
3803 arm_option_reconfigure_globals (void)
3805 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_active_target
.arch_pp_name
);
3806 arm_base_arch
= arm_active_target
.base_arch
;
3808 /* Initialize boolean versions of the architectural flags, for use
3809 in the arm.md file. */
3810 arm_arch4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv4
);
3811 arm_arch4t
= arm_arch4
&& bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3812 arm_arch5t
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5t
);
3813 arm_arch5te
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5te
);
3814 arm_arch6
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6
);
3815 arm_arch6k
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6k
);
3816 arm_arch_notm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
);
3817 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3818 arm_arch7
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7
);
3819 arm_arch7em
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7em
);
3820 arm_arch8
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8
);
3821 arm_arch8_1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_1
);
3822 arm_arch8_2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_2
);
3823 arm_arch8_3
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_3
);
3824 arm_arch8_4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_4
);
3825 arm_arch8_1m_main
= bitmap_bit_p (arm_active_target
.isa
,
3826 isa_bit_armv8_1m_main
);
3827 arm_arch_thumb1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3828 arm_arch_thumb2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb2
);
3829 arm_arch_xscale
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_xscale
);
3830 arm_arch_iwmmxt
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt
);
3831 arm_arch_iwmmxt2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt2
);
3832 arm_arch_thumb_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_tdiv
);
3833 arm_arch_arm_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_adiv
);
3834 arm_arch_crc
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_crc32
);
3835 arm_arch_cmse
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_cmse
);
3836 arm_arch_lpae
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_lpae
);
3837 arm_arch_i8mm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_i8mm
);
3838 arm_arch_bf16
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_bf16
);
3840 arm_fp16_inst
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_fp16
);
3843 if (arm_fp16_format
== ARM_FP16_FORMAT_ALTERNATIVE
)
3844 error ("selected fp16 options are incompatible");
3845 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3849 arm_arch_cde_coproc
= 0;
3850 int cde_bits
[] = {isa_bit_cdecp0
, isa_bit_cdecp1
, isa_bit_cdecp2
,
3851 isa_bit_cdecp3
, isa_bit_cdecp4
, isa_bit_cdecp5
,
3852 isa_bit_cdecp6
, isa_bit_cdecp7
};
3853 for (int i
= 0, e
= ARRAY_SIZE (cde_bits
); i
< e
; i
++)
3855 int cde_bit
= bitmap_bit_p (arm_active_target
.isa
, cde_bits
[i
]);
3858 arm_arch_cde
|= cde_bit
;
3859 arm_arch_cde_coproc
|= arm_arch_cde_coproc_bits
[i
];
3863 /* And finally, set up some quirks. */
3864 arm_arch_no_volatile_ce
3865 = bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_volatile_ce
);
3866 arm_arch6kz
= arm_arch6k
&& bitmap_bit_p (arm_active_target
.isa
,
3867 isa_bit_quirk_armv6kz
);
3869 /* Use the cp15 method if it is available. */
3870 if (target_thread_pointer
== TP_AUTO
)
3872 if (arm_arch6k
&& !TARGET_THUMB1
)
3873 target_thread_pointer
= TP_CP15
;
3875 target_thread_pointer
= TP_SOFT
;
3878 if (!TARGET_HARD_TP
&& arm_stack_protector_guard
== SSP_TLSREG
)
3879 error("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3882 /* Perform some validation between the desired architecture and the rest of the
3885 arm_options_perform_arch_sanity_checks (void)
3887 /* V5T code we generate is completely interworking capable, so we turn off
3888 TARGET_INTERWORK here to avoid many tests later on. */
3890 /* XXX However, we must pass the right pre-processor defines to CPP
3891 or GLD can get confused. This is a hack. */
3892 if (TARGET_INTERWORK
)
3893 arm_cpp_interwork
= 1;
3896 target_flags
&= ~MASK_INTERWORK
;
3898 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3899 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3901 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3902 error ("iwmmxt abi requires an iwmmxt capable cpu");
3904 /* BPABI targets use linker tricks to allow interworking on cores
3905 without thumb support. */
3906 if (TARGET_INTERWORK
3908 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3910 warning (0, "target CPU does not support interworking" );
3911 target_flags
&= ~MASK_INTERWORK
;
3914 /* If soft-float is specified then don't use FPU. */
3915 if (TARGET_SOFT_FLOAT
)
3916 arm_fpu_attr
= FPU_NONE
;
3918 arm_fpu_attr
= FPU_VFP
;
3920 if (TARGET_AAPCS_BASED
)
3922 if (TARGET_CALLER_INTERWORKING
)
3923 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3925 if (TARGET_CALLEE_INTERWORKING
)
3926 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3929 /* __fp16 support currently assumes the core has ldrh. */
3930 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3931 sorry ("%<__fp16%> and no ldrh");
3933 if (use_cmse
&& !arm_arch_cmse
)
3934 error ("target CPU does not support ARMv8-M Security Extensions");
3936 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3937 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3938 if (use_cmse
&& TARGET_HARD_FLOAT
&& LAST_VFP_REGNUM
> LAST_LO_VFP_REGNUM
)
3939 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3942 if (TARGET_AAPCS_BASED
)
3944 if (arm_abi
== ARM_ABI_IWMMXT
)
3945 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3946 else if (TARGET_HARD_FLOAT_ABI
)
3948 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3949 if (!bitmap_bit_p (arm_active_target
.isa
, isa_bit_vfpv2
)
3950 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_mve
))
3951 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
3954 arm_pcs_default
= ARM_PCS_AAPCS
;
3958 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
)
3959 sorry ("%<-mfloat-abi=hard%> and VFP");
3961 if (arm_abi
== ARM_ABI_APCS
)
3962 arm_pcs_default
= ARM_PCS_APCS
;
3964 arm_pcs_default
= ARM_PCS_ATPCS
;
/* NOTE(review): extraction artifact -- leading integers are original file
   line numbers; gaps in that numbering mean lines (braces, returns, early
   exits) were dropped.  Annotations below describe only the surviving text.  */
3968 /* Test whether a local function descriptor is canonical, i.e.,
3969 whether we can use GOTOFFFUNCDESC to compute the address of the
/* Predicate over a SYMBOL_REF FNX; non-local symbols are rejected first
   (dropped line presumably returns false here -- TODO confirm).  */
3972 arm_fdpic_local_funcdesc_p (rtx fnx
)
3975 enum symbol_visibility vis
;
3981 if (! SYMBOL_REF_LOCAL_P (fnx
))
/* Recover the FUNCTION_DECL behind the symbol so visibility can be read.  */
3984 fn
= SYMBOL_REF_DECL (fnx
);
/* Save the decl's current visibility; it is restored below after the
   binds-local query.  */
3989 vis
= DECL_VISIBILITY (fn
);
3991 if (vis
== VISIBILITY_PROTECTED
)
3992 /* Private function descriptors for protected functions are not
3993 canonical. Temporarily change the visibility to global so that
3994 we can ensure uniqueness of funcdesc pointers. */
3995 DECL_VISIBILITY (fn
) = VISIBILITY_DEFAULT
;
/* Ask the generic hook whether FN binds locally under the current PIC
   setting; the result is presumably the function's return value
   (return statement was dropped by extraction -- TODO confirm).  */
3997 ret
= default_binds_local_p_1 (fn
, flag_pic
);
/* Undo the temporary visibility change made above.  */
3999 DECL_VISIBILITY (fn
) = vis
;
/* Initialize the obstack used for minipool bookkeeping and grab its base
   pointer.  Declared `static void arm_add_gc_roots (void)` in the forward
   declarations at the top of this file.  (Extraction dropped the `static
   void` line and the function braces.)  */
4005 arm_add_gc_roots (void)
4007 gcc_obstack_init(&minipool_obstack
);
/* obstack_alloc with size 0 yields the current base of the obstack; it is
   remembered so the pool storage can later be released back to this mark.  */
4008 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
4011 /* A table of known ARM exception types.
4012 For use with the interrupt function attribute. */
/* Each entry pairs an attribute-argument string with the ARM_FT_* function
   type flags it maps to.  (The struct's opening lines were dropped by
   extraction; only the two fields survive.)  */
4016 const char *const arg
;
4017 const unsigned long return_value
;
/* Lookup table consumed by arm_isr_value below; both upper- and lower-case
   spellings are accepted.  The NULL/ARM_FT_NORMAL entry is the sentinel
   terminating the linear search.  */
4021 static const isr_attribute_arg isr_attribute_args
[] =
4023 { "IRQ", ARM_FT_ISR
},
4024 { "irq", ARM_FT_ISR
},
4025 { "FIQ", ARM_FT_FIQ
},
4026 { "fiq", ARM_FT_FIQ
},
4027 { "ABORT", ARM_FT_ISR
},
4028 { "abort", ARM_FT_ISR
},
4029 { "UNDEF", ARM_FT_EXCEPTION
},
4030 { "undef", ARM_FT_EXCEPTION
},
4031 { "SWI", ARM_FT_EXCEPTION
},
4032 { "swi", ARM_FT_EXCEPTION
},
4033 { NULL
, ARM_FT_NORMAL
}
4036 /* Returns the (interrupt) function type of the current
4037 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4039 static unsigned long
/* ARGUMENT is the TREE_VALUE list attached to an "isr"/"interrupt"
   attribute; may be NULL_TREE (no argument given).  */
4040 arm_isr_value (tree argument
)
4042 const isr_attribute_arg
* ptr
;
/* NOTE(review): extraction dropped the condition guarding this early
   return (original lines 4043-4045); presumably a !TARGET_ARM-style check
   -- TODO confirm against upstream.  */
4046 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
4048 /* No argument - default to IRQ. */
4049 if (argument
== NULL_TREE
)
4052 /* Get the value of the argument. */
/* Reject malformed attributes: the argument must be a string constant.  */
4053 if (TREE_VALUE (argument
) == NULL_TREE
4054 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
4055 return ARM_FT_UNKNOWN
;
4057 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
4059 /* Check it against the list of known arguments. */
/* Linear scan of isr_attribute_args; the table is NULL-terminated.  */
4060 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
4061 if (streq (arg
, ptr
->arg
))
4062 return ptr
->return_value
;
4064 /* An unrecognized interrupt type. */
4065 return ARM_FT_UNKNOWN
;
4068 /* Computes the type of the current function. */
4070 static unsigned long
/* Builds an ARM_FT_* bitmask for current_function_decl from its tree
   attributes and properties; cached by arm_current_func_type below.  */
4071 arm_compute_func_type (void)
4073 unsigned long type
= ARM_FT_UNKNOWN
;
/* Only meaningful inside a function body.  */
4077 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
)
4079 /* Decide if the current function is volatile. Such functions
4080 never return, and many memory cycles can be saved by not storing
4081 register values that will never be needed again. This optimization
4082 was added to speed up context switching in a kernel application. */
/* NOTE(review): the first operand of this && chain (original line 4083,
   presumably a TREE_THIS_VOLATILE or noreturn-related check) was dropped
   by extraction -- TODO confirm.  */
4084 && (TREE_NOTHROW (current_function_decl
)
4085 || !(flag_unwind_tables
4087 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
4088 && TREE_THIS_VOLATILE (current_function_decl
))
4089 type
|= ARM_FT_VOLATILE
;
/* Nested functions receive a static chain.  */
4091 if (cfun
->static_chain_decl
!= NULL
)
4092 type
|= ARM_FT_NESTED
;
4094 attr
= DECL_ATTRIBUTES (current_function_decl
);
/* "naked": body is pure user asm, no prologue/epilogue.  */
4096 a
= lookup_attribute ("naked", attr
);
4098 type
|= ARM_FT_NAKED
;
/* "isr" and "interrupt" are synonyms; whichever is found feeds
   arm_isr_value below.  */
4100 a
= lookup_attribute ("isr", attr
);
4102 a
= lookup_attribute ("interrupt", attr
);
/* No interrupt attribute: plain function, interworked if -mthumb-interwork.  */
4105 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
4107 type
|= arm_isr_value (TREE_VALUE (a
));
/* ARMv8-M Security Extensions entry point.  */
4109 if (lookup_attribute ("cmse_nonsecure_entry", attr
))
4110 type
|= ARM_FT_CMSE_ENTRY
;
4115 /* Returns the type of the current function. */
/* Memoizing accessor: computes the ARM_FT_* mask once per function via
   arm_compute_func_type and caches it in cfun->machine->func_type.  */
4118 arm_current_func_type (void)
4120 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
4121 cfun
->machine
->func_type
= arm_compute_func_type ();
4123 return cfun
->machine
->func_type
;
/* Target hook: whether incoming arguments may be spilled to stack slots.
   False for naked functions, which have no compiler-generated frame.  */
4127 arm_allocate_stack_slots_for_args (void)
4129 /* Naked functions should not allocate stack slots for arguments. */
4130 return !IS_NAKED (arm_current_func_type ());
/* Target hook: should -Wreturn-type style warnings fire for DECL?
   Returns true unless DECL carries the "naked" attribute.  */
4134 arm_warn_func_return (tree decl
)
4136 /* Naked functions are implemented entirely in assembly, including the
4137 return sequence, so suppress warnings about this. */
4138 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
4142 /* Output assembler code for a block containing the constant parts
4143 of a trampoline, leaving space for the variable parts.
4145 On the ARM, (if r8 is the static chain regnum, and remembering that
4146 referencing pc adds an offset of 8) the trampoline looks like:
4149 .word static chain value
4150 .word function's address
4151 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4153 In FDPIC mode, the trampoline looks like:
4154 .word trampoline address
4155 .word trampoline GOT address
4156 ldr r12, [pc, #8] ; #4 for Arm mode
4157 ldr r9, [pc, #8] ; #4 for Arm mode
4158 ldr pc, [pc, #8] ; #4 for Arm mode
4159 .word static chain value
4161 .word function's address
4165 arm_asm_trampoline_template (FILE *f
)
4167 fprintf (f
, "\t.syntax unified\n");
4171 /* The first two words are a function descriptor pointing to the
4172 trampoline code just below. */
4174 fprintf (f
, "\t.arm\n");
4175 else if (TARGET_THUMB2
)
4176 fprintf (f
, "\t.thumb\n");
4178 /* Only ARM and Thumb-2 are supported. */
4181 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4182 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4183 /* Trampoline code which sets the static chain register but also
4184 PIC register before jumping into real code. */
4185 asm_fprintf (f
, "\tldr\t%r, [%r, #%d]\n",
4186 STATIC_CHAIN_REGNUM
, PC_REGNUM
,
4187 TARGET_THUMB2
? 8 : 4);
4188 asm_fprintf (f
, "\tldr\t%r, [%r, #%d]\n",
4189 PIC_OFFSET_TABLE_REGNUM
, PC_REGNUM
,
4190 TARGET_THUMB2
? 8 : 4);
4191 asm_fprintf (f
, "\tldr\t%r, [%r, #%d]\n",
4192 PC_REGNUM
, PC_REGNUM
,
4193 TARGET_THUMB2
? 8 : 4);
4194 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4196 else if (TARGET_ARM
)
4198 fprintf (f
, "\t.arm\n");
4199 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
4200 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
4202 else if (TARGET_THUMB2
)
4204 fprintf (f
, "\t.thumb\n");
4205 /* The Thumb-2 trampoline is similar to the arm implementation.
4206 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4207 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
4208 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
4209 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
4213 ASM_OUTPUT_ALIGN (f
, 2);
4214 fprintf (f
, "\t.code\t16\n");
4215 fprintf (f
, ".Ltrampoline_start:\n");
4216 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
4217 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
4218 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
4219 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
4220 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
4221 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
4223 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4224 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
4227 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4230 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
4232 rtx fnaddr
, mem
, a_tramp
;
4234 emit_block_move (m_tramp
, assemble_trampoline_template (),
4235 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
4239 rtx funcdesc
= XEXP (DECL_RTL (fndecl
), 0);
4240 rtx fnaddr
= gen_rtx_MEM (Pmode
, funcdesc
);
4241 rtx gotaddr
= gen_rtx_MEM (Pmode
, plus_constant (Pmode
, funcdesc
, 4));
4242 /* The function start address is at offset 8, but in Thumb mode
4243 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4245 rtx trampoline_code_start
4246 = plus_constant (Pmode
, XEXP (m_tramp
, 0), TARGET_THUMB2
? 9 : 8);
4248 /* Write initial funcdesc which points to the trampoline. */
4249 mem
= adjust_address (m_tramp
, SImode
, 0);
4250 emit_move_insn (mem
, trampoline_code_start
);
4251 mem
= adjust_address (m_tramp
, SImode
, 4);
4252 emit_move_insn (mem
, gen_rtx_REG (Pmode
, PIC_OFFSET_TABLE_REGNUM
));
4253 /* Setup static chain. */
4254 mem
= adjust_address (m_tramp
, SImode
, 20);
4255 emit_move_insn (mem
, chain_value
);
4256 /* GOT + real function entry point. */
4257 mem
= adjust_address (m_tramp
, SImode
, 24);
4258 emit_move_insn (mem
, gotaddr
);
4259 mem
= adjust_address (m_tramp
, SImode
, 28);
4260 emit_move_insn (mem
, fnaddr
);
4264 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
4265 emit_move_insn (mem
, chain_value
);
4267 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
4268 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4269 emit_move_insn (mem
, fnaddr
);
4272 a_tramp
= XEXP (m_tramp
, 0);
4273 maybe_emit_call_builtin___clear_cache (a_tramp
,
4274 plus_constant (ptr_mode
,
4279 /* Thumb trampolines should be entered in thumb mode, so set
4280 the bottom bit of the address. */
/* Target hook: post-process a trampoline entry address before use.  */
4283 arm_trampoline_adjust_address (rtx addr
)
4285 /* For FDPIC don't fix trampoline address since it's a function
4286 descriptor and not a function address. */
4287 if (TARGET_THUMB
&& !TARGET_FDPIC
)
/* OR in bit 0 so a BX to this address switches to Thumb state.  */
4288 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
4289 NULL
, 0, OPTAB_LIB_WIDEN
);
4293 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4294 includes call-clobbered registers too. If this is a leaf function
4295 we can just examine the registers used by the RTL, but otherwise we
4296 have to assume that whatever function is called might clobber
4297 anything, and so we have to save all the call-clobbered registers
4299 static inline bool reg_needs_saving_p (unsigned reg
)
4301 unsigned long func_type
= arm_current_func_type ();
/* Interrupt handlers: a register needs saving if it is live anywhere, or
   (non-leaf) if it is a call-clobbered core register below the VFP bank.
   NOTE(review): the return statements for both branches were dropped by
   extraction; polarity inferred from the head comment -- TODO confirm.  */
4303 if (IS_INTERRUPT (func_type
))
4304 if (df_regs_ever_live_p (reg
)
4305 /* Save call-clobbered core registers. */
4306 || (! crtl
->is_leaf
&& call_used_or_fixed_reg_p (reg
) && reg
< FIRST_VFP_REGNUM
))
/* Ordinary functions: dead or call-clobbered registers need no save.  */
4311 if (!df_regs_ever_live_p (reg
)
4312 || call_used_or_fixed_reg_p (reg
))
4318 /* Return 1 if it is possible to return using a single instruction.
4319 If SIBLING is non-null, this is a test for a return before a sibling
4320 call. SIBLING is the call insn, so we can examine its register usage. */
4323 use_return_insn (int iscond
, rtx sibling
)
4326 unsigned int func_type
;
4327 unsigned long saved_int_regs
;
4328 unsigned HOST_WIDE_INT stack_adjust
;
4329 arm_stack_offsets
*offsets
;
4331 /* Never use a return instruction before reload has run. */
4332 if (!reload_completed
)
4335 func_type
= arm_current_func_type ();
4337 /* Naked, volatile and stack alignment functions need special
4339 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
4342 /* So do interrupt functions that use the frame pointer and Thumb
4343 interrupt functions. */
4344 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
4347 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
4348 && !optimize_function_for_size_p (cfun
))
4351 offsets
= arm_get_frame_offsets ();
4352 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
4354 /* As do variadic functions. */
4355 if (crtl
->args
.pretend_args_size
4356 || cfun
->machine
->uses_anonymous_args
4357 /* Or if the function calls __builtin_eh_return () */
4358 || crtl
->calls_eh_return
4359 /* Or if the function calls alloca */
4360 || cfun
->calls_alloca
4361 /* Or if there is a stack adjustment. However, if the stack pointer
4362 is saved on the stack, we can use a pre-incrementing stack load. */
4363 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
4364 && stack_adjust
== 4))
4365 /* Or if the static chain register was saved above the frame, under the
4366 assumption that the stack pointer isn't saved on the stack. */
4367 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
4368 && arm_compute_static_chain_stack_bytes() != 0))
4371 saved_int_regs
= offsets
->saved_regs_mask
;
4373 /* Unfortunately, the insn
4375 ldmib sp, {..., sp, ...}
4377 triggers a bug on most SA-110 based devices, such that the stack
4378 pointer won't be correctly restored if the instruction takes a
4379 page fault. We work around this problem by popping r3 along with
4380 the other registers, since that is never slower than executing
4381 another instruction.
4383 We test for !arm_arch5t here, because code for any architecture
4384 less than this could potentially be run on one of the buggy
4386 if (stack_adjust
== 4 && !arm_arch5t
&& TARGET_ARM
)
4388 /* Validate that r3 is a call-clobbered register (always true in
4389 the default abi) ... */
4390 if (!call_used_or_fixed_reg_p (3))
4393 /* ... that it isn't being used for a return value ... */
4394 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
4397 /* ... or for a tail-call argument ... */
4400 gcc_assert (CALL_P (sibling
));
4402 if (find_regno_fusage (sibling
, USE
, 3))
4406 /* ... and that there are no call-saved registers in r0-r2
4407 (always true in the default ABI). */
4408 if (saved_int_regs
& 0x7)
4412 /* Can't be done if interworking with Thumb, and any registers have been
4414 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
4417 /* On StrongARM, conditional returns are expensive if they aren't
4418 taken and multiple registers have been stacked. */
4419 if (iscond
&& arm_tune_strongarm
)
4421 /* Conditional return when just the LR is stored is a simple
4422 conditional-load instruction, that's not expensive. */
4423 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
4427 && arm_pic_register
!= INVALID_REGNUM
4428 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
4432 /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
4433 several instructions if anything needs to be popped. Armv8.1-M Mainline
4434 also needs several instructions to save and restore FP context. */
4435 if (IS_CMSE_ENTRY (func_type
) && (saved_int_regs
|| TARGET_HAVE_FPCXT_CMSE
))
4438 /* If there are saved registers but the LR isn't saved, then we need
4439 two instructions for the return. */
4440 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
4443 /* Can't be done if any of the VFP regs are pushed,
4444 since this also requires an insn. */
4445 if (TARGET_VFP_BASE
)
4446 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
4447 if (reg_needs_saving_p (regno
))
4450 if (TARGET_REALLY_IWMMXT
)
4451 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
4452 if (reg_needs_saving_p (regno
))
4458 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4459 shrink-wrapping if possible. This is the case if we need to emit a
4460 prologue, which we can test by looking at the offsets. */
4462 use_simple_return_p (void)
4464 arm_stack_offsets
*offsets
;
4466 /* Note this function can be called before or after reload. */
/* Before reload the frame layout may be stale; recompute it so the
   offsets queried below are trustworthy.  */
4467 if (!reload_completed
)
4468 arm_compute_frame_layout ();
4470 offsets
= arm_get_frame_offsets ();
/* A nonzero outgoing-args offset implies a prologue exists, which is when
   shrink-wrapping with simple_return pays off.  */
4471 return offsets
->outgoing_args
!= 0;
4474 /* Return TRUE if int I is a valid immediate ARM constant. */
/* An ARM data-processing immediate is an 8-bit value rotated right by an
   even amount; Thumb-2 and MOVT-capable targets accept more forms below.  */
4477 const_ok_for_arm (HOST_WIDE_INT i
)
4481 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4482 be all zero, or all one. */
4483 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
4484 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
4485 != ((~(unsigned HOST_WIDE_INT
) 0)
4486 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
/* Work on the low 32 bits only from here on.  */
4489 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
4491 /* Fast return for 0 and small values. We must do this for zero, since
4492 the code below can't handle that one case. */
4493 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
4496 /* Get the number of trailing zeros. */
4497 lowbit
= ffs((int) i
) - 1;
4499 /* Only even shifts are allowed in ARM mode so round down to the
4500 nearest even number. */
/* Accept an 8-bit field anywhere it lands after the rounding above.  */
4504 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
4509 /* Allow rotated constants in ARM mode. */
/* The three masks cover 8-bit fields that wrap around bit 31.  */
4511 && ((i
& ~0xc000003f) == 0
4512 || (i
& ~0xf000000f) == 0
4513 || (i
& ~0xfc000003) == 0))
4516 else if (TARGET_THUMB2
)
4520 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
/* NOTE(review): the computation of v (original lines 4517-4522) was
   dropped by extraction; presumably the low byte/halfword replicated --
   TODO confirm.  */
4523 if (i
== v
|| i
== (v
| (v
<< 8)))
4526 /* Allow repeated pattern 0xXY00XY00. */
4532 else if (TARGET_HAVE_MOVT
)
4534 /* Thumb-1 Targets with MOVT. */
4544 /* Return true if I is a valid constant for the operation CODE. */
/* Falls back to equivalent encodings (movw, mvn, rsb, negated/inverted
   forms) when I is not directly encodable.  The switch over CODE was
   partially dropped by extraction; case labels survive only for MINUS.  */
4546 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
/* Directly encodable: accept regardless of CODE.  */
4548 if (const_ok_for_arm (i
))
4554 /* See if we can use movw. */
4555 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
4558 /* Otherwise, try mvn. */
4559 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4562 /* See if we can use addw or subw. */
/* 12-bit immediates fit Thumb-2 addw/subw in either sign.  */
4564 && ((i
& 0xfffff000) == 0
4565 || ((-i
) & 0xfffff000) == 0))
/* PLUS (presumably): try the negated constant as a subtraction.  */
4586 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
4588 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
/* IOR/XOR/AND-style cases (labels dropped): try the bitwise complement.  */
4594 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4598 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4605 /* Return true if I is a valid di mode constant for the operation CODE. */
/* Splits the 64-bit constant into 32-bit halves and tests each half
   against the SImode predicates appropriate for CODE.  */
4607 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
4609 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4610 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4611 rtx hi
= GEN_INT (hi_val
);
4612 rtx lo
= GEN_INT (lo_val
);
/* NOTE(review): the switch over CODE (original lines 4613-4621) was
   dropped by extraction; the two returns below belong to different cases
   (AND/IOR-style vs. PLUS, presumably) -- TODO confirm.  0xFFFFFFFF halves
   are accepted because they can be formed with mvn.  */
4622 return const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF
4623 || const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF;
/* High half must suit a mvn-capable operand, low half an add operand.  */
4625 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4632 /* Emit a sequence of insns to handle a large constant.
4633 CODE is the code of the operation required, it can be any of SET, PLUS,
4634 IOR, AND, XOR, MINUS;
4635 MODE is the mode in which the operation is being performed;
4636 VAL is the integer to operate on;
4637 SOURCE is the other operand (a register, or a null-pointer for SET);
4638 SUBTARGETS means it is safe to create scratch registers if that will
4639 either produce a simpler sequence, or we will want to cse the values.
4640 Return value is the number of insns emitted. */
4642 /* ??? Tweak this for thumb2. */
4644 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4645 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
4649 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4650 cond
= COND_EXEC_TEST (PATTERN (insn
));
4654 if (subtargets
|| code
== SET
4655 || (REG_P (target
) && REG_P (source
)
4656 && REGNO (target
) != REGNO (source
)))
4658 /* After arm_reorg has been called, we can't fix up expensive
4659 constants by pushing them into memory so we must synthesize
4660 them in-line, regardless of the cost. This is only likely to
4661 be more costly on chips that have load delay slots and we are
4662 compiling without running the scheduler (so no splitting
4663 occurred before the final instruction emission).
4665 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4667 if (!cfun
->machine
->after_arm_reorg
4669 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4671 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4676 /* Currently SET is the only monadic value for CODE, all
4677 the rest are diadic. */
4678 if (TARGET_USE_MOVT
)
4679 arm_emit_movpair (target
, GEN_INT (val
));
4681 emit_set_insn (target
, GEN_INT (val
));
4687 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4689 if (TARGET_USE_MOVT
)
4690 arm_emit_movpair (temp
, GEN_INT (val
));
4692 emit_set_insn (temp
, GEN_INT (val
));
4694 /* For MINUS, the value is subtracted from, since we never
4695 have subtraction of a constant. */
4697 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4699 emit_set_insn (target
,
4700 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4706 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4710 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4711 ARM/THUMB2 immediates, and add up to VAL.
4712 Thr function return value gives the number of insns required. */
4714 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4715 struct four_ints
*return_sequence
)
4717 int best_consecutive_zeros
= 0;
4721 struct four_ints tmp_sequence
;
4723 /* If we aren't targeting ARM, the best place to start is always at
4724 the bottom, otherwise look more closely. */
4727 for (i
= 0; i
< 32; i
+= 2)
4729 int consecutive_zeros
= 0;
4731 if (!(val
& (3 << i
)))
4733 while ((i
< 32) && !(val
& (3 << i
)))
4735 consecutive_zeros
+= 2;
4738 if (consecutive_zeros
> best_consecutive_zeros
)
4740 best_consecutive_zeros
= consecutive_zeros
;
4741 best_start
= i
- consecutive_zeros
;
4748 /* So long as it won't require any more insns to do so, it's
4749 desirable to emit a small constant (in bits 0...9) in the last
4750 insn. This way there is more chance that it can be combined with
4751 a later addressing insn to form a pre-indexed load or store
4752 operation. Consider:
4754 *((volatile int *)0xe0000100) = 1;
4755 *((volatile int *)0xe0000110) = 2;
4757 We want this to wind up as:
4761 str rB, [rA, #0x100]
4763 str rB, [rA, #0x110]
4765 rather than having to synthesize both large constants from scratch.
4767 Therefore, we calculate how many insns would be required to emit
4768 the constant starting from `best_start', and also starting from
4769 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4770 yield a shorter sequence, we may as well use zero. */
4771 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4773 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4775 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4776 if (insns2
<= insns1
)
4778 *return_sequence
= tmp_sequence
;
4786 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4788 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4789 struct four_ints
*return_sequence
, int i
)
4791 int remainder
= val
& 0xffffffff;
4794 /* Try and find a way of doing the job in either two or three
4797 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4798 location. We start at position I. This may be the MSB, or
4799 optimial_immediate_sequence may have positioned it at the largest block
4800 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4801 wrapping around to the top of the word when we drop off the bottom.
4802 In the worst case this code should produce no more than four insns.
4804 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4805 constants, shifted to any arbitrary location. We should always start
4810 unsigned int b1
, b2
, b3
, b4
;
4811 unsigned HOST_WIDE_INT result
;
4814 gcc_assert (insns
< 4);
4819 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4820 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4823 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4824 /* We can use addw/subw for the last 12 bits. */
4828 /* Use an 8-bit shifted/rotated immediate. */
4832 result
= remainder
& ((0x0ff << end
)
4833 | ((i
< end
) ? (0xff >> (32 - end
))
4840 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4841 arbitrary shifts. */
4842 i
-= TARGET_ARM
? 2 : 1;
4846 /* Next, see if we can do a better job with a thumb2 replicated
4849 We do it this way around to catch the cases like 0x01F001E0 where
4850 two 8-bit immediates would work, but a replicated constant would
4853 TODO: 16-bit constants that don't clear all the bits, but still win.
4854 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4857 b1
= (remainder
& 0xff000000) >> 24;
4858 b2
= (remainder
& 0x00ff0000) >> 16;
4859 b3
= (remainder
& 0x0000ff00) >> 8;
4860 b4
= remainder
& 0xff;
4864 /* The 8-bit immediate already found clears b1 (and maybe b2),
4865 but must leave b3 and b4 alone. */
4867 /* First try to find a 32-bit replicated constant that clears
4868 almost everything. We can assume that we can't do it in one,
4869 or else we wouldn't be here. */
4870 unsigned int tmp
= b1
& b2
& b3
& b4
;
4871 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4873 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4874 + (tmp
== b3
) + (tmp
== b4
);
4876 && (matching_bytes
>= 3
4877 || (matching_bytes
== 2
4878 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4880 /* At least 3 of the bytes match, and the fourth has at
4881 least as many bits set, or two of the bytes match
4882 and it will only require one more insn to finish. */
4890 /* Second, try to find a 16-bit replicated constant that can
4891 leave three of the bytes clear. If b2 or b4 is already
4892 zero, then we can. If the 8-bit from above would not
4893 clear b2 anyway, then we still win. */
4894 else if (b1
== b3
&& (!b2
|| !b4
4895 || (remainder
& 0x00ff0000 & ~result
)))
4897 result
= remainder
& 0xff00ff00;
4903 /* The 8-bit immediate already found clears b2 (and maybe b3)
4904 and we don't get here unless b1 is alredy clear, but it will
4905 leave b4 unchanged. */
4907 /* If we can clear b2 and b4 at once, then we win, since the
4908 8-bits couldn't possibly reach that far. */
4911 result
= remainder
& 0x00ff00ff;
4917 return_sequence
->i
[insns
++] = result
;
4918 remainder
&= ~result
;
4920 if (code
== SET
|| code
== MINUS
)
4928 /* Emit an instruction with the indicated PATTERN. If COND is
4929 non-NULL, conditionalize the execution of the instruction on COND
/* Helper for arm_gen_constant and friends: wraps PATTERN in a COND_EXEC
   when a condition is supplied, then emits it.  COND is copied so the
   caller's rtx is not shared into the new insn.  */
4933 emit_constant_insn (rtx cond
, rtx pattern
)
4936 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4937 emit_insn (pattern
);
4940 /* As above, but extra parameter GENERATE which, if clear, suppresses
4944 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4945 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4946 int subtargets
, int generate
)
4950 int final_invert
= 0;
4952 int set_sign_bit_copies
= 0;
4953 int clear_sign_bit_copies
= 0;
4954 int clear_zero_bit_copies
= 0;
4955 int set_zero_bit_copies
= 0;
4956 int insns
= 0, neg_insns
, inv_insns
;
4957 unsigned HOST_WIDE_INT temp1
, temp2
;
4958 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4959 struct four_ints
*immediates
;
4960 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4962 /* Find out which operations are safe for a given CODE. Also do a quick
4963 check for degenerate cases; these can occur when DImode operations
4976 if (remainder
== 0xffffffff)
4979 emit_constant_insn (cond
,
4980 gen_rtx_SET (target
,
4981 GEN_INT (ARM_SIGN_EXTEND (val
))));
4987 if (reload_completed
&& rtx_equal_p (target
, source
))
4991 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
5000 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
5003 if (remainder
== 0xffffffff)
5005 if (reload_completed
&& rtx_equal_p (target
, source
))
5008 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
5017 if (reload_completed
&& rtx_equal_p (target
, source
))
5020 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
5024 if (remainder
== 0xffffffff)
5027 emit_constant_insn (cond
,
5028 gen_rtx_SET (target
,
5029 gen_rtx_NOT (mode
, source
)));
5036 /* We treat MINUS as (val - source), since (source - val) is always
5037 passed as (source + (-val)). */
5041 emit_constant_insn (cond
,
5042 gen_rtx_SET (target
,
5043 gen_rtx_NEG (mode
, source
)));
5046 if (const_ok_for_arm (val
))
5049 emit_constant_insn (cond
,
5050 gen_rtx_SET (target
,
5051 gen_rtx_MINUS (mode
, GEN_INT (val
),
5062 /* If we can do it in one insn get out quickly. */
5063 if (const_ok_for_op (val
, code
))
5066 emit_constant_insn (cond
,
5067 gen_rtx_SET (target
,
5069 ? gen_rtx_fmt_ee (code
, mode
, source
,
5075 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5077 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
5078 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
5082 if (mode
== SImode
&& i
== 16)
5083 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5085 emit_constant_insn (cond
,
5086 gen_zero_extendhisi2
5087 (target
, gen_lowpart (HImode
, source
)));
5089 /* Extz only supports SImode, but we can coerce the operands
5091 emit_constant_insn (cond
,
5092 gen_extzv_t2 (gen_lowpart (SImode
, target
),
5093 gen_lowpart (SImode
, source
),
5094 GEN_INT (i
), const0_rtx
));
5100 /* Calculate a few attributes that may be useful for specific
5102 /* Count number of leading zeros. */
5103 for (i
= 31; i
>= 0; i
--)
5105 if ((remainder
& (1 << i
)) == 0)
5106 clear_sign_bit_copies
++;
5111 /* Count number of leading 1's. */
5112 for (i
= 31; i
>= 0; i
--)
5114 if ((remainder
& (1 << i
)) != 0)
5115 set_sign_bit_copies
++;
5120 /* Count number of trailing zero's. */
5121 for (i
= 0; i
<= 31; i
++)
5123 if ((remainder
& (1 << i
)) == 0)
5124 clear_zero_bit_copies
++;
5129 /* Count number of trailing 1's. */
5130 for (i
= 0; i
<= 31; i
++)
5132 if ((remainder
& (1 << i
)) != 0)
5133 set_zero_bit_copies
++;
5141 /* See if we can do this by sign_extending a constant that is known
5142 to be negative. This is a good, way of doing it, since the shift
5143 may well merge into a subsequent insn. */
5144 if (set_sign_bit_copies
> 1)
5146 if (const_ok_for_arm
5147 (temp1
= ARM_SIGN_EXTEND (remainder
5148 << (set_sign_bit_copies
- 1))))
5152 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5153 emit_constant_insn (cond
,
5154 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
5155 emit_constant_insn (cond
,
5156 gen_ashrsi3 (target
, new_src
,
5157 GEN_INT (set_sign_bit_copies
- 1)));
5161 /* For an inverted constant, we will need to set the low bits,
5162 these will be shifted out of harm's way. */
5163 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
5164 if (const_ok_for_arm (~temp1
))
5168 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5169 emit_constant_insn (cond
,
5170 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
5171 emit_constant_insn (cond
,
5172 gen_ashrsi3 (target
, new_src
,
5173 GEN_INT (set_sign_bit_copies
- 1)));
5179 /* See if we can calculate the value as the difference between two
5180 valid immediates. */
5181 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
5183 int topshift
= clear_sign_bit_copies
& ~1;
5185 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
5186 & (0xff000000 >> topshift
));
5188 /* If temp1 is zero, then that means the 9 most significant
5189 bits of remainder were 1 and we've caused it to overflow.
5190 When topshift is 0 we don't need to do anything since we
5191 can borrow from 'bit 32'. */
5192 if (temp1
== 0 && topshift
!= 0)
5193 temp1
= 0x80000000 >> (topshift
- 1);
5195 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
5197 if (const_ok_for_arm (temp2
))
5201 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5202 emit_constant_insn (cond
,
5203 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
5204 emit_constant_insn (cond
,
5205 gen_addsi3 (target
, new_src
,
5213 /* See if we can generate this by setting the bottom (or the top)
5214 16 bits, and then shifting these into the other half of the
5215 word. We only look for the simplest cases, to do more would cost
5216 too much. Be careful, however, not to generate this when the
5217 alternative would take fewer insns. */
5218 if (val
& 0xffff0000)
5220 temp1
= remainder
& 0xffff0000;
5221 temp2
= remainder
& 0x0000ffff;
5223 /* Overlaps outside this range are best done using other methods. */
5224 for (i
= 9; i
< 24; i
++)
5226 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
5227 && !const_ok_for_arm (temp2
))
5229 rtx new_src
= (subtargets
5230 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
5232 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
5233 source
, subtargets
, generate
);
5241 gen_rtx_ASHIFT (mode
, source
,
5248 /* Don't duplicate cases already considered. */
5249 for (i
= 17; i
< 24; i
++)
5251 if (((temp1
| (temp1
>> i
)) == remainder
)
5252 && !const_ok_for_arm (temp1
))
5254 rtx new_src
= (subtargets
5255 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
5257 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
5258 source
, subtargets
, generate
);
5263 gen_rtx_SET (target
,
5266 gen_rtx_LSHIFTRT (mode
, source
,
5277 /* If we have IOR or XOR, and the constant can be loaded in a
5278 single instruction, and we can find a temporary to put it in,
5279 then this can be done in two instructions instead of 3-4. */
5281 /* TARGET can't be NULL if SUBTARGETS is 0 */
5282 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
5284 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
5288 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5290 emit_constant_insn (cond
,
5291 gen_rtx_SET (sub
, GEN_INT (val
)));
5292 emit_constant_insn (cond
,
5293 gen_rtx_SET (target
,
5294 gen_rtx_fmt_ee (code
, mode
,
5305 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
5306 and the remainder 0s for e.g. 0xfff00000)
5307 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5309 This can be done in 2 instructions by using shifts with mov or mvn.
5314 mvn r0, r0, lsr #12 */
5315 if (set_sign_bit_copies
> 8
5316 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
5320 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5321 rtx shift
= GEN_INT (set_sign_bit_copies
);
5327 gen_rtx_ASHIFT (mode
,
5332 gen_rtx_SET (target
,
5334 gen_rtx_LSHIFTRT (mode
, sub
,
5341 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5343 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5345 For eg. r0 = r0 | 0xfff
5350 if (set_zero_bit_copies
> 8
5351 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
5355 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5356 rtx shift
= GEN_INT (set_zero_bit_copies
);
5362 gen_rtx_LSHIFTRT (mode
,
5367 gen_rtx_SET (target
,
5369 gen_rtx_ASHIFT (mode
, sub
,
5375 /* This will never be reached for Thumb2 because orn is a valid
5376 instruction. This is for Thumb1 and the ARM 32 bit cases.
5378 x = y | constant (such that ~constant is a valid constant)
5380 x = ~(~y & ~constant).
5382 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
5386 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
5387 emit_constant_insn (cond
,
5389 gen_rtx_NOT (mode
, source
)));
5392 sub
= gen_reg_rtx (mode
);
5393 emit_constant_insn (cond
,
5395 gen_rtx_AND (mode
, source
,
5397 emit_constant_insn (cond
,
5398 gen_rtx_SET (target
,
5399 gen_rtx_NOT (mode
, sub
)));
5406 /* See if two shifts will do 2 or more insn's worth of work. */
5407 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
5409 HOST_WIDE_INT shift_mask
= ((0xffffffff
5410 << (32 - clear_sign_bit_copies
))
5413 if ((remainder
| shift_mask
) != 0xffffffff)
5415 HOST_WIDE_INT new_val
5416 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5420 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5421 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
5422 new_src
, source
, subtargets
, 1);
5427 rtx targ
= subtargets
? NULL_RTX
: target
;
5428 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5429 targ
, source
, subtargets
, 0);
5435 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5436 rtx shift
= GEN_INT (clear_sign_bit_copies
);
5438 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
5439 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
5445 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
5447 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
5449 if ((remainder
| shift_mask
) != 0xffffffff)
5451 HOST_WIDE_INT new_val
5452 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5455 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5457 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5458 new_src
, source
, subtargets
, 1);
5463 rtx targ
= subtargets
? NULL_RTX
: target
;
5465 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5466 targ
, source
, subtargets
, 0);
5472 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5473 rtx shift
= GEN_INT (clear_zero_bit_copies
);
5475 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
5476 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
5488 /* Calculate what the instruction sequences would be if we generated it
5489 normally, negated, or inverted. */
5491 /* AND cannot be split into multiple insns, so invert and use BIC. */
5494 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
5497 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
5502 if (can_invert
|| final_invert
)
5503 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
5508 immediates
= &pos_immediates
;
5510 /* Is the negated immediate sequence more efficient? */
5511 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
5514 immediates
= &neg_immediates
;
5519 /* Is the inverted immediate sequence more efficient?
5520 We must allow for an extra NOT instruction for XOR operations, although
5521 there is some chance that the final 'mvn' will get optimized later. */
5522 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
5525 immediates
= &inv_immediates
;
5533 /* Now output the chosen sequence as instructions. */
5536 for (i
= 0; i
< insns
; i
++)
5538 rtx new_src
, temp1_rtx
;
5540 temp1
= immediates
->i
[i
];
5542 if (code
== SET
|| code
== MINUS
)
5543 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
5544 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
5545 new_src
= gen_reg_rtx (mode
);
5551 else if (can_negate
)
5554 temp1
= trunc_int_for_mode (temp1
, mode
);
5555 temp1_rtx
= GEN_INT (temp1
);
5559 else if (code
== MINUS
)
5560 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
5562 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
5564 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
5569 can_negate
= can_invert
;
5573 else if (code
== MINUS
)
5581 emit_constant_insn (cond
, gen_rtx_SET (target
,
5582 gen_rtx_NOT (mode
, source
)));
5589 /* Return TRUE if op is a constant where both the low and top words are
5590 suitable for RSB/RSC instructions. This is never true for Thumb, since
5591 we do not have RSC in that case. */
5593 arm_const_double_prefer_rsbs_rsc (rtx op
)
5595 /* Thumb lacks RSC, so we never prefer that sequence. */
5596 if (TARGET_THUMB
|| !CONST_INT_P (op
))
5598 HOST_WIDE_INT hi
, lo
;
5599 lo
= UINTVAL (op
) & 0xffffffffULL
;
5600 hi
= UINTVAL (op
) >> 32;
5601 return const_ok_for_arm (lo
) && const_ok_for_arm (hi
);
5604 /* Canonicalize a comparison so that we are more likely to recognize it.
5605 This can be done for a few constant compares, where we can make the
5606 immediate value easier to load. */
5609 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
5610 bool op0_preserve_value
)
5613 unsigned HOST_WIDE_INT i
, maxval
;
5615 mode
= GET_MODE (*op0
);
5616 if (mode
== VOIDmode
)
5617 mode
= GET_MODE (*op1
);
5619 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5621 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5622 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5623 either reversed or (for constant OP1) adjusted to GE/LT.
5624 Similarly for GTU/LEU in Thumb mode. */
5628 if (*code
== GT
|| *code
== LE
5629 || *code
== GTU
|| *code
== LEU
)
5631 /* Missing comparison. First try to use an available
5633 if (CONST_INT_P (*op1
))
5642 /* Try to convert to GE/LT, unless that would be more
5644 if (!arm_const_double_by_immediates (GEN_INT (i
+ 1))
5645 && arm_const_double_prefer_rsbs_rsc (*op1
))
5647 *op1
= GEN_INT (i
+ 1);
5648 *code
= *code
== GT
? GE
: LT
;
5652 /* GT maxval is always false, LE maxval is always true.
5653 We can't fold that away here as we must make a
5654 comparison, but we can fold them to comparisons
5655 with the same result that can be handled:
5656 op0 GT maxval -> op0 LT minval
5657 op0 LE maxval -> op0 GE minval
5658 where minval = (-maxval - 1). */
5659 *op1
= GEN_INT (-maxval
- 1);
5660 *code
= *code
== GT
? LT
: GE
;
5666 if (i
!= ~((unsigned HOST_WIDE_INT
) 0))
5668 /* Try to convert to GEU/LTU, unless that would
5669 be more expensive. */
5670 if (!arm_const_double_by_immediates (GEN_INT (i
+ 1))
5671 && arm_const_double_prefer_rsbs_rsc (*op1
))
5673 *op1
= GEN_INT (i
+ 1);
5674 *code
= *code
== GTU
? GEU
: LTU
;
5678 /* GTU ~0 is always false, LEU ~0 is always true.
5679 We can't fold that away here as we must make a
5680 comparison, but we can fold them to comparisons
5681 with the same result that can be handled:
5682 op0 GTU ~0 -> op0 LTU 0
5683 op0 LEU ~0 -> op0 GEU 0. */
5685 *code
= *code
== GTU
? LTU
: GEU
;
5694 if (!op0_preserve_value
)
5696 std::swap (*op0
, *op1
);
5697 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5703 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5704 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5705 to facilitate possible combining with a cmp into 'ands'. */
5707 && GET_CODE (*op0
) == ZERO_EXTEND
5708 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5709 && GET_MODE (XEXP (*op0
, 0)) == QImode
5710 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5711 && subreg_lowpart_p (XEXP (*op0
, 0))
5712 && *op1
== const0_rtx
)
5713 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5716 /* Comparisons smaller than DImode. Only adjust comparisons against
5717 an out-of-range constant. */
5718 if (!CONST_INT_P (*op1
)
5719 || const_ok_for_arm (INTVAL (*op1
))
5720 || const_ok_for_arm (- INTVAL (*op1
)))
5734 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5736 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5737 *code
= *code
== GT
? GE
: LT
;
5745 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5747 *op1
= GEN_INT (i
- 1);
5748 *code
= *code
== GE
? GT
: LE
;
5755 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5756 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5758 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5759 *code
= *code
== GTU
? GEU
: LTU
;
5767 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5769 *op1
= GEN_INT (i
- 1);
5770 *code
= *code
== GEU
? GTU
: LEU
;
5781 /* Define how to find the value returned by a function. */
5784 arm_function_value(const_tree type
, const_tree func
,
5785 bool outgoing ATTRIBUTE_UNUSED
)
5788 int unsignedp ATTRIBUTE_UNUSED
;
5789 rtx r ATTRIBUTE_UNUSED
;
5791 mode
= TYPE_MODE (type
);
5793 if (TARGET_AAPCS_BASED
)
5794 return aapcs_allocate_return_reg (mode
, type
, func
);
5796 /* Promote integer types. */
5797 if (INTEGRAL_TYPE_P (type
))
5798 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5800 /* Promotes small structs returned in a register to full-word size
5801 for big-endian AAPCS. */
5802 if (arm_return_in_msb (type
))
5804 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5805 if (size
% UNITS_PER_WORD
!= 0)
5807 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5808 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
5812 return arm_libcall_value_1 (mode
);
5815 /* libcall hashtable helpers. */
5817 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5819 static inline hashval_t
hash (const rtx_def
*);
5820 static inline bool equal (const rtx_def
*, const rtx_def
*);
5821 static inline void remove (rtx_def
*);
5825 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5827 return rtx_equal_p (p1
, p2
);
5831 libcall_hasher::hash (const rtx_def
*p1
)
5833 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5836 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5839 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5841 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5845 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5847 static bool init_done
= false;
5848 static libcall_table_type
*libcall_htab
= NULL
;
5854 libcall_htab
= new libcall_table_type (31);
5855 add_libcall (libcall_htab
,
5856 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5857 add_libcall (libcall_htab
,
5858 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5859 add_libcall (libcall_htab
,
5860 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5861 add_libcall (libcall_htab
,
5862 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5864 add_libcall (libcall_htab
,
5865 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5866 add_libcall (libcall_htab
,
5867 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5868 add_libcall (libcall_htab
,
5869 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5870 add_libcall (libcall_htab
,
5871 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5873 add_libcall (libcall_htab
,
5874 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5875 add_libcall (libcall_htab
,
5876 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5877 add_libcall (libcall_htab
,
5878 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5879 add_libcall (libcall_htab
,
5880 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5881 add_libcall (libcall_htab
,
5882 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5883 add_libcall (libcall_htab
,
5884 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5885 add_libcall (libcall_htab
,
5886 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5887 add_libcall (libcall_htab
,
5888 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5889 add_libcall (libcall_htab
,
5890 convert_optab_libfunc (sfix_optab
, SImode
, SFmode
));
5891 add_libcall (libcall_htab
,
5892 convert_optab_libfunc (ufix_optab
, SImode
, SFmode
));
5894 /* Values from double-precision helper functions are returned in core
5895 registers if the selected core only supports single-precision
5896 arithmetic, even if we are using the hard-float ABI. The same is
5897 true for single-precision helpers except in case of MVE, because in
5898 MVE we will be using the hard-float ABI on a CPU which doesn't support
5899 single-precision operations in hardware. In MVE the following check
5900 enables use of emulation for the single-precision arithmetic
5902 if (TARGET_HAVE_MVE
)
5904 add_libcall (libcall_htab
, optab_libfunc (add_optab
, SFmode
));
5905 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, SFmode
));
5906 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, SFmode
));
5907 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, SFmode
));
5908 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, SFmode
));
5909 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, SFmode
));
5910 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, SFmode
));
5911 add_libcall (libcall_htab
, optab_libfunc (le_optab
, SFmode
));
5912 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, SFmode
));
5913 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, SFmode
));
5914 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, SFmode
));
5916 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5917 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5918 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5919 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5920 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5921 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5922 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5923 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5924 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5925 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5926 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5927 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5929 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5931 add_libcall (libcall_htab
,
5932 convert_optab_libfunc (trunc_optab
, HFmode
, DFmode
));
5935 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5939 arm_libcall_value_1 (machine_mode mode
)
5941 if (TARGET_AAPCS_BASED
)
5942 return aapcs_libcall_value (mode
);
5943 else if (TARGET_IWMMXT_ABI
5944 && arm_vector_mode_supported_p (mode
))
5945 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5947 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5950 /* Define how to find the value returned by a library function
5951 assuming the value has mode MODE. */
5954 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5956 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5957 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5959 /* The following libcalls return their result in integer registers,
5960 even though they return a floating point value. */
5961 if (arm_libcall_uses_aapcs_base (libcall
))
5962 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5966 return arm_libcall_value_1 (mode
);
5969 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5972 arm_function_value_regno_p (const unsigned int regno
)
5974 if (regno
== ARG_REGISTER (1)
5976 && TARGET_AAPCS_BASED
5977 && TARGET_HARD_FLOAT
5978 && regno
== FIRST_VFP_REGNUM
)
5979 || (TARGET_IWMMXT_ABI
5980 && regno
== FIRST_IWMMXT_REGNUM
))
5986 /* Determine the amount of memory needed to store the possible return
5987 registers of an untyped call. */
5989 arm_apply_result_size (void)
5995 if (TARGET_HARD_FLOAT_ABI
)
5997 if (TARGET_IWMMXT_ABI
)
6004 /* Decide whether TYPE should be returned in memory (true)
6005 or in a register (false). FNTYPE is the type of the function making
6008 arm_return_in_memory (const_tree type
, const_tree fntype
)
6012 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
6014 if (TARGET_AAPCS_BASED
)
6016 /* Simple, non-aggregate types (ie not including vectors and
6017 complex) are always returned in a register (or registers).
6018 We don't care about which register here, so we can short-cut
6019 some of the detail. */
6020 if (!AGGREGATE_TYPE_P (type
)
6021 && TREE_CODE (type
) != VECTOR_TYPE
6022 && TREE_CODE (type
) != COMPLEX_TYPE
)
6025 /* Any return value that is no larger than one word can be
6027 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
6030 /* Check any available co-processors to see if they accept the
6031 type as a register candidate (VFP, for example, can return
6032 some aggregates in consecutive registers). These aren't
6033 available if the call is variadic. */
6034 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
6037 /* Vector values should be returned using ARM registers, not
6038 memory (unless they're over 16 bytes, which will break since
6039 we only have four call-clobbered registers to play with). */
6040 if (TREE_CODE (type
) == VECTOR_TYPE
)
6041 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
6043 /* The rest go in memory. */
6047 if (TREE_CODE (type
) == VECTOR_TYPE
)
6048 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
6050 if (!AGGREGATE_TYPE_P (type
) &&
6051 (TREE_CODE (type
) != VECTOR_TYPE
))
6052 /* All simple types are returned in registers. */
6055 if (arm_abi
!= ARM_ABI_APCS
)
6057 /* ATPCS and later return aggregate types in memory only if they are
6058 larger than a word (or are variable size). */
6059 return (size
< 0 || size
> UNITS_PER_WORD
);
6062 /* For the arm-wince targets we choose to be compatible with Microsoft's
6063 ARM and Thumb compilers, which always return aggregates in memory. */
6065 /* All structures/unions bigger than one word are returned in memory.
6066 Also catch the case where int_size_in_bytes returns -1. In this case
6067 the aggregate is either huge or of variable size, and in either case
6068 we will want to return it via memory and not in a register. */
6069 if (size
< 0 || size
> UNITS_PER_WORD
)
6072 if (TREE_CODE (type
) == RECORD_TYPE
)
6076 /* For a struct the APCS says that we only return in a register
6077 if the type is 'integer like' and every addressable element
6078 has an offset of zero. For practical purposes this means
6079 that the structure can have at most one non bit-field element
6080 and that this element must be the first one in the structure. */
6082 /* Find the first field, ignoring non FIELD_DECL things which will
6083 have been created by C++. */
6084 /* NOTE: This code is deprecated and has not been updated to handle
6085 DECL_FIELD_ABI_IGNORED. */
6086 for (field
= TYPE_FIELDS (type
);
6087 field
&& TREE_CODE (field
) != FIELD_DECL
;
6088 field
= DECL_CHAIN (field
))
6092 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6094 /* Check that the first field is valid for returning in a register. */
6096 /* ... Floats are not allowed */
6097 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
6100 /* ... Aggregates that are not themselves valid for returning in
6101 a register are not allowed. */
6102 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
6105 /* Now check the remaining fields, if any. Only bitfields are allowed,
6106 since they are not addressable. */
6107 for (field
= DECL_CHAIN (field
);
6109 field
= DECL_CHAIN (field
))
6111 if (TREE_CODE (field
) != FIELD_DECL
)
6114 if (!DECL_BIT_FIELD_TYPE (field
))
6121 if (TREE_CODE (type
) == UNION_TYPE
)
6125 /* Unions can be returned in registers if every element is
6126 integral, or can be returned in an integer register. */
6127 for (field
= TYPE_FIELDS (type
);
6129 field
= DECL_CHAIN (field
))
6131 if (TREE_CODE (field
) != FIELD_DECL
)
6134 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
6137 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
6143 #endif /* not ARM_WINCE */
6145 /* Return all other types in memory. */
6149 const struct pcs_attribute_arg
6153 } pcs_attribute_args
[] =
6155 {"aapcs", ARM_PCS_AAPCS
},
6156 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
6158 /* We could recognize these, but changes would be needed elsewhere
6159 * to implement them. */
6160 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
6161 {"atpcs", ARM_PCS_ATPCS
},
6162 {"apcs", ARM_PCS_APCS
},
6164 {NULL
, ARM_PCS_UNKNOWN
}
6168 arm_pcs_from_attribute (tree attr
)
6170 const struct pcs_attribute_arg
*ptr
;
6173 /* Get the value of the argument. */
6174 if (TREE_VALUE (attr
) == NULL_TREE
6175 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
6176 return ARM_PCS_UNKNOWN
;
6178 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
6180 /* Check it against the list of known arguments. */
6181 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
6182 if (streq (arg
, ptr
->arg
))
6185 /* An unrecognized interrupt type. */
6186 return ARM_PCS_UNKNOWN
;
6189 /* Get the PCS variant to use for this call. TYPE is the function's type
6190 specification, DECL is the specific declartion. DECL may be null if
6191 the call could be indirect or if this is a library call. */
6193 arm_get_pcs_model (const_tree type
, const_tree decl ATTRIBUTE_UNUSED
)
6195 bool user_convention
= false;
6196 enum arm_pcs user_pcs
= arm_pcs_default
;
6201 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
6204 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
6205 user_convention
= true;
6208 if (TARGET_AAPCS_BASED
)
6210 /* Detect varargs functions. These always use the base rules
6211 (no argument is ever a candidate for a co-processor
6213 bool base_rules
= stdarg_p (type
);
6215 if (user_convention
)
6217 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
6218 sorry ("non-AAPCS derived PCS variant");
6219 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
6220 error ("variadic functions must use the base AAPCS variant");
6224 return ARM_PCS_AAPCS
;
6225 else if (user_convention
)
6228 /* Unfortunately, this is not safe and can lead to wrong code
6229 being generated (PR96882). Not all calls into the back-end
6230 pass the DECL, so it is unsafe to make any PCS-changing
6231 decisions based on it. In particular the RETURN_IN_MEMORY
6232 hook is only ever passed a TYPE. This needs revisiting to
6233 see if there are any partial improvements that can be
6235 else if (decl
&& flag_unit_at_a_time
)
6237 /* Local functions never leak outside this compilation unit,
6238 so we are free to use whatever conventions are
6240 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6241 cgraph_node
*local_info_node
6242 = cgraph_node::local_info_node (CONST_CAST_TREE (decl
));
6243 if (local_info_node
&& local_info_node
->local
)
6244 return ARM_PCS_AAPCS_LOCAL
;
6248 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
6249 sorry ("PCS variant");
6251 /* For everything else we use the target's default. */
6252 return arm_pcs_default
;
6257 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
6258 const_tree fntype ATTRIBUTE_UNUSED
,
6259 rtx libcall ATTRIBUTE_UNUSED
,
6260 const_tree fndecl ATTRIBUTE_UNUSED
)
6262 /* Record the unallocated VFP registers. */
6263 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
6264 pcum
->aapcs_vfp_reg_alloc
= 0;
/* Bitmasks that indicate whether earlier versions of GCC would have
   taken a different path through the ABI logic.  This should result in
   a -Wpsabi warning if the earlier path led to a different ABI decision.

   WARN_PSABI_EMPTY_CXX17_BASE
      Indicates that the type includes an artificial empty C++17 base field
      that, prior to GCC 10.1, would prevent the type from being treated as
      a HFA or HVA.  See PR94711 for details.

   WARN_PSABI_NO_UNIQUE_ADDRESS
      Indicates that the type includes an empty [[no_unique_address]] field
      that, prior to GCC 10.1, would prevent the type from being treated as
      a HFA or HVA.  */
const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6284 /* Walk down the type tree of TYPE counting consecutive base elements.
6285 If *MODEP is VOIDmode, then set it to the first valid floating point
6286 type. If a non-floating point type is found, or if a floating point
6287 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6288 otherwise return the count in the sub-tree.
6290 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6291 function has changed its behavior relative to earlier versions of GCC.
6292 Normally the argument should be nonnull and point to a zero-initialized
6293 variable. The function then records whether the ABI decision might
6294 be affected by a known fix to the ABI logic, setting the associated
6295 WARN_PSABI_* bits if so.
6297 When the argument is instead a null pointer, the function tries to
6298 simulate the behavior of GCC before all such ABI fixes were made.
6299 This is useful to check whether the function returns something
6300 different after the ABI fixes. */
6302 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
,
6303 unsigned int *warn_psabi_flags
)
6308 switch (TREE_CODE (type
))
6311 mode
= TYPE_MODE (type
);
6312 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
&& mode
!= BFmode
)
6315 if (*modep
== VOIDmode
)
6324 mode
= TYPE_MODE (TREE_TYPE (type
));
6325 if (mode
!= DFmode
&& mode
!= SFmode
)
6328 if (*modep
== VOIDmode
)
6337 /* Use V2SImode and V4SImode as representatives of all 64-bit
6338 and 128-bit vector types, whether or not those modes are
6339 supported with the present options. */
6340 size
= int_size_in_bytes (type
);
6353 if (*modep
== VOIDmode
)
6356 /* Vector modes are considered to be opaque: two vectors are
6357 equivalent for the purposes of being homogeneous aggregates
6358 if they are the same size. */
6367 tree index
= TYPE_DOMAIN (type
);
6369 /* Can't handle incomplete types nor sizes that are not
6371 if (!COMPLETE_TYPE_P (type
)
6372 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6375 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
,
6379 || !TYPE_MAX_VALUE (index
)
6380 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
6381 || !TYPE_MIN_VALUE (index
)
6382 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
6386 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
6387 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
6389 /* There must be no padding. */
6390 if (wi::to_wide (TYPE_SIZE (type
))
6391 != count
* GET_MODE_BITSIZE (*modep
))
6403 /* Can't handle incomplete types nor sizes that are not
6405 if (!COMPLETE_TYPE_P (type
)
6406 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6409 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6411 if (TREE_CODE (field
) != FIELD_DECL
)
6414 if (DECL_FIELD_ABI_IGNORED (field
))
6416 /* See whether this is something that earlier versions of
6417 GCC failed to ignore. */
6419 if (lookup_attribute ("no_unique_address",
6420 DECL_ATTRIBUTES (field
)))
6421 flag
= WARN_PSABI_NO_UNIQUE_ADDRESS
;
6422 else if (cxx17_empty_base_field_p (field
))
6423 flag
= WARN_PSABI_EMPTY_CXX17_BASE
;
6425 /* No compatibility problem. */
6428 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6429 if (warn_psabi_flags
)
6431 *warn_psabi_flags
|= flag
;
6435 /* A zero-width bitfield may affect layout in some
6436 circumstances, but adds no members. The determination
6437 of whether or not a type is an HFA is performed after
6438 layout is complete, so if the type still looks like an
6439 HFA afterwards, it is still classed as one. This is
6440 potentially an ABI break for the hard-float ABI. */
6441 else if (DECL_BIT_FIELD (field
)
6442 && integer_zerop (DECL_SIZE (field
)))
6444 /* Prior to GCC-12 these fields were striped early,
6445 hiding them from the back-end entirely and
6446 resulting in the correct behaviour for argument
6447 passing. Simulate that old behaviour without
6448 generating a warning. */
6449 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field
))
6451 if (warn_psabi_flags
)
6453 *warn_psabi_flags
|= WARN_PSABI_ZERO_WIDTH_BITFIELD
;
6458 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
,
6465 /* There must be no padding. */
6466 if (wi::to_wide (TYPE_SIZE (type
))
6467 != count
* GET_MODE_BITSIZE (*modep
))
6474 case QUAL_UNION_TYPE
:
6476 /* These aren't very interesting except in a degenerate case. */
6481 /* Can't handle incomplete types nor sizes that are not
6483 if (!COMPLETE_TYPE_P (type
)
6484 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6487 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6489 if (TREE_CODE (field
) != FIELD_DECL
)
6492 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
,
6496 count
= count
> sub_count
? count
: sub_count
;
6499 /* There must be no padding. */
6500 if (wi::to_wide (TYPE_SIZE (type
))
6501 != count
* GET_MODE_BITSIZE (*modep
))
6514 /* Return true if PCS_VARIANT should use VFP registers. */
6516 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
6518 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
6520 static bool seen_thumb1_vfp
= false;
6522 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
6524 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6525 /* sorry() is not immediately fatal, so only display this once. */
6526 seen_thumb1_vfp
= true;
6532 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
6535 return (TARGET_32BIT
&& TARGET_HARD_FLOAT
&&
6536 (TARGET_VFP_DOUBLE
|| !is_double
));
6539 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6540 suitable for passing or returning in VFP registers for the PCS
6541 variant selected. If it is, then *BASE_MODE is updated to contain
6542 a machine mode describing each element of the argument's type and
6543 *COUNT to hold the number of such elements. */
6545 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
6546 machine_mode mode
, const_tree type
,
6547 machine_mode
*base_mode
, int *count
)
6549 machine_mode new_mode
= VOIDmode
;
6551 /* If we have the type information, prefer that to working things
6552 out from the mode. */
6555 unsigned int warn_psabi_flags
= 0;
6556 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
,
6558 if (ag_count
> 0 && ag_count
<= 4)
6560 static unsigned last_reported_type_uid
;
6561 unsigned uid
= TYPE_UID (TYPE_MAIN_VARIANT (type
));
6565 && uid
!= last_reported_type_uid
6566 && ((alt
= aapcs_vfp_sub_candidate (type
, &new_mode
, NULL
))
6570 = CHANGES_ROOT_URL
"gcc-10/changes.html#empty_base";
6572 = CHANGES_ROOT_URL
"gcc-12/changes.html#zero_width_bitfields";
6573 gcc_assert (alt
== -1);
6574 last_reported_type_uid
= uid
;
6575 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6577 if (warn_psabi_flags
& WARN_PSABI_NO_UNIQUE_ADDRESS
)
6578 inform (input_location
, "parameter passing for argument of "
6579 "type %qT with %<[[no_unique_address]]%> members "
6580 "changed %{in GCC 10.1%}",
6581 TYPE_MAIN_VARIANT (type
), url10
);
6582 else if (warn_psabi_flags
& WARN_PSABI_EMPTY_CXX17_BASE
)
6583 inform (input_location
, "parameter passing for argument of "
6584 "type %qT when C++17 is enabled changed to match "
6585 "C++14 %{in GCC 10.1%}",
6586 TYPE_MAIN_VARIANT (type
), url10
);
6587 else if (warn_psabi_flags
& WARN_PSABI_ZERO_WIDTH_BITFIELD
)
6588 inform (input_location
, "parameter passing for argument of "
6589 "type %qT changed %{in GCC 12.1%}",
6590 TYPE_MAIN_VARIANT (type
), url12
);
6597 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
6598 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
6599 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
6604 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
6607 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
6613 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
6616 *base_mode
= new_mode
;
6618 if (TARGET_GENERAL_REGS_ONLY
)
6619 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6626 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
6627 machine_mode mode
, const_tree type
)
6629 int count ATTRIBUTE_UNUSED
;
6630 machine_mode ag_mode ATTRIBUTE_UNUSED
;
6632 if (!use_vfp_abi (pcs_variant
, false))
6634 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6639 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6642 if (!use_vfp_abi (pcum
->pcs_variant
, false))
6645 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
6646 &pcum
->aapcs_vfp_rmode
,
6647 &pcum
->aapcs_vfp_rcount
);
6650 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6651 for the behaviour of this function. */
6654 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6655 const_tree type ATTRIBUTE_UNUSED
)
6658 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
6659 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
6660 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
6663 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
6664 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
6666 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
6668 || (mode
== TImode
&& ! (TARGET_NEON
|| TARGET_HAVE_MVE
))
6669 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
6672 int rcount
= pcum
->aapcs_vfp_rcount
;
6674 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
6676 if (!(TARGET_NEON
|| TARGET_HAVE_MVE
))
6678 /* Avoid using unsupported vector modes. */
6679 if (rmode
== V2SImode
)
6681 else if (rmode
== V4SImode
)
6688 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
6689 for (i
= 0; i
< rcount
; i
++)
6691 rtx tmp
= gen_rtx_REG (rmode
,
6692 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
6693 tmp
= gen_rtx_EXPR_LIST
6695 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
6696 XVECEXP (par
, 0, i
) = tmp
;
6699 pcum
->aapcs_reg
= par
;
6702 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
6708 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6709 comment there for the behaviour of this function. */
6712 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
6714 const_tree type ATTRIBUTE_UNUSED
)
6716 if (!use_vfp_abi (pcs_variant
, false))
6720 || (GET_MODE_CLASS (mode
) == MODE_INT
6721 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
6722 && !(TARGET_NEON
|| TARGET_HAVE_MVE
)))
6725 machine_mode ag_mode
;
6730 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6733 if (!(TARGET_NEON
|| TARGET_HAVE_MVE
))
6735 if (ag_mode
== V2SImode
)
6737 else if (ag_mode
== V4SImode
)
6743 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
6744 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
6745 for (i
= 0; i
< count
; i
++)
6747 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
6748 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
6749 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
6750 XVECEXP (par
, 0, i
) = tmp
;
6756 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
6760 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
6761 machine_mode mode ATTRIBUTE_UNUSED
,
6762 const_tree type ATTRIBUTE_UNUSED
)
6764 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
6765 pcum
->aapcs_vfp_reg_alloc
= 0;
6769 #define AAPCS_CP(X) \
6771 aapcs_ ## X ## _cum_init, \
6772 aapcs_ ## X ## _is_call_candidate, \
6773 aapcs_ ## X ## _allocate, \
6774 aapcs_ ## X ## _is_return_candidate, \
6775 aapcs_ ## X ## _allocate_return_reg, \
6776 aapcs_ ## X ## _advance \
6779 /* Table of co-processors that can be used to pass arguments in
6780 registers. Idealy no arugment should be a candidate for more than
6781 one co-processor table entry, but the table is processed in order
6782 and stops after the first match. If that entry then fails to put
6783 the argument into a co-processor register, the argument will go on
6787 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6788 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
6790 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6791 BLKmode) is a candidate for this co-processor's registers; this
6792 function should ignore any position-dependent state in
6793 CUMULATIVE_ARGS and only use call-type dependent information. */
6794 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6796 /* Return true if the argument does get a co-processor register; it
6797 should set aapcs_reg to an RTX of the register allocated as is
6798 required for a return from FUNCTION_ARG. */
6799 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6801 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6802 be returned in this co-processor's registers. */
6803 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6805 /* Allocate and return an RTX element to hold the return type of a call. This
6806 routine must not fail and will only be called if is_return_candidate
6807 returned true with the same parameters. */
6808 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6810 /* Finish processing this argument and prepare to start processing
6812 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6813 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6821 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6826 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6827 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6834 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6836 /* We aren't passed a decl, so we can't check that a call is local.
6837 However, it isn't clear that that would be a win anyway, since it
6838 might limit some tail-calling opportunities. */
6839 enum arm_pcs pcs_variant
;
6843 const_tree fndecl
= NULL_TREE
;
6845 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6848 fntype
= TREE_TYPE (fntype
);
6851 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6854 pcs_variant
= arm_pcs_default
;
6856 if (pcs_variant
!= ARM_PCS_AAPCS
)
6860 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6861 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6870 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6873 /* We aren't passed a decl, so we can't check that a call is local.
6874 However, it isn't clear that that would be a win anyway, since it
6875 might limit some tail-calling opportunities. */
6876 enum arm_pcs pcs_variant
;
6877 int unsignedp ATTRIBUTE_UNUSED
;
6881 const_tree fndecl
= NULL_TREE
;
6883 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6886 fntype
= TREE_TYPE (fntype
);
6889 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6892 pcs_variant
= arm_pcs_default
;
6894 /* Promote integer types. */
6895 if (type
&& INTEGRAL_TYPE_P (type
))
6896 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6898 if (pcs_variant
!= ARM_PCS_AAPCS
)
6902 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6903 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6905 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6909 /* Promotes small structs returned in a register to full-word size
6910 for big-endian AAPCS. */
6911 if (type
&& arm_return_in_msb (type
))
6913 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6914 if (size
% UNITS_PER_WORD
!= 0)
6916 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6917 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
6921 return gen_rtx_REG (mode
, R0_REGNUM
);
6925 aapcs_libcall_value (machine_mode mode
)
6927 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6928 && GET_MODE_SIZE (mode
) <= 4)
6931 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6934 /* Lay out a function argument using the AAPCS rules. The rule
6935 numbers referred to here are those in the AAPCS. */
6937 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6938 const_tree type
, bool named
)
6943 /* We only need to do this once per argument. */
6944 if (pcum
->aapcs_arg_processed
)
6947 pcum
->aapcs_arg_processed
= true;
6949 /* Special case: if named is false then we are handling an incoming
6950 anonymous argument which is on the stack. */
6954 /* Is this a potential co-processor register candidate? */
6955 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6957 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6958 pcum
->aapcs_cprc_slot
= slot
;
6960 /* We don't have to apply any of the rules from part B of the
6961 preparation phase, these are handled elsewhere in the
6966 /* A Co-processor register candidate goes either in its own
6967 class of registers or on the stack. */
6968 if (!pcum
->aapcs_cprc_failed
[slot
])
6970 /* C1.cp - Try to allocate the argument to co-processor
6972 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6975 /* C2.cp - Put the argument on the stack and note that we
6976 can't assign any more candidates in this slot. We also
6977 need to note that we have allocated stack space, so that
6978 we won't later try to split a non-cprc candidate between
6979 core registers and the stack. */
6980 pcum
->aapcs_cprc_failed
[slot
] = true;
6981 pcum
->can_split
= false;
6984 /* We didn't get a register, so this argument goes on the
6986 gcc_assert (pcum
->can_split
== false);
6991 /* C3 - For double-word aligned arguments, round the NCRN up to the
6992 next even number. */
6993 ncrn
= pcum
->aapcs_ncrn
;
6996 int res
= arm_needs_doubleword_align (mode
, type
);
6997 /* Only warn during RTL expansion of call stmts, otherwise we would
6998 warn e.g. during gimplification even on functions that will be
6999 always inlined, and we'd warn multiple times. Don't warn when
7000 called in expand_function_start either, as we warn instead in
7001 arm_function_arg_boundary in that case. */
7002 if (res
< 0 && warn_psabi
&& currently_expanding_gimple_stmt
)
7003 inform (input_location
, "parameter passing for argument of type "
7004 "%qT changed in GCC 7.1", type
);
7009 nregs
= ARM_NUM_REGS2(mode
, type
);
7011 /* Sigh, this test should really assert that nregs > 0, but a GCC
7012 extension allows empty structs and then gives them empty size; it
7013 then allows such a structure to be passed by value. For some of
7014 the code below we have to pretend that such an argument has
7015 non-zero size so that we 'locate' it correctly either in
7016 registers or on the stack. */
7017 gcc_assert (nregs
>= 0);
7019 nregs2
= nregs
? nregs
: 1;
7021 /* C4 - Argument fits entirely in core registers. */
7022 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
7024 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
7025 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
7029 /* C5 - Some core registers left and there are no arguments already
7030 on the stack: split this argument between the remaining core
7031 registers and the stack. */
7032 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
7034 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
7035 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
7036 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
7040 /* C6 - NCRN is set to 4. */
7041 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
7043 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
7047 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7048 for a call to a function whose data type is FNTYPE.
7049 For a library call, FNTYPE is NULL. */
7051 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
7053 tree fndecl ATTRIBUTE_UNUSED
)
7055 /* Long call handling. */
7057 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
7059 pcum
->pcs_variant
= arm_pcs_default
;
7061 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
7063 if (arm_libcall_uses_aapcs_base (libname
))
7064 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
7066 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
7067 pcum
->aapcs_reg
= NULL_RTX
;
7068 pcum
->aapcs_partial
= 0;
7069 pcum
->aapcs_arg_processed
= false;
7070 pcum
->aapcs_cprc_slot
= -1;
7071 pcum
->can_split
= true;
7073 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
7077 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
7079 pcum
->aapcs_cprc_failed
[i
] = false;
7080 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
7088 /* On the ARM, the offset starts at 0. */
7090 pcum
->iwmmxt_nregs
= 0;
7091 pcum
->can_split
= true;
7093 /* Varargs vectors are treated the same as long long.
7094 named_count avoids having to change the way arm handles 'named' */
7095 pcum
->named_count
= 0;
7098 if (TARGET_REALLY_IWMMXT
&& fntype
)
7102 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
7104 fn_arg
= TREE_CHAIN (fn_arg
))
7105 pcum
->named_count
+= 1;
7107 if (! pcum
->named_count
)
7108 pcum
->named_count
= INT_MAX
;
7112 /* Return 2 if double word alignment is required for argument passing,
7113 but wasn't required before the fix for PR88469.
7114 Return 1 if double word alignment is required for argument passing.
7115 Return -1 if double word alignment used to be required for argument
7116 passing before PR77728 ABI fix, but is not required anymore.
7117 Return 0 if double word alignment is not required and wasn't requried
7120 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
7123 return GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
;
7125 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7126 if (!AGGREGATE_TYPE_P (type
))
7127 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
7129 /* Array types: Use member alignment of element type. */
7130 if (TREE_CODE (type
) == ARRAY_TYPE
)
7131 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
7135 /* Record/aggregate types: Use greatest member alignment of any member.
7137 Note that we explicitly consider zero-sized fields here, even though
7138 they don't map to AAPCS machine types. For example, in:
7140 struct __attribute__((aligned(8))) empty {};
7143 [[no_unique_address]] empty e;
7147 "s" contains only one Fundamental Data Type (the int field)
7148 but gains 8-byte alignment and size thanks to "e". */
7149 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7150 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
7152 if (TREE_CODE (field
) == FIELD_DECL
)
7155 /* Before PR77728 fix, we were incorrectly considering also
7156 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7157 Make sure we can warn about that with -Wpsabi. */
7160 else if (TREE_CODE (field
) == FIELD_DECL
7161 && DECL_BIT_FIELD_TYPE (field
)
7162 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field
)) > PARM_BOUNDARY
)
7172 /* Determine where to put an argument to a function.
7173 Value is zero to push the argument on the stack,
7174 or a hard register in which to store the argument.
7176 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7177 the preceding args and about the function being called.
7178 ARG is a description of the argument.
7180 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7181 other arguments are passed on the stack. If (NAMED == 0) (which happens
7182 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7183 defined), say it is passed in the stack (function_prologue will
7184 indeed make it pass in the stack if necessary). */
7187 arm_function_arg (cumulative_args_t pcum_v
, const function_arg_info
&arg
)
7189 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
7192 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7193 a call insn (op3 of a call_value insn). */
7194 if (arg
.end_marker_p ())
7197 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
7199 aapcs_layout_arg (pcum
, arg
.mode
, arg
.type
, arg
.named
);
7200 return pcum
->aapcs_reg
;
7203 /* Varargs vectors are treated the same as long long.
7204 named_count avoids having to change the way arm handles 'named' */
7205 if (TARGET_IWMMXT_ABI
7206 && arm_vector_mode_supported_p (arg
.mode
)
7207 && pcum
->named_count
> pcum
->nargs
+ 1)
7209 if (pcum
->iwmmxt_nregs
<= 9)
7210 return gen_rtx_REG (arg
.mode
,
7211 pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
7214 pcum
->can_split
= false;
7219 /* Put doubleword aligned quantities in even register pairs. */
7220 if ((pcum
->nregs
& 1) && ARM_DOUBLEWORD_ALIGN
)
7222 int res
= arm_needs_doubleword_align (arg
.mode
, arg
.type
);
7223 if (res
< 0 && warn_psabi
)
7224 inform (input_location
, "parameter passing for argument of type "
7225 "%qT changed in GCC 7.1", arg
.type
);
7229 if (res
> 1 && warn_psabi
)
7230 inform (input_location
, "parameter passing for argument of type "
7231 "%qT changed in GCC 9.1", arg
.type
);
7235 /* Only allow splitting an arg between regs and memory if all preceding
7236 args were allocated to regs. For args passed by reference we only count
7237 the reference pointer. */
7238 if (pcum
->can_split
)
7241 nregs
= ARM_NUM_REGS2 (arg
.mode
, arg
.type
);
7243 if (!arg
.named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
7246 return gen_rtx_REG (arg
.mode
, pcum
->nregs
);
7250 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
7252 if (!ARM_DOUBLEWORD_ALIGN
)
7253 return PARM_BOUNDARY
;
7255 int res
= arm_needs_doubleword_align (mode
, type
);
7256 if (res
< 0 && warn_psabi
)
7257 inform (input_location
, "parameter passing for argument of type %qT "
7258 "changed in GCC 7.1", type
);
7259 if (res
> 1 && warn_psabi
)
7260 inform (input_location
, "parameter passing for argument of type "
7261 "%qT changed in GCC 9.1", type
);
7263 return res
> 0 ? DOUBLEWORD_ALIGNMENT
: PARM_BOUNDARY
;
7267 arm_arg_partial_bytes (cumulative_args_t pcum_v
, const function_arg_info
&arg
)
7269 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
7270 int nregs
= pcum
->nregs
;
7272 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
7274 aapcs_layout_arg (pcum
, arg
.mode
, arg
.type
, arg
.named
);
7275 return pcum
->aapcs_partial
;
7278 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (arg
.mode
))
7281 if (NUM_ARG_REGS
> nregs
7282 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (arg
.mode
, arg
.type
))
7284 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
7289 /* Update the data in PCUM to advance over argument ARG. */
7292 arm_function_arg_advance (cumulative_args_t pcum_v
,
7293 const function_arg_info
&arg
)
7295 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
7297 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
7299 aapcs_layout_arg (pcum
, arg
.mode
, arg
.type
, arg
.named
);
7301 if (pcum
->aapcs_cprc_slot
>= 0)
7303 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, arg
.mode
,
7305 pcum
->aapcs_cprc_slot
= -1;
7308 /* Generic stuff. */
7309 pcum
->aapcs_arg_processed
= false;
7310 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
7311 pcum
->aapcs_reg
= NULL_RTX
;
7312 pcum
->aapcs_partial
= 0;
7317 if (arm_vector_mode_supported_p (arg
.mode
)
7318 && pcum
->named_count
> pcum
->nargs
7319 && TARGET_IWMMXT_ABI
)
7320 pcum
->iwmmxt_nregs
+= 1;
7322 pcum
->nregs
+= ARM_NUM_REGS2 (arg
.mode
, arg
.type
);
7326 /* Variable sized types are passed by reference. This is a GCC
7327 extension to the ARM ABI. */
7330 arm_pass_by_reference (cumulative_args_t
, const function_arg_info
&arg
)
7332 return arg
.type
&& TREE_CODE (TYPE_SIZE (arg
.type
)) != INTEGER_CST
;
7335 /* Encode the current state of the #pragma [no_]long_calls. */
7338 OFF
, /* No #pragma [no_]long_calls is in effect. */
7339 LONG
, /* #pragma long_calls is in effect. */
7340 SHORT
/* #pragma no_long_calls is in effect. */
7343 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
7346 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
7348 arm_pragma_long_calls
= LONG
;
7352 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
7354 arm_pragma_long_calls
= SHORT
;
7358 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
7360 arm_pragma_long_calls
= OFF
;
7363 /* Handle an attribute requiring a FUNCTION_DECL;
7364 arguments as in struct attribute_spec.handler. */
7366 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
7367 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
7369 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7371 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
7373 *no_add_attrs
= true;
7379 /* Handle an "interrupt" or "isr" attribute;
7380 arguments as in struct attribute_spec.handler. */
7382 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
7387 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7389 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
7391 *no_add_attrs
= true;
7393 else if (TARGET_VFP_BASE
)
7395 warning (OPT_Wattributes
, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7398 /* FIXME: the argument if any is checked for type attributes;
7399 should it be checked for decl ones? */
7403 if (TREE_CODE (*node
) == FUNCTION_TYPE
7404 || TREE_CODE (*node
) == METHOD_TYPE
)
7406 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
7408 warning (OPT_Wattributes
, "%qE attribute ignored",
7410 *no_add_attrs
= true;
7413 else if (TREE_CODE (*node
) == POINTER_TYPE
7414 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
7415 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
7416 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
7418 *node
= build_variant_type_copy (*node
);
7419 TREE_TYPE (*node
) = build_type_attribute_variant
7421 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
7422 *no_add_attrs
= true;
7426 /* Possibly pass this attribute on from the type to a decl. */
7427 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
7428 | (int) ATTR_FLAG_FUNCTION_NEXT
7429 | (int) ATTR_FLAG_ARRAY_NEXT
))
7431 *no_add_attrs
= true;
7432 return tree_cons (name
, args
, NULL_TREE
);
7436 warning (OPT_Wattributes
, "%qE attribute ignored",
7445 /* Handle a "pcs" attribute; arguments as in struct
7446 attribute_spec.handler. */
7448 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
7449 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
7451 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
7453 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
7454 *no_add_attrs
= true;
7459 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7460 /* Handle the "notshared" attribute. This attribute is another way of
7461 requesting hidden visibility. ARM's compiler supports
7462 "__declspec(notshared)"; we support the same thing via an
7466 arm_handle_notshared_attribute (tree
*node
,
7467 tree name ATTRIBUTE_UNUSED
,
7468 tree args ATTRIBUTE_UNUSED
,
7469 int flags ATTRIBUTE_UNUSED
,
7472 tree decl
= TYPE_NAME (*node
);
7476 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
7477 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
7478 *no_add_attrs
= false;
7484 /* This function returns true if a function with declaration FNDECL and type
7485 FNTYPE uses the stack to pass arguments or return variables and false
7486 otherwise. This is used for functions with the attributes
7487 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7488 diagnostic messages if the stack is used. NAME is the name of the attribute
7492 cmse_func_args_or_return_in_stack (tree fndecl
, tree name
, tree fntype
)
7494 function_args_iterator args_iter
;
7495 CUMULATIVE_ARGS args_so_far_v
;
7496 cumulative_args_t args_so_far
;
7497 bool first_param
= true;
7498 tree arg_type
, prev_arg_type
= NULL_TREE
, ret_type
;
7500 /* Error out if any argument is passed on the stack. */
7501 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
7502 args_so_far
= pack_cumulative_args (&args_so_far_v
);
7503 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
7507 prev_arg_type
= arg_type
;
7508 if (VOID_TYPE_P (arg_type
))
7511 function_arg_info
arg (arg_type
, /*named=*/true);
7513 /* ??? We should advance after processing the argument and pass
7514 the argument we're advancing past. */
7515 arm_function_arg_advance (args_so_far
, arg
);
7516 arg_rtx
= arm_function_arg (args_so_far
, arg
);
7517 if (!arg_rtx
|| arm_arg_partial_bytes (args_so_far
, arg
))
7519 error ("%qE attribute not available to functions with arguments "
7520 "passed on the stack", name
);
7523 first_param
= false;
7526 /* Error out for variadic functions since we cannot control how many
7527 arguments will be passed and thus stack could be used. stdarg_p () is not
7528 used for the checking to avoid browsing arguments twice. */
7529 if (prev_arg_type
!= NULL_TREE
&& !VOID_TYPE_P (prev_arg_type
))
7531 error ("%qE attribute not available to functions with variable number "
7532 "of arguments", name
);
7536 /* Error out if return value is passed on the stack. */
7537 ret_type
= TREE_TYPE (fntype
);
7538 if (arm_return_in_memory (ret_type
, fntype
))
7540 error ("%qE attribute not available to functions that return value on "
7547 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7548 function will check whether the attribute is allowed here and will add the
7549 attribute to the function declaration tree or otherwise issue a warning. */
7552 arm_handle_cmse_nonsecure_entry (tree
*node
, tree name
,
7561 *no_add_attrs
= true;
7562 warning (OPT_Wattributes
, "%qE attribute ignored without %<-mcmse%> "
7567 /* Ignore attribute for function types. */
7568 if (TREE_CODE (*node
) != FUNCTION_DECL
)
7570 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
7572 *no_add_attrs
= true;
7578 /* Warn for static linkage functions. */
7579 if (!TREE_PUBLIC (fndecl
))
7581 warning (OPT_Wattributes
, "%qE attribute has no effect on functions "
7582 "with static linkage", name
);
7583 *no_add_attrs
= true;
7587 *no_add_attrs
|= cmse_func_args_or_return_in_stack (fndecl
, name
,
7588 TREE_TYPE (fndecl
));
7593 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7594 function will check whether the attribute is allowed here and will add the
7595 attribute to the function type tree or otherwise issue a diagnostic. The
7596 reason we check this at declaration time is to only allow the use of the
7597 attribute with declarations of function pointers and not function
7598 declarations. This function checks NODE is of the expected type and issues
7599 diagnostics otherwise using NAME. If it is not of the expected type
7600 *NO_ADD_ATTRS will be set to true. */
7603 arm_handle_cmse_nonsecure_call (tree
*node
, tree name
,
7608 tree decl
= NULL_TREE
, fntype
= NULL_TREE
;
7613 *no_add_attrs
= true;
7614 warning (OPT_Wattributes
, "%qE attribute ignored without %<-mcmse%> "
7619 if (TREE_CODE (*node
) == VAR_DECL
|| TREE_CODE (*node
) == TYPE_DECL
)
7622 fntype
= TREE_TYPE (decl
);
7625 while (fntype
!= NULL_TREE
&& TREE_CODE (fntype
) == POINTER_TYPE
)
7626 fntype
= TREE_TYPE (fntype
);
7628 if (!decl
|| TREE_CODE (fntype
) != FUNCTION_TYPE
)
7630 warning (OPT_Wattributes
, "%qE attribute only applies to base type of a "
7631 "function pointer", name
);
7632 *no_add_attrs
= true;
7636 *no_add_attrs
|= cmse_func_args_or_return_in_stack (NULL
, name
, fntype
);
7641 /* Prevent trees being shared among function types with and without
7642 cmse_nonsecure_call attribute. */
7643 type
= TREE_TYPE (decl
);
7645 type
= build_distinct_type_copy (type
);
7646 TREE_TYPE (decl
) = type
;
7649 while (TREE_CODE (fntype
) != FUNCTION_TYPE
)
7652 fntype
= TREE_TYPE (fntype
);
7653 fntype
= build_distinct_type_copy (fntype
);
7654 TREE_TYPE (type
) = fntype
;
7657 /* Construct a type attribute and add it to the function type. */
7658 tree attrs
= tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE
,
7659 TYPE_ATTRIBUTES (fntype
));
7660 TYPE_ATTRIBUTES (fntype
) = attrs
;
7664 /* Return 0 if the attributes for two types are incompatible, 1 if they
7665 are compatible, and 2 if they are nearly compatible (which causes a
7666 warning to be generated). */
7668 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
7672 tree attrs1
= lookup_attribute ("Advanced SIMD type",
7673 TYPE_ATTRIBUTES (type1
));
7674 tree attrs2
= lookup_attribute ("Advanced SIMD type",
7675 TYPE_ATTRIBUTES (type2
));
7676 if (bool (attrs1
) != bool (attrs2
))
7678 if (attrs1
&& !attribute_value_equal (attrs1
, attrs2
))
7681 /* Check for mismatch of non-default calling convention. */
7682 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
7685 /* Check for mismatched call attributes. */
7686 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7687 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7688 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7689 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7691 /* Only bother to check if an attribute is defined. */
7692 if (l1
| l2
| s1
| s2
)
7694 /* If one type has an attribute, the other must have the same attribute. */
7695 if ((l1
!= l2
) || (s1
!= s2
))
7698 /* Disallow mixed attributes. */
7699 if ((l1
& s2
) || (l2
& s1
))
7703 /* Check for mismatched ISR attribute. */
7704 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
7706 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
7707 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
7709 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
7713 l1
= lookup_attribute ("cmse_nonsecure_call",
7714 TYPE_ATTRIBUTES (type1
)) != NULL
;
7715 l2
= lookup_attribute ("cmse_nonsecure_call",
7716 TYPE_ATTRIBUTES (type2
)) != NULL
;
/* NOTE(review): damaged extraction of gcc/config/arm/arm.cc — the embedded
   original line numbers below show dropped lines (7727 return type, 7729 and
   7734 braces, 7737, 7742-7744, 7747+), so this fragment is incomplete.
   Comments only are added here; no code token is altered.  */
7724 /* Assigns default attributes to newly defined type. This is used to
7725 set short_call/long_call attributes for function types of
7726 functions defined inside corresponding #pragma scopes. */
7728 arm_set_default_type_attributes (tree type
)
7730 /* Add __attribute__ ((long_call)) to all functions, when
7731 inside #pragma long_calls or __attribute__ ((short_call)),
7732 when inside #pragma no_long_calls. */
7733 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
7735 tree type_attr_list
, attr_name
;
7736 type_attr_list
= TYPE_ATTRIBUTES (type
);
/* Pick the attribute implied by the active #pragma long_calls state.  */
7738 if (arm_pragma_long_calls
== LONG
)
7739 attr_name
= get_identifier ("long_call");
7740 else if (arm_pragma_long_calls
== SHORT
)
7741 attr_name
= get_identifier ("short_call");
/* NOTE(review): original lines 7742-7744 are missing — presumably the
   "no pragma active" early-exit path; confirm against upstream arm.cc.  */
7745 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
7746 TYPE_ATTRIBUTES (type
) = type_attr_list
;
/* NOTE(review): damaged extraction — original lines 7751-7752, 7754,
   7757-7758, 7761, 7764-7766 are missing (return type, braces and the
   early "return" statements); confirm against upstream arm.cc.
   Comments only are added; no code token is altered.  */
7750 /* Return true if DECL is known to be linked into section SECTION. */
7753 arm_function_in_section_p (tree decl
, section
*section
)
7755 /* We can only be certain about the prevailing symbol definition. */
7756 if (!decl_binds_to_current_def_p (decl
))
7759 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7760 if (!DECL_SECTION_NAME (decl
))
7762 /* Make sure that we will not create a unique section for DECL. */
7763 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
/* Compare the section DECL would actually be emitted into with SECTION.  */
7767 return function_section (decl
) == section
;
/* NOTE(review): damaged extraction — original lines 7772-7773, 7777, 7779,
   7783-7784, 7786-7789, 7791, 7794-7795, 7801-7802, 7804-7805 are missing
   (return type, braces, the ATTRS declaration and several return
   statements); confirm against upstream arm.cc.  Comments only added.  */
7770 /* Return nonzero if a 32-bit "long_call" should be generated for
7771 a call from the current function to DECL. We generate a long_call
7774 a. has an __attribute__((long call))
7775 or b. is within the scope of a #pragma long_calls
7776 or c. the -mlong-calls command line switch has been specified
7778 However we do not generate a long call if the function:
7780 d. has an __attribute__ ((short_call))
7781 or e. is inside the scope of a #pragma no_long_calls
7782 or f. is defined in the same section as the current function. */
7785 arm_is_long_call_p (tree decl
)
/* NOTE(review): lines 7786-7789 missing — presumably a guard for a null or
   non-function DECL before the attribute checks; confirm upstream.  */
7790 return TARGET_LONG_CALLS
;
7792 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
7793 if (lookup_attribute ("short_call", attrs
))
7796 /* For "f", be conservative, and only cater for cases in which the
7797 whole of the current function is placed in the same section. */
7798 if (!flag_reorder_blocks_and_partition
7799 && TREE_CODE (decl
) == FUNCTION_DECL
7800 && arm_function_in_section_p (decl
, current_function_section ()))
7803 if (lookup_attribute ("long_call", attrs
))
/* Default: fall back to the -mlong-calls / #pragma long_calls setting.  */
7806 return TARGET_LONG_CALLS
;
7809 /* Return nonzero if it is ok to make a tail-call to DECL. */
7811 arm_function_ok_for_sibcall (tree decl
, tree exp
)
7813 unsigned long func_type
;
7815 if (cfun
->machine
->sibcall_blocked
)
7820 /* In FDPIC, never tailcall something for which we have no decl:
7821 the target function could be in a different module, requiring
7822 a different FDPIC register value. */
7827 /* Never tailcall something if we are generating code for Thumb-1. */
7831 /* The PIC register is live on entry to VxWorks PLT entries, so we
7832 must make the call before restoring the PIC register. */
7833 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
7836 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7837 may be used both as target of the call and base register for restoring
7838 the VFP registers */
7839 if (TARGET_APCS_FRAME
&& TARGET_ARM
7840 && TARGET_HARD_FLOAT
7841 && decl
&& arm_is_long_call_p (decl
))
7844 /* If we are interworking and the function is not declared static
7845 then we can't tail-call it unless we know that it exists in this
7846 compilation unit (since it might be a Thumb routine). */
7847 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
7848 && !TREE_ASM_WRITTEN (decl
))
7851 func_type
= arm_current_func_type ();
7852 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7853 if (IS_INTERRUPT (func_type
))
7856 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7857 generated for entry functions themselves. */
7858 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7861 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7862 this would complicate matters for later code generation. */
7863 if (TREE_CODE (exp
) == CALL_EXPR
)
7865 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7866 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype
)))
7870 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
7872 /* Check that the return value locations are the same. For
7873 example that we aren't returning a value from the sibling in
7874 a VFP register but then need to transfer it to a core
7877 tree decl_or_type
= decl
;
7879 /* If it is an indirect function pointer, get the function type. */
7881 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7883 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
7884 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
7886 if (!rtx_equal_p (a
, b
))
7890 /* Never tailcall if function may be called with a misaligned SP. */
7891 if (IS_STACKALIGN (func_type
))
7894 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7895 references should become a NOP. Don't convert such calls into
7897 if (TARGET_AAPCS_BASED
7898 && arm_abi
== ARM_ABI_AAPCS
7900 && DECL_WEAK (decl
))
7903 /* We cannot do a tailcall for an indirect call by descriptor if all the
7904 argument registers are used because the only register left to load the
7905 address is IP and it will already contain the static chain. */
7906 if (!decl
&& CALL_EXPR_BY_DESCRIPTOR (exp
) && !flag_trampolines
)
7908 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7909 CUMULATIVE_ARGS cum
;
7910 cumulative_args_t cum_v
;
7912 arm_init_cumulative_args (&cum
, fntype
, NULL_RTX
, NULL_TREE
);
7913 cum_v
= pack_cumulative_args (&cum
);
7915 for (tree t
= TYPE_ARG_TYPES (fntype
); t
; t
= TREE_CHAIN (t
))
7917 tree type
= TREE_VALUE (t
);
7918 if (!VOID_TYPE_P (type
))
7920 function_arg_info
arg (type
, /*named=*/true);
7921 arm_function_arg_advance (cum_v
, arg
);
7925 function_arg_info
arg (integer_type_node
, /*named=*/true);
7926 if (!arm_function_arg (cum_v
, arg
))
7930 /* Everything else is ok. */
7935 /* Addressing mode support functions. */
7937 /* Return nonzero if X is a legitimate immediate operand when compiling
7938 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7940 legitimate_pic_operand_p (rtx x
)
/* NOTE(review): damaged extraction — the return type and both return
   statements (original lines 7939, 7941, 7946+) are missing.  The visible
   condition matches a bare SYMBOL_REF or a CONST(PLUS(SYMBOL_REF, ...));
   presumably those are rejected (symbol addresses need GOT treatment under
   PIC) — confirm against upstream arm.cc.  Comments only added.  */
7942 if (SYMBOL_REF_P (x
)
7943 || (GET_CODE (x
) == CONST
7944 && GET_CODE (XEXP (x
, 0)) == PLUS
7945 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
7951 /* Record that the current function needs a PIC register. If PIC_REG is null,
7952 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
7953 both case cfun->machine->pic_reg is initialized if we have not already done
7954 so. COMPUTE_NOW decide whether and where to set the PIC register. If true,
7955 PIC register is reloaded in the current position of the instruction stream
7956 irregardless of whether it was loaded before. Otherwise, it is only loaded
7957 if not already done so (crtl->uses_pic_offset_table is null). Note that
7958 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7959 is only supported iff COMPUTE_NOW is false. */
7962 require_pic_register (rtx pic_reg
, bool compute_now
)
7964 gcc_assert (compute_now
== (pic_reg
!= NULL_RTX
));
7966 /* A lot of the logic here is made obscure by the fact that this
7967 routine gets called as part of the rtx cost estimation process.
7968 We don't want those calls to affect any assumptions about the real
7969 function; and further, we can't call entry_of_function() until we
7970 start the real expansion process. */
7971 if (!crtl
->uses_pic_offset_table
|| compute_now
)
7973 gcc_assert (can_create_pseudo_p ()
7974 || (pic_reg
!= NULL_RTX
7976 && GET_MODE (pic_reg
) == Pmode
));
7977 if (arm_pic_register
!= INVALID_REGNUM
7979 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
7981 if (!cfun
->machine
->pic_reg
)
7982 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
7984 /* Play games to avoid marking the function as needing pic
7985 if we are being called as part of the cost-estimation
7987 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7988 crtl
->uses_pic_offset_table
= 1;
7992 rtx_insn
*seq
, *insn
;
7994 if (pic_reg
== NULL_RTX
)
7995 pic_reg
= gen_reg_rtx (Pmode
);
7996 if (!cfun
->machine
->pic_reg
)
7997 cfun
->machine
->pic_reg
= pic_reg
;
7999 /* Play games to avoid marking the function as needing pic
8000 if we are being called as part of the cost-estimation
8002 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
8004 crtl
->uses_pic_offset_table
= 1;
8007 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
8008 && arm_pic_register
> LAST_LO_REGNUM
8010 emit_move_insn (cfun
->machine
->pic_reg
,
8011 gen_rtx_REG (Pmode
, arm_pic_register
));
8013 arm_load_pic_register (0UL, pic_reg
);
8018 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
8020 INSN_LOCATION (insn
) = prologue_location
;
8022 /* We can be called during expansion of PHI nodes, where
8023 we can't yet emit instructions directly in the final
8024 insn stream. Queue the insns on the entry edge, they will
8025 be committed after everything else is expanded. */
8026 if (currently_expanding_to_rtl
)
8027 insert_insn_on_edge (seq
,
8029 (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
8037 /* Generate insns to calculate the address of ORIG in pic mode. */
8039 calculate_pic_address_constant (rtx reg
, rtx pic_reg
, rtx orig
)
/* NOTE(review): damaged extraction — the return type and the declarations
   of PAT and MEM (original lines 8038, 8040-8043, 8045) are missing;
   comments only added, no code token altered.  */
8044 pat
= gen_calculate_pic_address (reg
, pic_reg
, orig
);
8046 /* Make the MEM as close to a constant as possible. */
8047 mem
= SET_SRC (pat
);
8048 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
8049 MEM_READONLY_P (mem
) = 1;
8050 MEM_NOTRAP_P (mem
) = 1;
/* Emit the address computation and return the emitted insn.  */
8052 return emit_insn (pat
);
8055 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8056 created to hold the result of the load. If not NULL, PIC_REG indicates
8057 which register to use as PIC register, otherwise it is decided by register
8058 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8059 location in the instruction stream, irregardless of whether it was loaded
8060 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
8061 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
8063 Returns the register REG into which the PIC load is performed. */
8066 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
, rtx pic_reg
,
8069 gcc_assert (compute_now
== (pic_reg
!= NULL_RTX
));
8071 if (SYMBOL_REF_P (orig
)
8072 || LABEL_REF_P (orig
))
8076 gcc_assert (can_create_pseudo_p ());
8077 reg
= gen_reg_rtx (Pmode
);
8080 /* VxWorks does not impose a fixed gap between segments; the run-time
8081 gap can be different from the object-file gap. We therefore can't
8082 use GOTOFF unless we are absolutely sure that the symbol is in the
8083 same segment as the GOT. Unfortunately, the flexibility of linker
8084 scripts means that we can't be sure of that in general, so assume
8085 that GOTOFF is never valid on VxWorks. */
8086 /* References to weak symbols cannot be resolved locally: they
8087 may be overridden by a non-weak definition at link time. */
8089 if ((LABEL_REF_P (orig
)
8090 || (SYMBOL_REF_P (orig
)
8091 && SYMBOL_REF_LOCAL_P (orig
)
8092 && (SYMBOL_REF_DECL (orig
)
8093 ? !DECL_WEAK (SYMBOL_REF_DECL (orig
)) : 1)
8094 && (!SYMBOL_REF_FUNCTION_P (orig
)
8095 || arm_fdpic_local_funcdesc_p (orig
))))
8097 && arm_pic_data_is_text_relative
)
8098 insn
= arm_pic_static_addr (orig
, reg
);
8101 /* If this function doesn't have a pic register, create one now. */
8102 require_pic_register (pic_reg
, compute_now
);
8104 if (pic_reg
== NULL_RTX
)
8105 pic_reg
= cfun
->machine
->pic_reg
;
8107 insn
= calculate_pic_address_constant (reg
, pic_reg
, orig
);
8110 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8112 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
8116 else if (GET_CODE (orig
) == CONST
)
8120 if (GET_CODE (XEXP (orig
, 0)) == PLUS
8121 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
8124 /* Handle the case where we have: const (UNSPEC_TLS). */
8125 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
8126 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
8129 /* Handle the case where we have:
8130 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8132 if (GET_CODE (XEXP (orig
, 0)) == PLUS
8133 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
8134 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
8136 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
8142 gcc_assert (can_create_pseudo_p ());
8143 reg
= gen_reg_rtx (Pmode
);
8146 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
8148 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
,
8149 pic_reg
, compute_now
);
8150 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
8151 base
== reg
? 0 : reg
, pic_reg
,
8154 if (CONST_INT_P (offset
))
8156 /* The base register doesn't really matter, we only want to
8157 test the index for the appropriate mode. */
8158 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
8160 gcc_assert (can_create_pseudo_p ());
8161 offset
= force_reg (Pmode
, offset
);
8164 if (CONST_INT_P (offset
))
8165 return plus_constant (Pmode
, base
, INTVAL (offset
));
8168 if (GET_MODE_SIZE (mode
) > 4
8169 && (GET_MODE_CLASS (mode
) == MODE_INT
8170 || TARGET_SOFT_FLOAT
))
8172 emit_insn (gen_addsi3 (reg
, base
, offset
));
8176 return gen_rtx_PLUS (Pmode
, base
, offset
);
8183 /* Generate insns that produce the address of the stack canary */
8185 arm_stack_protect_tls_canary_mem (bool reload
)
/* NOTE(review): damaged extraction — the branch structure selecting
   between gen_reload_tp_hard and gen_load_tp_hard (original lines 8184,
   8186, 8188, 8190, 8192) is missing; presumably "if (reload) ... else
   ..." keyed on the RELOAD parameter — confirm upstream.  Comments only
   added below.  */
8187 rtx tp
= gen_reg_rtx (SImode
);
8189 emit_insn (gen_reload_tp_hard (tp
));
8191 emit_insn (gen_load_tp_hard (tp
));
/* Canary address = thread pointer + arm_stack_protector_guard_offset;
   return it wrapped in an SImode MEM.  */
8193 rtx reg
= gen_reg_rtx (SImode
);
8194 rtx offset
= GEN_INT (arm_stack_protector_guard_offset
);
8195 emit_set_insn (reg
, gen_rtx_PLUS (SImode
, tp
, offset
));
8196 return gen_rtx_MEM (SImode
, reg
);
8200 /* Whether a register is callee saved or not. This is necessary because high
8201 registers are marked as caller saved when optimizing for size on Thumb-1
8202 targets despite being callee saved in order to avoid using them. */
/* NOTE(review): function-like macro — REG is evaluated more than once, so
   callers must not pass an expression with side effects.  */
8203 #define callee_saved_reg_p(reg) \
8204 (!call_used_or_fixed_reg_p (reg) \
8205 || (TARGET_THUMB1 && optimize_size \
8206 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8208 /* Return a mask for the call-clobbered low registers that are unused
8209 at the end of the prologue. */
8210 static unsigned long
8211 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8213 unsigned long mask
= 0;
/* Live-out set of the entry block == registers live at the end of the
   prologue.  */
8214 bitmap prologue_live_out
= df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
8216 for (int reg
= FIRST_LO_REGNUM
; reg
<= LAST_LO_REGNUM
; reg
++)
8217 if (!callee_saved_reg_p (reg
) && !REGNO_REG_SET_P (prologue_live_out
, reg
))
/* Bit N of the mask corresponds to low register FIRST_LO_REGNUM + N.  */
8218 mask
|= 1 << (reg
- FIRST_LO_REGNUM
);
/* NOTE(review): the trailing "return mask;" (original lines 8219-8221) is
   missing from this extraction.  */
8222 /* Similarly for the start of the epilogue. */
8223 static unsigned long
8224 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8226 unsigned long mask
= 0;
/* Live-in set of the exit block == registers live at the start of the
   epilogue.  */
8227 bitmap epilogue_live_in
= df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun
));
8229 for (int reg
= FIRST_LO_REGNUM
; reg
<= LAST_LO_REGNUM
; reg
++)
8230 if (!callee_saved_reg_p (reg
) && !REGNO_REG_SET_P (epilogue_live_in
, reg
))
8231 mask
|= 1 << (reg
- FIRST_LO_REGNUM
);
/* NOTE(review): the trailing "return mask;" (original lines 8232-8234) is
   missing from this extraction.  */
8235 /* Find a spare register to use during the prolog of a function. */
8238 thumb_find_work_register (unsigned long pushed_regs_mask
)
/* NOTE(review): damaged extraction — the return type, the declaration of
   REG, braces, and the "return reg;" statement of each search loop
   (original lines 8236-8241, 8244, 8250-8251, 8255-8258, 8262-8263,
   8266+) are missing; confirm against upstream arm.cc.  Comments only
   added; no code token altered.  */
8242 unsigned long unused_regs
8243 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8245 /* Check the argument registers first as these are call-used. The
8246 register allocation order means that sometimes r3 might be used
8247 but earlier argument registers might not, so check them all. */
8248 for (reg
= LAST_LO_REGNUM
; reg
>= FIRST_LO_REGNUM
; reg
--)
8249 if (unused_regs
& (1 << (reg
- FIRST_LO_REGNUM
)))
8252 /* Otherwise look for a call-saved register that is going to be pushed. */
8253 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
8254 if (pushed_regs_mask
& (1 << reg
))
8259 /* Thumb-2 can use high regs. */
8260 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
8261 if (pushed_regs_mask
& (1 << reg
))
8264 /* Something went wrong - thumb_compute_save_reg_mask()
8265 should have arranged for a suitable register to be pushed. */
8269 static GTY(()) int pic_labelno
;
8271 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8275 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
, rtx pic_reg
)
8277 rtx l1
, labelno
, pic_tmp
, pic_rtx
;
8279 if (crtl
->uses_pic_offset_table
== 0
8280 || TARGET_SINGLE_PIC_BASE
8284 gcc_assert (flag_pic
);
8286 if (pic_reg
== NULL_RTX
)
8287 pic_reg
= cfun
->machine
->pic_reg
;
8288 if (TARGET_VXWORKS_RTP
)
8290 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
8291 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
8292 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
8294 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
8296 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8297 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
8301 /* We use an UNSPEC rather than a LABEL_REF because this label
8302 never appears in the code stream. */
8304 labelno
= GEN_INT (pic_labelno
++);
8305 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8306 l1
= gen_rtx_CONST (VOIDmode
, l1
);
8308 /* On the ARM the PC register contains 'dot + 8' at the time of the
8309 addition, on the Thumb it is 'dot + 4'. */
8310 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
8311 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
8313 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
8317 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
8319 else /* TARGET_THUMB1 */
8321 if (arm_pic_register
!= INVALID_REGNUM
8322 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
8324 /* We will have pushed the pic register, so we should always be
8325 able to find a work register. */
8326 pic_tmp
= gen_rtx_REG (SImode
,
8327 thumb_find_work_register (saved_regs
));
8328 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
8329 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
8330 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
8332 else if (arm_pic_register
!= INVALID_REGNUM
8333 && arm_pic_register
> LAST_LO_REGNUM
8334 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
8336 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
8337 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
8338 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
8341 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
8345 /* Need to emit this whether or not we obey regdecls,
8346 since setjmp/longjmp can cause life info to screw up. */
8350 /* Try to determine whether an object, referenced via ORIG, will be
8351 placed in the text or data segment. This is used in FDPIC mode, to
8352 decide which relocations to use when accessing ORIG. *IS_READONLY
8353 is set to true if ORIG is a read-only location, false otherwise.
8354 Return true if we could determine the location of ORIG, false
8355 otherwise. *IS_READONLY is valid only when we return true. */
8357 arm_is_segment_info_known (rtx orig
, bool *is_readonly
)
8359 *is_readonly
= false;
8361 if (LABEL_REF_P (orig
))
8363 *is_readonly
= true;
8367 if (SYMBOL_REF_P (orig
))
8369 if (CONSTANT_POOL_ADDRESS_P (orig
))
8371 *is_readonly
= true;
8374 if (SYMBOL_REF_LOCAL_P (orig
)
8375 && !SYMBOL_REF_EXTERNAL_P (orig
)
8376 && SYMBOL_REF_DECL (orig
)
8377 && (!DECL_P (SYMBOL_REF_DECL (orig
))
8378 || !DECL_COMMON (SYMBOL_REF_DECL (orig
))))
8380 tree decl
= SYMBOL_REF_DECL (orig
);
8381 tree init
= (TREE_CODE (decl
) == VAR_DECL
)
8382 ? DECL_INITIAL (decl
) : (TREE_CODE (decl
) == CONSTRUCTOR
)
8385 bool named_section
, readonly
;
8387 if (init
&& init
!= error_mark_node
)
8388 reloc
= compute_reloc_for_constant (init
);
8390 named_section
= TREE_CODE (decl
) == VAR_DECL
8391 && lookup_attribute ("section", DECL_ATTRIBUTES (decl
));
8392 readonly
= decl_readonly_section (decl
, reloc
);
8394 /* We don't know where the link script will put a named
8395 section, so return false in such a case. */
8399 *is_readonly
= readonly
;
8403 /* We don't know. */
8410 /* Generate code to load the address of a static var when flag_pic is set. */
8412 arm_pic_static_addr (rtx orig
, rtx reg
)
8414 rtx l1
, labelno
, offset_rtx
;
8417 gcc_assert (flag_pic
);
8419 bool is_readonly
= false;
8420 bool info_known
= false;
8423 && SYMBOL_REF_P (orig
)
8424 && !SYMBOL_REF_FUNCTION_P (orig
))
8425 info_known
= arm_is_segment_info_known (orig
, &is_readonly
);
8428 && SYMBOL_REF_P (orig
)
8429 && !SYMBOL_REF_FUNCTION_P (orig
)
8432 /* We don't know where orig is stored, so we have be
8433 pessimistic and use a GOT relocation. */
8434 rtx pic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
8436 insn
= calculate_pic_address_constant (reg
, pic_reg
, orig
);
8438 else if (TARGET_FDPIC
8439 && SYMBOL_REF_P (orig
)
8440 && (SYMBOL_REF_FUNCTION_P (orig
)
8443 /* We use the GOTOFF relocation. */
8444 rtx pic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
8446 rtx l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, orig
), UNSPEC_PIC_SYM
);
8447 emit_insn (gen_movsi (reg
, l1
));
8448 insn
= emit_insn (gen_addsi3 (reg
, reg
, pic_reg
));
8452 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8453 PC-relative access. */
8454 /* We use an UNSPEC rather than a LABEL_REF because this label
8455 never appears in the code stream. */
8456 labelno
= GEN_INT (pic_labelno
++);
8457 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8458 l1
= gen_rtx_CONST (VOIDmode
, l1
);
8460 /* On the ARM the PC register contains 'dot + 8' at the time of the
8461 addition, on the Thumb it is 'dot + 4'. */
8462 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
8463 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
8464 UNSPEC_SYMBOL_OFFSET
);
8465 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
8467 insn
= emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
,
8474 /* Return nonzero if X is valid as an ARM state addressing register. */
8476 arm_address_register_rtx_p (rtx x
, int strict_p
)
/* NOTE(review): damaged extraction — the REG_P test, the extraction of
   REGNO, and the strict/non-strict branch structure (original lines
   8475, 8477-8485, 8487) are missing; presumably the first return below
   is the STRICT_P branch — confirm against upstream arm.cc.  */
8486 return ARM_REGNO_OK_FOR_BASE_P (regno
);
/* Non-strict: accept any core register, any pseudo, or the frame/arg
   pointer registers (the expression below lists exactly these).  */
8488 return (regno
<= LAST_ARM_REGNUM
8489 || regno
>= FIRST_PSEUDO_REGISTER
8490 || regno
== FRAME_POINTER_REGNUM
8491 || regno
== ARG_POINTER_REGNUM
);
8494 /* Return TRUE if this rtx is the difference of a symbol and a label,
8495 and will reduce to a PC-relative relocation in the object file.
8496 Expressions like this can be left alone when generating PIC, rather
8497 than forced through the GOT. */
8499 pcrel_constant_p (rtx x
)
8501 if (GET_CODE (x
) == MINUS
)
8502 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
/* NOTE(review): the fallthrough "return FALSE;" (original lines
   8503-8505) is missing from this extraction.  */
8507 /* Return true if X will surely end up in an index register after next
/* NOTE(review): the end of the comment above and the function's return
   type (original lines 8508-8509, 8511) are missing from this
   extraction.  */
8510 will_be_in_index_register (const_rtx x
)
8512 /* arm.md: calculate_pic_address will split this into a register. */
8513 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
8516 /* Return nonzero if X is a valid ARM state address operand. */
8518 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
8522 enum rtx_code code
= GET_CODE (x
);
8524 if (arm_address_register_rtx_p (x
, strict_p
))
8527 use_ldrd
= (TARGET_LDRD
8528 && (mode
== DImode
|| mode
== DFmode
));
8530 if (code
== POST_INC
|| code
== PRE_DEC
8531 || ((code
== PRE_INC
|| code
== POST_DEC
)
8532 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
8533 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
8535 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
8536 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
8537 && GET_CODE (XEXP (x
, 1)) == PLUS
8538 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
8540 rtx addend
= XEXP (XEXP (x
, 1), 1);
8542 /* Don't allow ldrd post increment by register because it's hard
8543 to fixup invalid register choices. */
8545 && GET_CODE (x
) == POST_MODIFY
8549 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
8550 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
8553 /* After reload constants split into minipools will have addresses
8554 from a LABEL_REF. */
8555 else if (reload_completed
8556 && (code
== LABEL_REF
8558 && GET_CODE (XEXP (x
, 0)) == PLUS
8559 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8560 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8563 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
8566 else if (code
== PLUS
)
8568 rtx xop0
= XEXP (x
, 0);
8569 rtx xop1
= XEXP (x
, 1);
8571 return ((arm_address_register_rtx_p (xop0
, strict_p
)
8572 && ((CONST_INT_P (xop1
)
8573 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
8574 || (!strict_p
&& will_be_in_index_register (xop1
))))
8575 || (arm_address_register_rtx_p (xop1
, strict_p
)
8576 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
8580 /* Reload currently can't handle MINUS, so disable this for now */
8581 else if (GET_CODE (x
) == MINUS
)
8583 rtx xop0
= XEXP (x
, 0);
8584 rtx xop1
= XEXP (x
, 1);
8586 return (arm_address_register_rtx_p (xop0
, strict_p
)
8587 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
8591 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8592 && code
== SYMBOL_REF
8593 && CONSTANT_POOL_ADDRESS_P (x
)
8595 && symbol_mentioned_p (get_pool_constant (x
))
8596 && ! pcrel_constant_p (get_pool_constant (x
))))
8602 /* Return true if we can avoid creating a constant pool entry for x. */
8604 can_avoid_literal_pool_for_label_p (rtx x
)
8606 /* Normally we can assign constant values to target registers without
8607 the help of constant pool. But there are cases we have to use constant
8609 1) assign a label to register.
8610 2) sign-extend a 8bit value to 32bit and then assign to register.
8612 Constant pool access in format:
8613 (set (reg r0) (mem (symbol_ref (".LC0"))))
8614 will cause the use of literal pool (later in function arm_reorg).
8615 So here we mark such format as an invalid format, then the compiler
8616 will adjust it into:
8617 (set (reg r0) (symbol_ref (".LC0")))
8618 (set (reg r0) (mem (reg r0))).
8619 No extra register is required, and (mem (reg r0)) won't cause the use
8620 of literal pools. */
8621 if (arm_disable_literal_pool
&& SYMBOL_REF_P (x
)
8622 && CONSTANT_POOL_ADDRESS_P (x
))
/* NOTE(review): the "return true;" under this condition and the final
   "return false;" (original lines 8623-8627) are missing from this
   extraction.  */
8628 /* Return nonzero if X is a valid Thumb-2 address operand. */
8630 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8633 enum rtx_code code
= GET_CODE (x
);
8635 if (TARGET_HAVE_MVE
&& VALID_MVE_MODE (mode
))
8636 return mve_vector_mem_operand (mode
, x
, strict_p
);
8638 if (arm_address_register_rtx_p (x
, strict_p
))
8641 use_ldrd
= (TARGET_LDRD
8642 && (mode
== DImode
|| mode
== DFmode
));
8644 if (code
== POST_INC
|| code
== PRE_DEC
8645 || ((code
== PRE_INC
|| code
== POST_DEC
)
8646 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
8647 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
8649 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
8650 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
8651 && GET_CODE (XEXP (x
, 1)) == PLUS
8652 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
8654 /* Thumb-2 only has autoincrement by constant. */
8655 rtx addend
= XEXP (XEXP (x
, 1), 1);
8656 HOST_WIDE_INT offset
;
8658 if (!CONST_INT_P (addend
))
8661 offset
= INTVAL(addend
);
8662 if (GET_MODE_SIZE (mode
) <= 4)
8663 return (offset
> -256 && offset
< 256);
8665 return (use_ldrd
&& offset
> -1024 && offset
< 1024
8666 && (offset
& 3) == 0);
8669 /* After reload constants split into minipools will have addresses
8670 from a LABEL_REF. */
8671 else if (reload_completed
8672 && (code
== LABEL_REF
8674 && GET_CODE (XEXP (x
, 0)) == PLUS
8675 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8676 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8679 else if (mode
== TImode
8680 || (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
8681 || (TARGET_HAVE_MVE
&& VALID_MVE_STRUCT_MODE (mode
)))
8684 else if (code
== PLUS
)
8686 rtx xop0
= XEXP (x
, 0);
8687 rtx xop1
= XEXP (x
, 1);
8689 return ((arm_address_register_rtx_p (xop0
, strict_p
)
8690 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
8691 || (!strict_p
&& will_be_in_index_register (xop1
))))
8692 || (arm_address_register_rtx_p (xop1
, strict_p
)
8693 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
8696 else if (can_avoid_literal_pool_for_label_p (x
))
8699 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8700 && code
== SYMBOL_REF
8701 && CONSTANT_POOL_ADDRESS_P (x
)
8703 && symbol_mentioned_p (get_pool_constant (x
))
8704 && ! pcrel_constant_p (get_pool_constant (x
))))
8710 /* Return nonzero if INDEX is valid for an address index operand in
8713 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
8716 HOST_WIDE_INT range
;
8717 enum rtx_code code
= GET_CODE (index
);
8719 /* Standard coprocessor addressing modes. */
8720 if (TARGET_HARD_FLOAT
8721 && (mode
== SFmode
|| mode
== DFmode
))
8722 return (code
== CONST_INT
&& INTVAL (index
) < 1024
8723 && INTVAL (index
) > -1024
8724 && (INTVAL (index
) & 3) == 0);
8726 /* For quad modes, we restrict the constant offset to be slightly less
8727 than what the instruction format permits. We do this because for
8728 quad mode moves, we will actually decompose them into two separate
8729 double-mode reads or writes. INDEX must therefore be a valid
8730 (double-mode) offset and so should INDEX+8. */
8731 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
8732 return (code
== CONST_INT
8733 && INTVAL (index
) < 1016
8734 && INTVAL (index
) > -1024
8735 && (INTVAL (index
) & 3) == 0);
8737 /* We have no such constraint on double mode offsets, so we permit the
8738 full range of the instruction format. */
8739 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8740 return (code
== CONST_INT
8741 && INTVAL (index
) < 1024
8742 && INTVAL (index
) > -1024
8743 && (INTVAL (index
) & 3) == 0);
8745 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8746 return (code
== CONST_INT
8747 && INTVAL (index
) < 1024
8748 && INTVAL (index
) > -1024
8749 && (INTVAL (index
) & 3) == 0);
8751 if (arm_address_register_rtx_p (index
, strict_p
)
8752 && (GET_MODE_SIZE (mode
) <= 4))
8755 if (mode
== DImode
|| mode
== DFmode
)
8757 if (code
== CONST_INT
)
8759 HOST_WIDE_INT val
= INTVAL (index
);
8761 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8762 If vldr is selected it uses arm_coproc_mem_operand. */
8764 return val
> -256 && val
< 256;
8766 return val
> -4096 && val
< 4092;
8769 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
8772 if (GET_MODE_SIZE (mode
) <= 4
8776 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
8780 rtx xiop0
= XEXP (index
, 0);
8781 rtx xiop1
= XEXP (index
, 1);
8783 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8784 && power_of_two_operand (xiop1
, SImode
))
8785 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8786 && power_of_two_operand (xiop0
, SImode
)));
8788 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
8789 || code
== ASHIFT
|| code
== ROTATERT
)
8791 rtx op
= XEXP (index
, 1);
8793 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8796 && INTVAL (op
) <= 31);
8800 /* For ARM v4 we may be doing a sign-extend operation during the
8806 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
8812 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
8814 return (code
== CONST_INT
8815 && INTVAL (index
) < range
8816 && INTVAL (index
) > -range
);
8819 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8820 index operand. i.e. 1, 2, 4 or 8. */
8822 thumb2_index_mul_operand (rtx op
)
8826 if (!CONST_INT_P (op
))
8830 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
8833 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8835 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
8837 enum rtx_code code
= GET_CODE (index
);
8839 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8840 /* Standard coprocessor addressing modes. */
8842 && (mode
== SFmode
|| mode
== DFmode
))
8843 return (code
== CONST_INT
&& INTVAL (index
) < 1024
8844 /* Thumb-2 allows only > -256 index range for it's core register
8845 load/stores. Since we allow SF/DF in core registers, we have
8846 to use the intersection between -256~4096 (core) and -1024~1024
8848 && INTVAL (index
) > -256
8849 && (INTVAL (index
) & 3) == 0);
8851 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8853 /* For DImode assume values will usually live in core regs
8854 and only allow LDRD addressing modes. */
8855 if (!TARGET_LDRD
|| mode
!= DImode
)
8856 return (code
== CONST_INT
8857 && INTVAL (index
) < 1024
8858 && INTVAL (index
) > -1024
8859 && (INTVAL (index
) & 3) == 0);
8862 /* For quad modes, we restrict the constant offset to be slightly less
8863 than what the instruction format permits. We do this because for
8864 quad mode moves, we will actually decompose them into two separate
8865 double-mode reads or writes. INDEX must therefore be a valid
8866 (double-mode) offset and so should INDEX+8. */
8867 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
8868 return (code
== CONST_INT
8869 && INTVAL (index
) < 1016
8870 && INTVAL (index
) > -1024
8871 && (INTVAL (index
) & 3) == 0);
8873 /* We have no such constraint on double mode offsets, so we permit the
8874 full range of the instruction format. */
8875 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8876 return (code
== CONST_INT
8877 && INTVAL (index
) < 1024
8878 && INTVAL (index
) > -1024
8879 && (INTVAL (index
) & 3) == 0);
8881 if (arm_address_register_rtx_p (index
, strict_p
)
8882 && (GET_MODE_SIZE (mode
) <= 4))
8885 if (mode
== DImode
|| mode
== DFmode
)
8887 if (code
== CONST_INT
)
8889 HOST_WIDE_INT val
= INTVAL (index
);
8890 /* Thumb-2 ldrd only has reg+const addressing modes.
8891 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8892 If vldr is selected it uses arm_coproc_mem_operand. */
8894 return IN_RANGE (val
, -1020, 1020) && (val
& 3) == 0;
8896 return IN_RANGE (val
, -255, 4095 - 4);
8904 rtx xiop0
= XEXP (index
, 0);
8905 rtx xiop1
= XEXP (index
, 1);
8907 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8908 && thumb2_index_mul_operand (xiop1
))
8909 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8910 && thumb2_index_mul_operand (xiop0
)));
8912 else if (code
== ASHIFT
)
8914 rtx op
= XEXP (index
, 1);
8916 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8919 && INTVAL (op
) <= 3);
8922 return (code
== CONST_INT
8923 && INTVAL (index
) < 4096
8924 && INTVAL (index
) > -256);
8927 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8929 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
8939 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
8941 return (regno
<= LAST_LO_REGNUM
8942 || regno
> LAST_VIRTUAL_REGISTER
8943 || regno
== FRAME_POINTER_REGNUM
8944 || (GET_MODE_SIZE (mode
) >= 4
8945 && (regno
== STACK_POINTER_REGNUM
8946 || regno
>= FIRST_PSEUDO_REGISTER
8947 || x
== hard_frame_pointer_rtx
8948 || x
== arg_pointer_rtx
)));
8951 /* Return nonzero if x is a legitimate index register. This is the case
8952 for any base register that can access a QImode object. */
8954 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
8956 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
8959 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8961 The AP may be eliminated to either the SP or the FP, so we use the
8962 least common denominator, e.g. SImode, and offsets from 0 to 64.
8964 ??? Verify whether the above is the right approach.
8966 ??? Also, the FP may be eliminated to the SP, so perhaps that
8967 needs special handling also.
8969 ??? Look at how the mips16 port solves this problem. It probably uses
8970 better ways to solve some of these problems.
8972 Although it is not incorrect, we don't accept QImode and HImode
8973 addresses based on the frame pointer or arg pointer until the
8974 reload pass starts. This is so that eliminating such addresses
8975 into stack based ones won't produce impossible code. */
8977 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8979 if (TARGET_HAVE_MOVT
&& can_avoid_literal_pool_for_label_p (x
))
8982 /* ??? Not clear if this is right. Experiment. */
8983 if (GET_MODE_SIZE (mode
) < 4
8984 && !(reload_in_progress
|| reload_completed
)
8985 && (reg_mentioned_p (frame_pointer_rtx
, x
)
8986 || reg_mentioned_p (arg_pointer_rtx
, x
)
8987 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
8988 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
8989 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
8990 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
8993 /* Accept any base register. SP only in SImode or larger. */
8994 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
8997 /* This is PC relative data before arm_reorg runs. */
8998 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
9000 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
9001 && !arm_disable_literal_pool
)
9004 /* This is PC relative data after arm_reorg runs. */
9005 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
9008 || (GET_CODE (x
) == CONST
9009 && GET_CODE (XEXP (x
, 0)) == PLUS
9010 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
9011 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
9014 /* Post-inc indexing only supported for SImode and larger. */
9015 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
9016 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
9019 else if (GET_CODE (x
) == PLUS
)
9021 /* REG+REG address can be any two index registers. */
9022 /* We disallow FRAME+REG addressing since we know that FRAME
9023 will be replaced with STACK, and SP relative addressing only
9024 permits SP+OFFSET. */
9025 if (GET_MODE_SIZE (mode
) <= 4
9026 && XEXP (x
, 0) != frame_pointer_rtx
9027 && XEXP (x
, 1) != frame_pointer_rtx
9028 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
9029 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
9030 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
9033 /* REG+const has 5-7 bit offset for non-SP registers. */
9034 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
9035 || XEXP (x
, 0) == arg_pointer_rtx
)
9036 && CONST_INT_P (XEXP (x
, 1))
9037 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
9040 /* REG+const has 10-bit offset for SP, but only SImode and
9041 larger is supported. */
9042 /* ??? Should probably check for DI/DFmode overflow here
9043 just like GO_IF_LEGITIMATE_OFFSET does. */
9044 else if (REG_P (XEXP (x
, 0))
9045 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
9046 && GET_MODE_SIZE (mode
) >= 4
9047 && CONST_INT_P (XEXP (x
, 1))
9048 && INTVAL (XEXP (x
, 1)) >= 0
9049 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
9050 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
9053 else if (REG_P (XEXP (x
, 0))
9054 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
9055 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
9056 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
9057 && REGNO (XEXP (x
, 0))
9058 <= LAST_VIRTUAL_POINTER_REGISTER
))
9059 && GET_MODE_SIZE (mode
) >= 4
9060 && CONST_INT_P (XEXP (x
, 1))
9061 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
9065 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
9066 && GET_MODE_SIZE (mode
) == 4
9068 && CONSTANT_POOL_ADDRESS_P (x
)
9069 && !arm_disable_literal_pool
9071 && symbol_mentioned_p (get_pool_constant (x
))
9072 && ! pcrel_constant_p (get_pool_constant (x
))))
9078 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9079 instruction of mode MODE. */
9081 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
9083 switch (GET_MODE_SIZE (mode
))
9086 return val
>= 0 && val
< 32;
9089 return val
>= 0 && val
< 64 && (val
& 1) == 0;
9093 && (val
+ GET_MODE_SIZE (mode
)) <= 128
9099 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
9102 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
9103 else if (TARGET_THUMB2
)
9104 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
9105 else /* if (TARGET_THUMB1) */
9106 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
9109 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9111 Given an rtx X being reloaded into a reg required to be
9112 in class CLASS, return the class of reg to actually use.
9113 In general this is just CLASS, but for the Thumb core registers and
9114 immediate constants we prefer a LO_REGS class or a subset. */
9117 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
9123 if (rclass
== GENERAL_REGS
)
9130 /* Build the SYMBOL_REF for __tls_get_addr. */
9132 static GTY(()) rtx tls_get_addr_libfunc
;
9135 get_tls_get_addr (void)
9137 if (!tls_get_addr_libfunc
)
9138 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
9139 return tls_get_addr_libfunc
;
9143 arm_load_tp (rtx target
)
9146 target
= gen_reg_rtx (SImode
);
9150 /* Can return in any reg. */
9151 emit_insn (gen_load_tp_hard (target
));
9155 /* Always returned in r0. Immediately copy the result into a pseudo,
9156 otherwise other uses of r0 (e.g. setting up function arguments) may
9157 clobber the value. */
9163 rtx fdpic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
9164 rtx initial_fdpic_reg
= get_hard_reg_initial_val (Pmode
, FDPIC_REGNUM
);
9166 emit_insn (gen_load_tp_soft_fdpic ());
9169 emit_insn (gen_restore_pic_register_after_call(fdpic_reg
, initial_fdpic_reg
));
9172 emit_insn (gen_load_tp_soft ());
9174 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
9175 emit_move_insn (target
, tmp
);
9181 load_tls_operand (rtx x
, rtx reg
)
9185 if (reg
== NULL_RTX
)
9186 reg
= gen_reg_rtx (SImode
);
9188 tmp
= gen_rtx_CONST (SImode
, x
);
9190 emit_move_insn (reg
, tmp
);
9196 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
9198 rtx label
, labelno
= NULL_RTX
, sum
;
9200 gcc_assert (reloc
!= TLS_DESCSEQ
);
9205 sum
= gen_rtx_UNSPEC (Pmode
,
9206 gen_rtvec (2, x
, GEN_INT (reloc
)),
9211 labelno
= GEN_INT (pic_labelno
++);
9212 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
9213 label
= gen_rtx_CONST (VOIDmode
, label
);
9215 sum
= gen_rtx_UNSPEC (Pmode
,
9216 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
9217 GEN_INT (TARGET_ARM
? 8 : 4)),
9220 reg
= load_tls_operand (sum
, reg
);
9223 emit_insn (gen_addsi3 (reg
, reg
, gen_rtx_REG (Pmode
, FDPIC_REGNUM
)));
9224 else if (TARGET_ARM
)
9225 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
9227 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
9229 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
9230 LCT_PURE
, /* LCT_CONST? */
9233 rtx_insn
*insns
= get_insns ();
9240 arm_tls_descseq_addr (rtx x
, rtx reg
)
9242 rtx labelno
= GEN_INT (pic_labelno
++);
9243 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
9244 rtx sum
= gen_rtx_UNSPEC (Pmode
,
9245 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
9246 gen_rtx_CONST (VOIDmode
, label
),
9247 GEN_INT (!TARGET_ARM
)),
9249 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
9251 emit_insn (gen_tlscall (x
, labelno
));
9253 reg
= gen_reg_rtx (SImode
);
9255 gcc_assert (REGNO (reg
) != R0_REGNUM
);
9257 emit_move_insn (reg
, reg0
);
9264 legitimize_tls_address (rtx x
, rtx reg
)
9266 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
9268 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
9272 case TLS_MODEL_GLOBAL_DYNAMIC
:
9273 if (TARGET_GNU2_TLS
)
9275 gcc_assert (!TARGET_FDPIC
);
9277 reg
= arm_tls_descseq_addr (x
, reg
);
9279 tp
= arm_load_tp (NULL_RTX
);
9281 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
9285 /* Original scheme */
9287 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32_FDPIC
);
9289 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
9290 dest
= gen_reg_rtx (Pmode
);
9291 emit_libcall_block (insns
, dest
, ret
, x
);
9295 case TLS_MODEL_LOCAL_DYNAMIC
:
9296 if (TARGET_GNU2_TLS
)
9298 gcc_assert (!TARGET_FDPIC
);
9300 reg
= arm_tls_descseq_addr (x
, reg
);
9302 tp
= arm_load_tp (NULL_RTX
);
9304 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
9309 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32_FDPIC
);
9311 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
9313 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9314 share the LDM result with other LD model accesses. */
9315 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
9317 dest
= gen_reg_rtx (Pmode
);
9318 emit_libcall_block (insns
, dest
, ret
, eqv
);
9320 /* Load the addend. */
9321 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
9322 GEN_INT (TLS_LDO32
)),
9324 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
9325 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
9329 case TLS_MODEL_INITIAL_EXEC
:
9332 sum
= gen_rtx_UNSPEC (Pmode
,
9333 gen_rtvec (2, x
, GEN_INT (TLS_IE32_FDPIC
)),
9335 reg
= load_tls_operand (sum
, reg
);
9336 emit_insn (gen_addsi3 (reg
, reg
, gen_rtx_REG (Pmode
, FDPIC_REGNUM
)));
9337 emit_move_insn (reg
, gen_rtx_MEM (Pmode
, reg
));
9341 labelno
= GEN_INT (pic_labelno
++);
9342 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
9343 label
= gen_rtx_CONST (VOIDmode
, label
);
9344 sum
= gen_rtx_UNSPEC (Pmode
,
9345 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
9346 GEN_INT (TARGET_ARM
? 8 : 4)),
9348 reg
= load_tls_operand (sum
, reg
);
9351 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
9352 else if (TARGET_THUMB2
)
9353 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
9356 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
9357 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
9361 tp
= arm_load_tp (NULL_RTX
);
9363 return gen_rtx_PLUS (Pmode
, tp
, reg
);
9365 case TLS_MODEL_LOCAL_EXEC
:
9366 tp
= arm_load_tp (NULL_RTX
);
9368 reg
= gen_rtx_UNSPEC (Pmode
,
9369 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
9371 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
9373 return gen_rtx_PLUS (Pmode
, tp
, reg
);
9380 /* Try machine-dependent ways of modifying an illegitimate address
9381 to be legitimate. If we find one, return the new, valid address. */
9383 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
9385 if (arm_tls_referenced_p (x
))
9389 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
9391 addend
= XEXP (XEXP (x
, 0), 1);
9392 x
= XEXP (XEXP (x
, 0), 0);
9395 if (!SYMBOL_REF_P (x
))
9398 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
9400 x
= legitimize_tls_address (x
, NULL_RTX
);
9404 x
= gen_rtx_PLUS (SImode
, x
, addend
);
9412 return thumb_legitimize_address (x
, orig_x
, mode
);
9414 if (GET_CODE (x
) == PLUS
)
9416 rtx xop0
= XEXP (x
, 0);
9417 rtx xop1
= XEXP (x
, 1);
9419 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
9420 xop0
= force_reg (SImode
, xop0
);
9422 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
9423 && !symbol_mentioned_p (xop1
))
9424 xop1
= force_reg (SImode
, xop1
);
9426 if (ARM_BASE_REGISTER_RTX_P (xop0
)
9427 && CONST_INT_P (xop1
))
9429 HOST_WIDE_INT n
, low_n
;
9433 /* VFP addressing modes actually allow greater offsets, but for
9434 now we just stick with the lowest common denominator. */
9435 if (mode
== DImode
|| mode
== DFmode
)
9447 low_n
= ((mode
) == TImode
? 0
9448 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
9452 base_reg
= gen_reg_rtx (SImode
);
9453 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
9454 emit_move_insn (base_reg
, val
);
9455 x
= plus_constant (Pmode
, base_reg
, low_n
);
9457 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
9458 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
9461 /* XXX We don't allow MINUS any more -- see comment in
9462 arm_legitimate_address_outer_p (). */
9463 else if (GET_CODE (x
) == MINUS
)
9465 rtx xop0
= XEXP (x
, 0);
9466 rtx xop1
= XEXP (x
, 1);
9468 if (CONSTANT_P (xop0
))
9469 xop0
= force_reg (SImode
, xop0
);
9471 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
9472 xop1
= force_reg (SImode
, xop1
);
9474 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
9475 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
9478 /* Make sure to take full advantage of the pre-indexed addressing mode
9479 with absolute addresses which often allows for the base register to
9480 be factorized for multiple adjacent memory references, and it might
9481 even allows for the mini pool to be avoided entirely. */
9482 else if (CONST_INT_P (x
) && optimize
> 0)
9485 HOST_WIDE_INT mask
, base
, index
;
9488 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9489 only use a 8-bit index. So let's use a 12-bit index for
9490 SImode only and hope that arm_gen_constant will enable LDRB
9491 to use more bits. */
9492 bits
= (mode
== SImode
) ? 12 : 8;
9493 mask
= (1 << bits
) - 1;
9494 base
= INTVAL (x
) & ~mask
;
9495 index
= INTVAL (x
) & mask
;
9496 if (TARGET_ARM
&& bit_count (base
& 0xffffffff) > (32 - bits
)/2)
9498 /* It'll most probably be more efficient to generate the
9499 base with more bits set and use a negative index instead.
9500 Don't do this for Thumb as negative offsets are much more
9505 base_reg
= force_reg (SImode
, GEN_INT (base
));
9506 x
= plus_constant (Pmode
, base_reg
, index
);
9511 /* We need to find and carefully transform any SYMBOL and LABEL
9512 references; so go back to the original address expression. */
9513 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
, NULL_RTX
,
9514 false /*compute_now*/);
9516 if (new_x
!= orig_x
)
9524 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9525 to be legitimate. If we find one, return the new, valid address. */
9527 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
9529 if (GET_CODE (x
) == PLUS
9530 && CONST_INT_P (XEXP (x
, 1))
9531 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
9532 || INTVAL (XEXP (x
, 1)) < 0))
9534 rtx xop0
= XEXP (x
, 0);
9535 rtx xop1
= XEXP (x
, 1);
9536 HOST_WIDE_INT offset
= INTVAL (xop1
);
9538 /* Try and fold the offset into a biasing of the base register and
9539 then offsetting that. Don't do this when optimizing for space
9540 since it can cause too many CSEs. */
9541 if (optimize_size
&& offset
>= 0
9542 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
9544 HOST_WIDE_INT delta
;
9547 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
9548 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
9549 delta
= 31 * GET_MODE_SIZE (mode
);
9551 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
9553 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
9555 x
= plus_constant (Pmode
, xop0
, delta
);
9557 else if (offset
< 0 && offset
> -256)
9558 /* Small negative offsets are best done with a subtract before the
9559 dereference, forcing these into a register normally takes two
9561 x
= force_operand (x
, NULL_RTX
);
9564 /* For the remaining cases, force the constant into a register. */
9565 xop1
= force_reg (SImode
, xop1
);
9566 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
9569 else if (GET_CODE (x
) == PLUS
9570 && s_register_operand (XEXP (x
, 1), SImode
)
9571 && !s_register_operand (XEXP (x
, 0), SImode
))
9573 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
9575 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
9580 /* We need to find and carefully transform any SYMBOL and LABEL
9581 references; so go back to the original address expression. */
9582 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
, NULL_RTX
,
9583 false /*compute_now*/);
9585 if (new_x
!= orig_x
)
9592 /* Return TRUE if X contains any TLS symbol references. */
9595 arm_tls_referenced_p (rtx x
)
9597 if (! TARGET_HAVE_TLS
)
9600 subrtx_iterator::array_type array
;
9601 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
9603 const_rtx x
= *iter
;
9604 if (SYMBOL_REF_P (x
) && SYMBOL_REF_TLS_MODEL (x
) != 0)
9606 /* ARM currently does not provide relocations to encode TLS variables
9607 into AArch32 instructions, only data, so there is no way to
9608 currently implement these if a literal pool is disabled. */
9609 if (arm_disable_literal_pool
)
9610 sorry ("accessing thread-local storage is not currently supported "
9611 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9616 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9617 TLS offsets, not real symbol references. */
9618 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
9619 iter
.skip_subrtxes ();
9624 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9626 On the ARM, allow any integer (invalid ones are removed later by insn
9627 patterns), nice doubles and symbol_refs which refer to the function's
9630 When generating pic allow anything. */
9633 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
9635 if (GET_CODE (x
) == CONST_VECTOR
&& !neon_make_constant (x
, false))
9638 return flag_pic
|| !label_mentioned_p (x
);
9642 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9644 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
9645 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
9646 for ARMv8-M Baseline or later the result is valid. */
9647 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
9650 return (CONST_INT_P (x
)
9651 || CONST_DOUBLE_P (x
)
9652 || CONSTANT_ADDRESS_P (x
)
9653 || (TARGET_HAVE_MOVT
&& SYMBOL_REF_P (x
))
9654 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9655 we build the symbol address with upper/lower
9658 && !label_mentioned_p (x
)
9659 && arm_valid_symbolic_address_p (x
)
9660 && arm_disable_literal_pool
)
9665 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
9667 return (!arm_cannot_force_const_mem (mode
, x
)
9669 ? arm_legitimate_constant_p_1 (mode
, x
)
9670 : thumb_legitimate_constant_p (mode
, x
)));
9673 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9676 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9679 split_const (x
, &base
, &offset
);
9681 if (SYMBOL_REF_P (base
))
9683 /* Function symbols cannot have an offset due to the Thumb bit. */
9684 if ((SYMBOL_REF_FLAGS (base
) & SYMBOL_FLAG_FUNCTION
)
9685 && INTVAL (offset
) != 0)
9688 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9689 && !offset_within_block_p (base
, INTVAL (offset
)))
9692 return arm_tls_referenced_p (x
);
/* True if X is a REG or a SUBREG of a REG.  */
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)								\
   || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))

/* Strip a SUBREG (if any) and return the underlying REG.  */
#define REG_OR_SUBREG_RTX(X)			\
  (REG_P (X) ? (X) : SUBREG_REG (X))
9703 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
9705 machine_mode mode
= GET_MODE (x
);
9714 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9721 return COSTS_N_INSNS (1);
9724 if (arm_arch6m
&& arm_m_profile_small_mul
)
9725 return COSTS_N_INSNS (32);
9727 if (CONST_INT_P (XEXP (x
, 1)))
9730 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
9737 return COSTS_N_INSNS (2) + cycles
;
9739 return COSTS_N_INSNS (1) + 16;
9742 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9744 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9745 return (COSTS_N_INSNS (words
)
9746 + 4 * ((MEM_P (SET_SRC (x
)))
9747 + MEM_P (SET_DEST (x
))));
9752 if (UINTVAL (x
) < 256
9753 /* 16-bit constant. */
9754 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
9756 if (thumb_shiftable_const (INTVAL (x
)))
9757 return COSTS_N_INSNS (2);
9758 return arm_disable_literal_pool
9760 : COSTS_N_INSNS (3);
9762 else if ((outer
== PLUS
|| outer
== COMPARE
)
9763 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9765 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9766 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9767 return COSTS_N_INSNS (1);
9768 else if (outer
== AND
)
9771 /* This duplicates the tests in the andsi3 expander. */
9772 for (i
= 9; i
<= 31; i
++)
9773 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
9774 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
9775 return COSTS_N_INSNS (2);
9777 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9778 || outer
== LSHIFTRT
)
9780 return COSTS_N_INSNS (2);
9786 return COSTS_N_INSNS (3);
9804 /* XXX another guess. */
9805 /* Memory costs quite a lot for the first word, but subsequent words
9806 load at the equivalent of a single insn each. */
9807 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9808 + ((SYMBOL_REF_P (x
) && CONSTANT_POOL_ADDRESS_P (x
))
9813 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9819 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
9820 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
9826 return total
+ COSTS_N_INSNS (1);
9828 /* Assume a two-shift sequence. Increase the cost slightly so
9829 we prefer actual shifts over an extend operation. */
9830 return total
+ 1 + COSTS_N_INSNS (2);
9837 /* Estimates the size cost of thumb1 instructions.
9838 For now most of the code is copied from thumb1_rtx_costs. We need more
9839 fine grain tuning when we have more related test cases. */
9841 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
9843 machine_mode mode
= GET_MODE (x
);
9852 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9856 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9857 defined by RTL expansion, especially for the expansion of
9859 if ((GET_CODE (XEXP (x
, 0)) == MULT
9860 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
9861 || (GET_CODE (XEXP (x
, 1)) == MULT
9862 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
9863 return COSTS_N_INSNS (2);
9868 return COSTS_N_INSNS (1);
9871 if (CONST_INT_P (XEXP (x
, 1)))
9873 /* Thumb1 mul instruction can't operate on const. We must Load it
9874 into a register first. */
9875 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
9876 /* For the targets which have a very small and high-latency multiply
9877 unit, we prefer to synthesize the mult with up to 5 instructions,
9878 giving a good balance between size and performance. */
9879 if (arm_arch6m
&& arm_m_profile_small_mul
)
9880 return COSTS_N_INSNS (5);
9882 return COSTS_N_INSNS (1) + const_size
;
9884 return COSTS_N_INSNS (1);
9887 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9889 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9890 cost
= COSTS_N_INSNS (words
);
9891 if (satisfies_constraint_J (SET_SRC (x
))
9892 || satisfies_constraint_K (SET_SRC (x
))
9893 /* Too big an immediate for a 2-byte mov, using MOVT. */
9894 || (CONST_INT_P (SET_SRC (x
))
9895 && UINTVAL (SET_SRC (x
)) >= 256
9897 && satisfies_constraint_j (SET_SRC (x
)))
9898 /* thumb1_movdi_insn. */
9899 || ((words
> 1) && MEM_P (SET_SRC (x
))))
9900 cost
+= COSTS_N_INSNS (1);
9906 if (UINTVAL (x
) < 256)
9907 return COSTS_N_INSNS (1);
9908 /* movw is 4byte long. */
9909 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
9910 return COSTS_N_INSNS (2);
9911 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9912 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
9913 return COSTS_N_INSNS (2);
9914 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9915 if (thumb_shiftable_const (INTVAL (x
)))
9916 return COSTS_N_INSNS (2);
9917 return arm_disable_literal_pool
9919 : COSTS_N_INSNS (3);
9921 else if ((outer
== PLUS
|| outer
== COMPARE
)
9922 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9924 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9925 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9926 return COSTS_N_INSNS (1);
9927 else if (outer
== AND
)
9930 /* This duplicates the tests in the andsi3 expander. */
9931 for (i
= 9; i
<= 31; i
++)
9932 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
9933 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
9934 return COSTS_N_INSNS (2);
9936 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9937 || outer
== LSHIFTRT
)
9939 return COSTS_N_INSNS (2);
9945 return COSTS_N_INSNS (3);
9959 return COSTS_N_INSNS (1);
9962 return (COSTS_N_INSNS (1)
9964 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9965 + ((SYMBOL_REF_P (x
) && CONSTANT_POOL_ADDRESS_P (x
))
9966 ? COSTS_N_INSNS (1) : 0));
9970 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9975 /* XXX still guessing. */
9976 switch (GET_MODE (XEXP (x
, 0)))
9979 return (1 + (mode
== DImode
? 4 : 0)
9980 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9983 return (4 + (mode
== DImode
? 4 : 0)
9984 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9987 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9998 /* Helper function for arm_rtx_costs. If one operand of the OP, a
9999 PLUS, adds the carry flag, then return the other operand. If
10000 neither is a carry, return OP unchanged. */
10002 strip_carry_operation (rtx op
)
10004 gcc_assert (GET_CODE (op
) == PLUS
);
10005 if (arm_carry_operation (XEXP (op
, 0), GET_MODE (op
)))
10006 return XEXP (op
, 1);
10007 else if (arm_carry_operation (XEXP (op
, 1), GET_MODE (op
)))
10008 return XEXP (op
, 0);
10012 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10013 operand, then return the operand that is being shifted. If the shift
10014 is not by a constant, then set SHIFT_REG to point to the operand.
10015 Return NULL if OP is not a shifter operand. */
10017 shifter_op_p (rtx op
, rtx
*shift_reg
)
10019 enum rtx_code code
= GET_CODE (op
);
10021 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
10022 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
10023 return XEXP (op
, 0);
10024 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
10025 return XEXP (op
, 0);
10026 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
10027 || code
== ASHIFTRT
)
10029 if (!CONST_INT_P (XEXP (op
, 1)))
10030 *shift_reg
= XEXP (op
, 1);
10031 return XEXP (op
, 0);
10038 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
10040 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
10041 rtx_code code
= GET_CODE (x
);
10042 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
10044 switch (XINT (x
, 1))
10046 case UNSPEC_UNALIGNED_LOAD
:
10047 /* We can only do unaligned loads into the integer unit, and we can't
10048 use LDM or LDRD. */
10049 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
10051 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
10052 + extra_cost
->ldst
.load_unaligned
);
10055 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
10056 ADDR_SPACE_GENERIC
, speed_p
);
10060 case UNSPEC_UNALIGNED_STORE
:
10061 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
10063 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
10064 + extra_cost
->ldst
.store_unaligned
);
10066 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
10068 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
10069 ADDR_SPACE_GENERIC
, speed_p
);
10073 case UNSPEC_VRINTZ
:
10074 case UNSPEC_VRINTP
:
10075 case UNSPEC_VRINTM
:
10076 case UNSPEC_VRINTR
:
10077 case UNSPEC_VRINTX
:
10078 case UNSPEC_VRINTA
:
10080 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
10084 *cost
= COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))

/* Cost a shift-and-arithmetic combination in narrow (sub-word) integer
   modes.  IDX selects which operand of X is checked for a shifter
   operand; OP is the outer arithmetic code used when costing the other
   operand.  Returns (from the enclosing function) when the pattern
   matches; falls through otherwise.  */
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0)
10121 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10122 considering the costs of the addressing mode and memory access
10125 arm_mem_costs (rtx x
, const struct cpu_cost_table
*extra_cost
,
10126 int *cost
, bool speed_p
)
10128 machine_mode mode
= GET_MODE (x
);
10130 *cost
= COSTS_N_INSNS (1);
10133 && GET_CODE (XEXP (x
, 0)) == PLUS
10134 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
10135 /* This will be split into two instructions. Add the cost of the
10136 additional instruction here. The cost of the memory access is computed
10137 below. See arm.md:calculate_pic_address. */
10138 *cost
+= COSTS_N_INSNS (1);
10140 /* Calculate cost of the addressing mode. */
10143 arm_addr_mode_op op_type
;
10144 switch (GET_CODE (XEXP (x
, 0)))
10148 op_type
= AMO_DEFAULT
;
10151 /* MINUS does not appear in RTL, but the architecture supports it,
10152 so handle this case defensively. */
10155 op_type
= AMO_NO_WB
;
10167 if (VECTOR_MODE_P (mode
))
10168 *cost
+= current_tune
->addr_mode_costs
->vector
[op_type
];
10169 else if (FLOAT_MODE_P (mode
))
10170 *cost
+= current_tune
->addr_mode_costs
->fp
[op_type
];
10172 *cost
+= current_tune
->addr_mode_costs
->integer
[op_type
];
10175 /* Calculate cost of memory access. */
10178 if (FLOAT_MODE_P (mode
))
10180 if (GET_MODE_SIZE (mode
) == 8)
10181 *cost
+= extra_cost
->ldst
.loadd
;
10183 *cost
+= extra_cost
->ldst
.loadf
;
10185 else if (VECTOR_MODE_P (mode
))
10186 *cost
+= extra_cost
->ldst
.loadv
;
10189 /* Integer modes */
10190 if (GET_MODE_SIZE (mode
) == 8)
10191 *cost
+= extra_cost
->ldst
.ldrd
;
10193 *cost
+= extra_cost
->ldst
.load
;
10200 /* Helper for arm_bfi_p. */
10202 arm_bfi_1_p (rtx op0
, rtx op1
, rtx
*sub0
, rtx
*sub1
)
10204 unsigned HOST_WIDE_INT const1
;
10205 unsigned HOST_WIDE_INT const2
= 0;
10207 if (!CONST_INT_P (XEXP (op0
, 1)))
10210 const1
= UINTVAL (XEXP (op0
, 1));
10211 if (!CONST_INT_P (XEXP (op1
, 1))
10212 || ~UINTVAL (XEXP (op1
, 1)) != const1
)
10215 if (GET_CODE (XEXP (op0
, 0)) == ASHIFT
10216 && CONST_INT_P (XEXP (XEXP (op0
, 0), 1)))
10218 const2
= UINTVAL (XEXP (XEXP (op0
, 0), 1));
10219 *sub0
= XEXP (XEXP (op0
, 0), 0);
10222 *sub0
= XEXP (op0
, 0);
10224 if (const2
>= GET_MODE_BITSIZE (GET_MODE (op0
)))
10227 *sub1
= XEXP (op1
, 0);
10228 return exact_log2 (const1
+ (HOST_WIDE_INT_1U
<< const2
)) >= 0;
10231 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10232 format looks something like:
10234 (IOR (AND (reg1) (~const1))
10235 (AND (ASHIFT (reg2) (const2))
10238 where const1 is a consecutive sequence of 1-bits with the
10239 least-significant non-zero bit starting at bit position const2. If
10240 const2 is zero, then the shift will not appear at all, due to
10241 canonicalization. The two arms of the IOR expression may be
10244 arm_bfi_p (rtx x
, rtx
*sub0
, rtx
*sub1
)
10246 if (GET_CODE (x
) != IOR
)
10248 if (GET_CODE (XEXP (x
, 0)) != AND
10249 || GET_CODE (XEXP (x
, 1)) != AND
)
10251 return (arm_bfi_1_p (XEXP (x
, 0), XEXP (x
, 1), sub0
, sub1
)
10252 || arm_bfi_1_p (XEXP (x
, 1), XEXP (x
, 0), sub1
, sub0
));
10255 /* RTX costs. Make an estimate of the cost of executing the operation
10256 X, which is contained within an operation with code OUTER_CODE.
10257 SPEED_P indicates whether the cost desired is the performance cost,
10258 or the size cost. The estimate is stored in COST and the return
10259 value is TRUE if the cost calculation is final, or FALSE if the
10260 caller should recurse through the operands of X to add additional
10263 We currently make no attempt to model the size savings of Thumb-2
10264 16-bit instructions. At the normal points in compilation where
10265 this code is called we have no measure of whether the condition
10266 flags are live or not, and thus no realistic way to determine what
10267 the size will eventually be. */
10269 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
10270 const struct cpu_cost_table
*extra_cost
,
10271 int *cost
, bool speed_p
)
10273 machine_mode mode
= GET_MODE (x
);
10275 *cost
= COSTS_N_INSNS (1);
10280 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
10282 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
10290 /* SET RTXs don't have a mode so we get it from the destination. */
10291 mode
= GET_MODE (SET_DEST (x
));
10293 if (REG_P (SET_SRC (x
))
10294 && REG_P (SET_DEST (x
)))
10296 /* Assume that most copies can be done with a single insn,
10297 unless we don't have HW FP, in which case everything
10298 larger than word mode will require two insns. */
10299 *cost
= COSTS_N_INSNS (((!TARGET_VFP_BASE
10300 && GET_MODE_SIZE (mode
) > 4)
10303 /* Conditional register moves can be encoded
10304 in 16 bits in Thumb mode. */
10305 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
10311 if (CONST_INT_P (SET_SRC (x
)))
10313 /* Handle CONST_INT here, since the value doesn't have a mode
10314 and we would otherwise be unable to work out the true cost. */
10315 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
10318 /* Slightly lower the cost of setting a core reg to a constant.
10319 This helps break up chains and allows for better scheduling. */
10320 if (REG_P (SET_DEST (x
))
10321 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
10324 /* Immediate moves with an immediate in the range [0, 255] can be
10325 encoded in 16 bits in Thumb mode. */
10326 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
10327 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
10329 goto const_int_cost
;
10335 return arm_mem_costs (x
, extra_cost
, cost
, speed_p
);
10339 /* Calculations of LDM costs are complex. We assume an initial cost
10340 (ldm_1st) which will load the number of registers mentioned in
10341 ldm_regs_per_insn_1st registers; then each additional
10342 ldm_regs_per_insn_subsequent registers cost one more insn. The
10343 formula for N regs is thus:
10345 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10346 + ldm_regs_per_insn_subsequent - 1)
10347 / ldm_regs_per_insn_subsequent).
10349 Additional costs may also be added for addressing. A similar
10350 formula is used for STM. */
10352 bool is_ldm
= load_multiple_operation (x
, SImode
);
10353 bool is_stm
= store_multiple_operation (x
, SImode
);
10355 if (is_ldm
|| is_stm
)
10359 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
10360 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
10361 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
10362 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
10363 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
10364 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
10365 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
10367 *cost
+= regs_per_insn_1st
10368 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
10369 + regs_per_insn_sub
- 1)
10370 / regs_per_insn_sub
);
10379 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10380 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10381 *cost
+= COSTS_N_INSNS (speed_p
10382 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
10383 else if (mode
== SImode
&& TARGET_IDIV
)
10384 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
10386 *cost
= LIBCALL_COST (2);
10388 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
10389 possible udiv is prefered. */
10390 *cost
+= (code
== DIV
? COSTS_N_INSNS (1) : 0);
10391 return false; /* All arguments must be in registers. */
10394 /* MOD by a power of 2 can be expanded as:
10396 and r0, r0, #(n - 1)
10397 and r1, r1, #(n - 1)
10398 rsbpl r0, r1, #0. */
10399 if (CONST_INT_P (XEXP (x
, 1))
10400 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
10403 *cost
+= COSTS_N_INSNS (3);
10406 *cost
+= 2 * extra_cost
->alu
.logical
10407 + extra_cost
->alu
.arith
;
10411 /* Fall-through. */
10413 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
10414 possible udiv is prefered. */
10415 *cost
= LIBCALL_COST (2) + (code
== MOD
? COSTS_N_INSNS (1) : 0);
10416 return false; /* All arguments must be in registers. */
10419 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
10421 *cost
+= (COSTS_N_INSNS (1)
10422 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
10424 *cost
+= extra_cost
->alu
.shift_reg
;
10432 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
10434 *cost
+= (COSTS_N_INSNS (2)
10435 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
10437 *cost
+= 2 * extra_cost
->alu
.shift
;
10438 /* Slightly disparage left shift by 1 at so we prefer adddi3. */
10439 if (code
== ASHIFT
&& XEXP (x
, 1) == CONST1_RTX (SImode
))
10443 else if (mode
== SImode
)
10445 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10446 /* Slightly disparage register shifts at -Os, but not by much. */
10447 if (!CONST_INT_P (XEXP (x
, 1)))
10448 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
10449 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10452 else if (GET_MODE_CLASS (mode
) == MODE_INT
10453 && GET_MODE_SIZE (mode
) < 4)
10455 if (code
== ASHIFT
)
10457 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10458 /* Slightly disparage register shifts at -Os, but not by
10460 if (!CONST_INT_P (XEXP (x
, 1)))
10461 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
10462 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
10464 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
10466 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
10468 /* Can use SBFX/UBFX. */
10470 *cost
+= extra_cost
->alu
.bfx
;
10471 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10475 *cost
+= COSTS_N_INSNS (1);
10476 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10479 if (CONST_INT_P (XEXP (x
, 1)))
10480 *cost
+= 2 * extra_cost
->alu
.shift
;
10482 *cost
+= (extra_cost
->alu
.shift
10483 + extra_cost
->alu
.shift_reg
);
10486 /* Slightly disparage register shifts. */
10487 *cost
+= !CONST_INT_P (XEXP (x
, 1));
10490 else /* Rotates. */
10492 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
10493 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10496 if (CONST_INT_P (XEXP (x
, 1)))
10497 *cost
+= (2 * extra_cost
->alu
.shift
10498 + extra_cost
->alu
.log_shift
);
10500 *cost
+= (extra_cost
->alu
.shift
10501 + extra_cost
->alu
.shift_reg
10502 + extra_cost
->alu
.log_shift_reg
);
10508 *cost
= LIBCALL_COST (2);
10514 if (mode
== SImode
)
10517 *cost
+= extra_cost
->alu
.rev
;
10524 /* No rev instruction available. Look at arm_legacy_rev
10525 and thumb_legacy_rev for the form of RTL used then. */
10528 *cost
+= COSTS_N_INSNS (9);
10532 *cost
+= 6 * extra_cost
->alu
.shift
;
10533 *cost
+= 3 * extra_cost
->alu
.logical
;
10538 *cost
+= COSTS_N_INSNS (4);
10542 *cost
+= 2 * extra_cost
->alu
.shift
;
10543 *cost
+= extra_cost
->alu
.arith_shift
;
10544 *cost
+= 2 * extra_cost
->alu
.logical
;
10552 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10553 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10555 if (GET_CODE (XEXP (x
, 0)) == MULT
10556 || GET_CODE (XEXP (x
, 1)) == MULT
)
10558 rtx mul_op0
, mul_op1
, sub_op
;
10561 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
10563 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10565 mul_op0
= XEXP (XEXP (x
, 0), 0);
10566 mul_op1
= XEXP (XEXP (x
, 0), 1);
10567 sub_op
= XEXP (x
, 1);
10571 mul_op0
= XEXP (XEXP (x
, 1), 0);
10572 mul_op1
= XEXP (XEXP (x
, 1), 1);
10573 sub_op
= XEXP (x
, 0);
10576 /* The first operand of the multiply may be optionally
10578 if (GET_CODE (mul_op0
) == NEG
)
10579 mul_op0
= XEXP (mul_op0
, 0);
10581 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
10582 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
10583 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
10589 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
10593 if (mode
== SImode
)
10595 rtx shift_by_reg
= NULL
;
10598 rtx op0
= XEXP (x
, 0);
10599 rtx op1
= XEXP (x
, 1);
10601 /* Factor out any borrow operation. There's more than one way
10602 of expressing this; try to recognize them all. */
10603 if (GET_CODE (op0
) == MINUS
)
10605 if (arm_borrow_operation (op1
, SImode
))
10607 op1
= XEXP (op0
, 1);
10608 op0
= XEXP (op0
, 0);
10610 else if (arm_borrow_operation (XEXP (op0
, 1), SImode
))
10611 op0
= XEXP (op0
, 0);
10613 else if (GET_CODE (op1
) == PLUS
10614 && arm_borrow_operation (XEXP (op1
, 0), SImode
))
10615 op1
= XEXP (op1
, 0);
10616 else if (GET_CODE (op0
) == NEG
10617 && arm_borrow_operation (op1
, SImode
))
10619 /* Negate with carry-in. For Thumb2 this is done with
10620 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
10621 RSC instruction that exists in Arm mode. */
10623 *cost
+= (TARGET_THUMB2
10624 ? extra_cost
->alu
.arith_shift
10625 : extra_cost
->alu
.arith
);
10626 *cost
+= rtx_cost (XEXP (op0
, 0), mode
, MINUS
, 0, speed_p
);
10629 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10630 Note we do mean ~borrow here. */
10631 else if (TARGET_ARM
&& arm_carry_operation (op0
, SImode
))
10633 *cost
+= rtx_cost (op1
, mode
, code
, 1, speed_p
);
10637 shift_op
= shifter_op_p (op0
, &shift_by_reg
);
10638 if (shift_op
== NULL
)
10640 shift_op
= shifter_op_p (op1
, &shift_by_reg
);
10641 non_shift_op
= op0
;
10644 non_shift_op
= op1
;
10646 if (shift_op
!= NULL
)
10648 if (shift_by_reg
!= NULL
)
10651 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10652 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
10655 *cost
+= extra_cost
->alu
.arith_shift
;
10657 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
10658 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
10662 if (arm_arch_thumb2
10663 && GET_CODE (XEXP (x
, 1)) == MULT
)
10667 *cost
+= extra_cost
->mult
[0].add
;
10668 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
10669 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
10670 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
10674 if (CONST_INT_P (op0
))
10676 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
10677 INTVAL (op0
), NULL_RTX
,
10679 *cost
= COSTS_N_INSNS (insns
);
10681 *cost
+= insns
* extra_cost
->alu
.arith
;
10682 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
10686 *cost
+= extra_cost
->alu
.arith
;
10688 /* Don't recurse as we don't want to cost any borrow that
10690 *cost
+= rtx_cost (op0
, mode
, MINUS
, 0, speed_p
);
10691 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
10695 if (GET_MODE_CLASS (mode
) == MODE_INT
10696 && GET_MODE_SIZE (mode
) < 4)
10698 rtx shift_op
, shift_reg
;
10701 /* We check both sides of the MINUS for shifter operands since,
10702 unlike PLUS, it's not commutative. */
10704 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0);
10705 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1);
10707 /* Slightly disparage, as we might need to widen the result. */
10710 *cost
+= extra_cost
->alu
.arith
;
10712 if (CONST_INT_P (XEXP (x
, 0)))
10714 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
10721 if (mode
== DImode
)
10723 *cost
+= COSTS_N_INSNS (1);
10725 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
10727 rtx op1
= XEXP (x
, 1);
10730 *cost
+= 2 * extra_cost
->alu
.arith
;
10732 if (GET_CODE (op1
) == ZERO_EXTEND
)
10733 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
10736 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
10737 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10741 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10744 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
10745 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
10747 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
10750 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
10751 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
10754 *cost
+= (extra_cost
->alu
.arith
10755 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
10756 ? extra_cost
->alu
.arith
10757 : extra_cost
->alu
.arith_shift
));
10758 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
10759 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10760 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
10765 *cost
+= 2 * extra_cost
->alu
.arith
;
10771 *cost
= LIBCALL_COST (2);
10775 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10776 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10778 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10780 rtx mul_op0
, mul_op1
, add_op
;
10783 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
10785 mul_op0
= XEXP (XEXP (x
, 0), 0);
10786 mul_op1
= XEXP (XEXP (x
, 0), 1);
10787 add_op
= XEXP (x
, 1);
10789 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
10790 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
10791 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
10797 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
10800 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10802 *cost
= LIBCALL_COST (2);
10806 /* Narrow modes can be synthesized in SImode, but the range
10807 of useful sub-operations is limited. Check for shift operations
10808 on one of the operands. Only left shifts can be used in the
10810 if (GET_MODE_CLASS (mode
) == MODE_INT
10811 && GET_MODE_SIZE (mode
) < 4)
10813 rtx shift_op
, shift_reg
;
10816 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0);
10818 if (CONST_INT_P (XEXP (x
, 1)))
10820 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10821 INTVAL (XEXP (x
, 1)), NULL_RTX
,
10823 *cost
= COSTS_N_INSNS (insns
);
10825 *cost
+= insns
* extra_cost
->alu
.arith
;
10826 /* Slightly penalize a narrow operation as the result may
10828 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
10832 /* Slightly penalize a narrow operation as the result may
10836 *cost
+= extra_cost
->alu
.arith
;
10841 if (mode
== SImode
)
10843 rtx shift_op
, shift_reg
;
10845 if (TARGET_INT_SIMD
10846 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10847 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
10849 /* UXTA[BH] or SXTA[BH]. */
10851 *cost
+= extra_cost
->alu
.extend_arith
;
10852 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
10854 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
10858 rtx op0
= XEXP (x
, 0);
10859 rtx op1
= XEXP (x
, 1);
10861 /* Handle a side effect of adding in the carry to an addition. */
10862 if (GET_CODE (op0
) == PLUS
10863 && arm_carry_operation (op1
, mode
))
10865 op1
= XEXP (op0
, 1);
10866 op0
= XEXP (op0
, 0);
10868 else if (GET_CODE (op1
) == PLUS
10869 && arm_carry_operation (op0
, mode
))
10871 op0
= XEXP (op1
, 0);
10872 op1
= XEXP (op1
, 1);
10874 else if (GET_CODE (op0
) == PLUS
)
10876 op0
= strip_carry_operation (op0
);
10877 if (swap_commutative_operands_p (op0
, op1
))
10878 std::swap (op0
, op1
);
10881 if (arm_carry_operation (op0
, mode
))
10883 /* Adding the carry to a register is a canonicalization of
10884 adding 0 to the register plus the carry. */
10886 *cost
+= extra_cost
->alu
.arith
;
10887 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed_p
);
10892 shift_op
= shifter_op_p (op0
, &shift_reg
);
10893 if (shift_op
!= NULL
)
10898 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10899 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10902 *cost
+= extra_cost
->alu
.arith_shift
;
10904 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
10905 + rtx_cost (op1
, mode
, PLUS
, 1, speed_p
));
10909 if (GET_CODE (op0
) == MULT
)
10913 if (TARGET_DSP_MULTIPLY
10914 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
10915 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10916 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10917 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10918 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
10919 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
10920 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
10921 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
10922 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
10923 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
10924 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
10925 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
10928 /* SMLA[BT][BT]. */
10930 *cost
+= extra_cost
->mult
[0].extend_add
;
10931 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
10932 SIGN_EXTEND
, 0, speed_p
)
10933 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
10934 SIGN_EXTEND
, 0, speed_p
)
10935 + rtx_cost (op1
, mode
, PLUS
, 1, speed_p
));
10940 *cost
+= extra_cost
->mult
[0].add
;
10941 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
10942 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
10943 + rtx_cost (op1
, mode
, PLUS
, 1, speed_p
));
10947 if (CONST_INT_P (op1
))
10949 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
10950 INTVAL (op1
), NULL_RTX
,
10952 *cost
= COSTS_N_INSNS (insns
);
10954 *cost
+= insns
* extra_cost
->alu
.arith
;
10955 *cost
+= rtx_cost (op0
, mode
, PLUS
, 0, speed_p
);
10960 *cost
+= extra_cost
->alu
.arith
;
10962 /* Don't recurse here because we want to test the operands
10963 without any carry operation. */
10964 *cost
+= rtx_cost (op0
, mode
, PLUS
, 0, speed_p
);
10965 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed_p
);
10969 if (mode
== DImode
)
10971 if (GET_CODE (XEXP (x
, 0)) == MULT
10972 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
10973 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
10974 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
10975 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
10978 *cost
+= extra_cost
->mult
[1].extend_add
;
10979 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10980 ZERO_EXTEND
, 0, speed_p
)
10981 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
10982 ZERO_EXTEND
, 0, speed_p
)
10983 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
10987 *cost
+= COSTS_N_INSNS (1);
10989 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10990 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
10993 *cost
+= (extra_cost
->alu
.arith
10994 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10995 ? extra_cost
->alu
.arith
10996 : extra_cost
->alu
.arith_shift
));
10998 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
11000 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
11005 *cost
+= 2 * extra_cost
->alu
.arith
;
11010 *cost
= LIBCALL_COST (2);
11015 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
11018 *cost
+= extra_cost
->alu
.rev
;
11022 else if (mode
== SImode
&& arm_arch_thumb2
11023 && arm_bfi_p (x
, &sub0
, &sub1
))
11025 *cost
+= rtx_cost (sub0
, mode
, ZERO_EXTRACT
, 1, speed_p
);
11026 *cost
+= rtx_cost (sub1
, mode
, ZERO_EXTRACT
, 0, speed_p
);
11028 *cost
+= extra_cost
->alu
.bfi
;
11034 /* Fall through. */
11035 case AND
: case XOR
:
11036 if (mode
== SImode
)
11038 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
11039 rtx op0
= XEXP (x
, 0);
11040 rtx shift_op
, shift_reg
;
11044 || (code
== IOR
&& TARGET_THUMB2
)))
11045 op0
= XEXP (op0
, 0);
11048 shift_op
= shifter_op_p (op0
, &shift_reg
);
11049 if (shift_op
!= NULL
)
11054 *cost
+= extra_cost
->alu
.log_shift_reg
;
11055 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
11058 *cost
+= extra_cost
->alu
.log_shift
;
11060 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
11061 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
11065 if (CONST_INT_P (XEXP (x
, 1)))
11067 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
11068 INTVAL (XEXP (x
, 1)), NULL_RTX
,
11071 *cost
= COSTS_N_INSNS (insns
);
11073 *cost
+= insns
* extra_cost
->alu
.logical
;
11074 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
11079 *cost
+= extra_cost
->alu
.logical
;
11080 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
11081 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
11085 if (mode
== DImode
)
11087 rtx op0
= XEXP (x
, 0);
11088 enum rtx_code subcode
= GET_CODE (op0
);
11090 *cost
+= COSTS_N_INSNS (1);
11094 || (code
== IOR
&& TARGET_THUMB2
)))
11095 op0
= XEXP (op0
, 0);
11097 if (GET_CODE (op0
) == ZERO_EXTEND
)
11100 *cost
+= 2 * extra_cost
->alu
.logical
;
11102 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
11104 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
11107 else if (GET_CODE (op0
) == SIGN_EXTEND
)
11110 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
11112 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
11114 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
11119 *cost
+= 2 * extra_cost
->alu
.logical
;
11125 *cost
= LIBCALL_COST (2);
11129 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11130 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11132 rtx op0
= XEXP (x
, 0);
11134 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
11135 op0
= XEXP (op0
, 0);
11138 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
11140 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
11141 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
11144 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11146 *cost
= LIBCALL_COST (2);
11150 if (mode
== SImode
)
11152 if (TARGET_DSP_MULTIPLY
11153 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
11154 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
11155 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
11156 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
11157 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
11158 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
11159 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
11160 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
11161 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
11162 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
11163 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
11164 && (INTVAL (XEXP (XEXP (x
, 1), 1))
11167 /* SMUL[TB][TB]. */
11169 *cost
+= extra_cost
->mult
[0].extend
;
11170 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
11171 SIGN_EXTEND
, 0, speed_p
);
11172 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
11173 SIGN_EXTEND
, 1, speed_p
);
11177 *cost
+= extra_cost
->mult
[0].simple
;
11181 if (mode
== DImode
)
11183 if ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
11184 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
11185 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
11186 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
))
11189 *cost
+= extra_cost
->mult
[1].extend
;
11190 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
11191 ZERO_EXTEND
, 0, speed_p
)
11192 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
11193 ZERO_EXTEND
, 0, speed_p
));
11197 *cost
= LIBCALL_COST (2);
11202 *cost
= LIBCALL_COST (2);
11206 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11207 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11209 if (GET_CODE (XEXP (x
, 0)) == MULT
)
11212 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
11217 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
11221 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11223 *cost
= LIBCALL_COST (1);
11227 if (mode
== SImode
)
11229 if (GET_CODE (XEXP (x
, 0)) == ABS
)
11231 *cost
+= COSTS_N_INSNS (1);
11232 /* Assume the non-flag-changing variant. */
11234 *cost
+= (extra_cost
->alu
.log_shift
11235 + extra_cost
->alu
.arith_shift
);
11236 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
11240 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
11241 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
11243 *cost
+= COSTS_N_INSNS (1);
11244 /* No extra cost for MOV imm and MVN imm. */
11245 /* If the comparison op is using the flags, there's no further
11246 cost, otherwise we need to add the cost of the comparison. */
11247 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
11248 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
11249 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
11251 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
11252 *cost
+= (COSTS_N_INSNS (1)
11253 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
11255 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
11258 *cost
+= extra_cost
->alu
.arith
;
11264 *cost
+= extra_cost
->alu
.arith
;
11268 if (GET_MODE_CLASS (mode
) == MODE_INT
11269 && GET_MODE_SIZE (mode
) < 4)
11271 /* Slightly disparage, as we might need an extend operation. */
11274 *cost
+= extra_cost
->alu
.arith
;
11278 if (mode
== DImode
)
11280 *cost
+= COSTS_N_INSNS (1);
11282 *cost
+= 2 * extra_cost
->alu
.arith
;
11287 *cost
= LIBCALL_COST (1);
11291 if (mode
== SImode
)
11294 rtx shift_reg
= NULL
;
11296 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
11300 if (shift_reg
!= NULL
)
11303 *cost
+= extra_cost
->alu
.log_shift_reg
;
11304 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
11307 *cost
+= extra_cost
->alu
.log_shift
;
11308 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
11313 *cost
+= extra_cost
->alu
.logical
;
11316 if (mode
== DImode
)
11318 *cost
+= COSTS_N_INSNS (1);
11324 *cost
+= LIBCALL_COST (1);
11329 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
11331 *cost
+= COSTS_N_INSNS (3);
11334 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
11335 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
11337 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
11338 /* Assume that if one arm of the if_then_else is a register,
11339 that it will be tied with the result and eliminate the
11340 conditional insn. */
11341 if (REG_P (XEXP (x
, 1)))
11343 else if (REG_P (XEXP (x
, 2)))
11349 if (extra_cost
->alu
.non_exec_costs_exec
)
11350 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
11352 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
11355 *cost
+= op1cost
+ op2cost
;
11361 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
11365 machine_mode op0mode
;
11366 /* We'll mostly assume that the cost of a compare is the cost of the
11367 LHS. However, there are some notable exceptions. */
11369 /* Floating point compares are never done as side-effects. */
11370 op0mode
= GET_MODE (XEXP (x
, 0));
11371 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
11372 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11375 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
11377 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
11379 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
11385 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
11387 *cost
= LIBCALL_COST (2);
11391 /* DImode compares normally take two insns. */
11392 if (op0mode
== DImode
)
11394 *cost
+= COSTS_N_INSNS (1);
11396 *cost
+= 2 * extra_cost
->alu
.arith
;
11400 if (op0mode
== SImode
)
11405 if (XEXP (x
, 1) == const0_rtx
11406 && !(REG_P (XEXP (x
, 0))
11407 || (GET_CODE (XEXP (x
, 0)) == SUBREG
11408 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
11410 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
11412 /* Multiply operations that set the flags are often
11413 significantly more expensive. */
11415 && GET_CODE (XEXP (x
, 0)) == MULT
11416 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
11417 *cost
+= extra_cost
->mult
[0].flag_setting
;
11420 && GET_CODE (XEXP (x
, 0)) == PLUS
11421 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11422 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
11424 *cost
+= extra_cost
->mult
[0].flag_setting
;
11429 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
11430 if (shift_op
!= NULL
)
11432 if (shift_reg
!= NULL
)
11434 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
11437 *cost
+= extra_cost
->alu
.arith_shift_reg
;
11440 *cost
+= extra_cost
->alu
.arith_shift
;
11441 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
11442 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
11447 *cost
+= extra_cost
->alu
.arith
;
11448 if (CONST_INT_P (XEXP (x
, 1))
11449 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
11451 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
11459 *cost
= LIBCALL_COST (2);
11469 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11472 && TARGET_HARD_FLOAT
11473 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
11474 && (XEXP (x
, 1) == CONST0_RTX (mode
)))
11480 /* Fall through. */
11494 if (outer_code
== SET
)
11496 /* Is it a store-flag operation? */
11497 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
11498 && XEXP (x
, 1) == const0_rtx
)
11500 /* Thumb also needs an IT insn. */
11501 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
11504 if (XEXP (x
, 1) == const0_rtx
)
11509 /* LSR Rd, Rn, #31. */
11511 *cost
+= extra_cost
->alu
.shift
;
11521 *cost
+= COSTS_N_INSNS (1);
11525 /* RSBS T1, Rn, Rn, LSR #31
11527 *cost
+= COSTS_N_INSNS (1);
11529 *cost
+= extra_cost
->alu
.arith_shift
;
11533 /* RSB Rd, Rn, Rn, ASR #1
11534 LSR Rd, Rd, #31. */
11535 *cost
+= COSTS_N_INSNS (1);
11537 *cost
+= (extra_cost
->alu
.arith_shift
11538 + extra_cost
->alu
.shift
);
11544 *cost
+= COSTS_N_INSNS (1);
11546 *cost
+= extra_cost
->alu
.shift
;
11550 /* Remaining cases are either meaningless or would take
11551 three insns anyway. */
11552 *cost
= COSTS_N_INSNS (3);
11555 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11560 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
11561 if (CONST_INT_P (XEXP (x
, 1))
11562 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
11564 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11571 /* Not directly inside a set. If it involves the condition code
11572 register it must be the condition for a branch, cond_exec or
11573 I_T_E operation. Since the comparison is performed elsewhere
11574 this is just the control part which has no additional
11576 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
11577 && XEXP (x
, 1) == const0_rtx
)
11585 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11586 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11589 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
11593 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
11595 *cost
= LIBCALL_COST (1);
11599 if (mode
== SImode
)
11602 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
11606 *cost
= LIBCALL_COST (1);
11610 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
11611 && MEM_P (XEXP (x
, 0)))
11613 if (mode
== DImode
)
11614 *cost
+= COSTS_N_INSNS (1);
11619 if (GET_MODE (XEXP (x
, 0)) == SImode
)
11620 *cost
+= extra_cost
->ldst
.load
;
11622 *cost
+= extra_cost
->ldst
.load_sign_extend
;
11624 if (mode
== DImode
)
11625 *cost
+= extra_cost
->alu
.shift
;
11630 /* Widening from less than 32-bits requires an extend operation. */
11631 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
11633 /* We have SXTB/SXTH. */
11634 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11636 *cost
+= extra_cost
->alu
.extend
;
11638 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
11640 /* Needs two shifts. */
11641 *cost
+= COSTS_N_INSNS (1);
11642 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11644 *cost
+= 2 * extra_cost
->alu
.shift
;
11647 /* Widening beyond 32-bits requires one more insn. */
11648 if (mode
== DImode
)
11650 *cost
+= COSTS_N_INSNS (1);
11652 *cost
+= extra_cost
->alu
.shift
;
11659 || GET_MODE (XEXP (x
, 0)) == SImode
11660 || GET_MODE (XEXP (x
, 0)) == QImode
)
11661 && MEM_P (XEXP (x
, 0)))
11663 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11665 if (mode
== DImode
)
11666 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
11671 /* Widening from less than 32-bits requires an extend operation. */
11672 if (GET_MODE (XEXP (x
, 0)) == QImode
)
11674 /* UXTB can be a shorter instruction in Thumb2, but it might
11675 be slower than the AND Rd, Rn, #255 alternative. When
11676 optimizing for speed it should never be slower to use
11677 AND, and we don't really model 16-bit vs 32-bit insns
11680 *cost
+= extra_cost
->alu
.logical
;
11682 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
11684 /* We have UXTB/UXTH. */
11685 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11687 *cost
+= extra_cost
->alu
.extend
;
11689 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
11691 /* Needs two shifts. It's marginally preferable to use
11692 shifts rather than two BIC instructions as the second
11693 shift may merge with a subsequent insn as a shifter
11695 *cost
= COSTS_N_INSNS (2);
11696 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11698 *cost
+= 2 * extra_cost
->alu
.shift
;
11701 /* Widening beyond 32-bits requires one more insn. */
11702 if (mode
== DImode
)
11704 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
11711 /* CONST_INT has no mode, so we cannot tell for sure how many
11712 insns are really going to be needed. The best we can do is
11713 look at the value passed. If it fits in SImode, then assume
11714 that's the mode it will be used for. Otherwise assume it
11715 will be used in DImode. */
11716 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
11721 /* Avoid blowing up in arm_gen_constant (). */
11722 if (!(outer_code
== PLUS
11723 || outer_code
== AND
11724 || outer_code
== IOR
11725 || outer_code
== XOR
11726 || outer_code
== MINUS
))
11730 if (mode
== SImode
)
11732 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
11733 INTVAL (x
), NULL
, NULL
,
11739 *cost
+= COSTS_N_INSNS (arm_gen_constant
11740 (outer_code
, SImode
, NULL
,
11741 trunc_int_for_mode (INTVAL (x
), SImode
),
11743 + arm_gen_constant (outer_code
, SImode
, NULL
,
11744 INTVAL (x
) >> 32, NULL
,
11756 if (arm_arch_thumb2
&& !flag_pic
)
11757 *cost
+= COSTS_N_INSNS (1);
11759 *cost
+= extra_cost
->ldst
.load
;
11762 *cost
+= COSTS_N_INSNS (1);
11766 *cost
+= COSTS_N_INSNS (1);
11768 *cost
+= extra_cost
->alu
.arith
;
11774 *cost
= COSTS_N_INSNS (4);
11779 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
11780 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
11782 if (vfp3_const_double_rtx (x
))
11785 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
11791 if (mode
== DFmode
)
11792 *cost
+= extra_cost
->ldst
.loadd
;
11794 *cost
+= extra_cost
->ldst
.loadf
;
11797 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
11801 *cost
= COSTS_N_INSNS (4);
11806 if (((TARGET_NEON
&& TARGET_HARD_FLOAT
11807 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
)))
11808 || TARGET_HAVE_MVE
)
11809 && simd_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
11810 *cost
= COSTS_N_INSNS (1);
11812 *cost
= COSTS_N_INSNS (4);
11817 /* When optimizing for size, we prefer constant pool entries to
11818 MOVW/MOVT pairs, so bump the cost of these slightly. */
11825 *cost
+= extra_cost
->alu
.clz
;
11829 if (XEXP (x
, 1) == const0_rtx
)
11832 *cost
+= extra_cost
->alu
.log_shift
;
11833 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11836 /* Fall through. */
11840 *cost
+= COSTS_N_INSNS (1);
11844 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
11845 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
11846 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
11847 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11848 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
11849 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
11850 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
11851 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
11855 *cost
+= extra_cost
->mult
[1].extend
;
11856 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
11857 ZERO_EXTEND
, 0, speed_p
)
11858 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
11859 ZERO_EXTEND
, 0, speed_p
));
11862 *cost
= LIBCALL_COST (1);
11865 case UNSPEC_VOLATILE
:
11867 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
11870 /* Reading the PC is like reading any other register. Writing it
11871 is more expensive, but we take that into account elsewhere. */
11876 /* TODO: Simple zero_extract of bottom bits using AND. */
11877 /* Fall through. */
11881 && CONST_INT_P (XEXP (x
, 1))
11882 && CONST_INT_P (XEXP (x
, 2)))
11885 *cost
+= extra_cost
->alu
.bfx
;
11886 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11889 /* Without UBFX/SBFX, need to resort to shift operations. */
11890 *cost
+= COSTS_N_INSNS (1);
11892 *cost
+= 2 * extra_cost
->alu
.shift
;
11893 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
11897 if (TARGET_HARD_FLOAT
)
11900 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
11902 && GET_MODE (XEXP (x
, 0)) == HFmode
)
11904 /* Pre v8, widening HF->DF is a two-step process, first
11905 widening to SFmode. */
11906 *cost
+= COSTS_N_INSNS (1);
11908 *cost
+= extra_cost
->fp
[0].widen
;
11910 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11914 *cost
= LIBCALL_COST (1);
11917 case FLOAT_TRUNCATE
:
11918 if (TARGET_HARD_FLOAT
)
11921 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
11922 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
11924 /* Vector modes? */
11926 *cost
= LIBCALL_COST (1);
11930 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
11932 rtx op0
= XEXP (x
, 0);
11933 rtx op1
= XEXP (x
, 1);
11934 rtx op2
= XEXP (x
, 2);
11937 /* vfms or vfnma. */
11938 if (GET_CODE (op0
) == NEG
)
11939 op0
= XEXP (op0
, 0);
11941 /* vfnms or vfnma. */
11942 if (GET_CODE (op2
) == NEG
)
11943 op2
= XEXP (op2
, 0);
11945 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
11946 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
11947 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
11950 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
11955 *cost
= LIBCALL_COST (3);
11960 if (TARGET_HARD_FLOAT
)
11962 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11963 a vcvt fixed-point conversion. */
11964 if (code
== FIX
&& mode
== SImode
11965 && GET_CODE (XEXP (x
, 0)) == FIX
11966 && GET_MODE (XEXP (x
, 0)) == SFmode
11967 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11968 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
11972 *cost
+= extra_cost
->fp
[0].toint
;
11974 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
11979 if (GET_MODE_CLASS (mode
) == MODE_INT
)
11981 mode
= GET_MODE (XEXP (x
, 0));
11983 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
11984 /* Strip of the 'cost' of rounding towards zero. */
11985 if (GET_CODE (XEXP (x
, 0)) == FIX
)
11986 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
11989 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
11990 /* ??? Increase the cost to deal with transferring from
11991 FP -> CORE registers? */
11994 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
11998 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
12001 /* Vector costs? */
12003 *cost
= LIBCALL_COST (1);
12007 case UNSIGNED_FLOAT
:
12008 if (TARGET_HARD_FLOAT
)
12010 /* ??? Increase the cost to deal with transferring from CORE
12011 -> FP registers? */
12013 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
12016 *cost
= LIBCALL_COST (1);
12024 /* Just a guess. Guess number of instructions in the asm
12025 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12026 though (see PR60663). */
12027 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
12028 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
12030 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
12034 if (mode
!= VOIDmode
)
12035 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
12037 *cost
= COSTS_N_INSNS (4); /* Who knows? */
12042 #undef HANDLE_NARROW_SHIFT_ARITH
12044 /* RTX costs entry point. */
12047 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
12048 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
12051 int code
= GET_CODE (x
);
12052 gcc_assert (current_tune
->insn_extra_cost
);
12054 result
= arm_rtx_costs_internal (x
, (enum rtx_code
) code
,
12055 (enum rtx_code
) outer_code
,
12056 current_tune
->insn_extra_cost
,
12059 if (dump_file
&& arm_verbose_cost
)
12061 print_rtl_single (dump_file
, x
);
12062 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
12063 *total
, result
? "final" : "partial");
12069 arm_insn_cost (rtx_insn
*insn
, bool speed
)
12073 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12074 will likely disappear during register allocation. */
12075 if (!reload_completed
12076 && GET_CODE (PATTERN (insn
)) == SET
12077 && REG_P (SET_DEST (PATTERN (insn
)))
12078 && REG_P (SET_SRC (PATTERN (insn
))))
12080 cost
= pattern_cost (PATTERN (insn
), speed
);
12081 /* If the cost is zero, then it's likely a complex insn. We don't want the
12082 cost of these to be less than something we know about. */
12083 return cost
? cost
: COSTS_N_INSNS (2);
12086 /* All address computations that can be done are free, but rtx cost returns
12087 the same for practically all of them. So we weight the different types
12088 of address here in the order (most pref first):
12089 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
12091 arm_arm_address_cost (rtx x
)
12093 enum rtx_code c
= GET_CODE (x
);
12095 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
12097 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
12102 if (CONST_INT_P (XEXP (x
, 1)))
12105 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
12115 arm_thumb_address_cost (rtx x
)
12117 enum rtx_code c
= GET_CODE (x
);
12122 && REG_P (XEXP (x
, 0))
12123 && CONST_INT_P (XEXP (x
, 1)))
12130 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
12131 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
12133 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
12136 /* Adjust cost hook for XScale. */
12138 xscale_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
12141 /* Some true dependencies can have a higher cost depending
12142 on precisely how certain input operands are used. */
12144 && recog_memoized (insn
) >= 0
12145 && recog_memoized (dep
) >= 0)
12147 int shift_opnum
= get_attr_shift (insn
);
12148 enum attr_type attr_type
= get_attr_type (dep
);
12150 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12151 operand for INSN. If we have a shifted input operand and the
12152 instruction we depend on is another ALU instruction, then we may
12153 have to account for an additional stall. */
12154 if (shift_opnum
!= 0
12155 && (attr_type
== TYPE_ALU_SHIFT_IMM_LSL_1TO4
12156 || attr_type
== TYPE_ALU_SHIFT_IMM_OTHER
12157 || attr_type
== TYPE_ALUS_SHIFT_IMM
12158 || attr_type
== TYPE_LOGIC_SHIFT_IMM
12159 || attr_type
== TYPE_LOGICS_SHIFT_IMM
12160 || attr_type
== TYPE_ALU_SHIFT_REG
12161 || attr_type
== TYPE_ALUS_SHIFT_REG
12162 || attr_type
== TYPE_LOGIC_SHIFT_REG
12163 || attr_type
== TYPE_LOGICS_SHIFT_REG
12164 || attr_type
== TYPE_MOV_SHIFT
12165 || attr_type
== TYPE_MVN_SHIFT
12166 || attr_type
== TYPE_MOV_SHIFT_REG
12167 || attr_type
== TYPE_MVN_SHIFT_REG
))
12169 rtx shifted_operand
;
12172 /* Get the shifted operand. */
12173 extract_insn (insn
);
12174 shifted_operand
= recog_data
.operand
[shift_opnum
];
12176 /* Iterate over all the operands in DEP. If we write an operand
12177 that overlaps with SHIFTED_OPERAND, then we have increase the
12178 cost of this dependency. */
12179 extract_insn (dep
);
12180 preprocess_constraints (dep
);
12181 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
12183 /* We can ignore strict inputs. */
12184 if (recog_data
.operand_type
[opno
] == OP_IN
)
12187 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
12199 /* Adjust cost hook for Cortex A9. */
12201 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
12211 case REG_DEP_OUTPUT
:
12212 if (recog_memoized (insn
) >= 0
12213 && recog_memoized (dep
) >= 0)
12215 if (GET_CODE (PATTERN (insn
)) == SET
)
12218 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
12220 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
12222 enum attr_type attr_type_insn
= get_attr_type (insn
);
12223 enum attr_type attr_type_dep
= get_attr_type (dep
);
12225 /* By default all dependencies of the form
12228 have an extra latency of 1 cycle because
12229 of the input and output dependency in this
12230 case. However this gets modeled as an true
12231 dependency and hence all these checks. */
12232 if (REG_P (SET_DEST (PATTERN (insn
)))
12233 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
12235 /* FMACS is a special case where the dependent
12236 instruction can be issued 3 cycles before
12237 the normal latency in case of an output
12239 if ((attr_type_insn
== TYPE_FMACS
12240 || attr_type_insn
== TYPE_FMACD
)
12241 && (attr_type_dep
== TYPE_FMACS
12242 || attr_type_dep
== TYPE_FMACD
))
12244 if (dep_type
== REG_DEP_OUTPUT
)
12245 *cost
= insn_default_latency (dep
) - 3;
12247 *cost
= insn_default_latency (dep
);
12252 if (dep_type
== REG_DEP_OUTPUT
)
12253 *cost
= insn_default_latency (dep
) + 1;
12255 *cost
= insn_default_latency (dep
);
12265 gcc_unreachable ();
12271 /* Adjust cost hook for FA726TE. */
12273 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
12276 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
12277 have penalty of 3. */
12278 if (dep_type
== REG_DEP_TRUE
12279 && recog_memoized (insn
) >= 0
12280 && recog_memoized (dep
) >= 0
12281 && get_attr_conds (dep
) == CONDS_SET
)
12283 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12284 if (get_attr_conds (insn
) == CONDS_USE
12285 && get_attr_type (insn
) != TYPE_BRANCH
)
12291 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
12292 || get_attr_conds (insn
) == CONDS_USE
)
12302 /* Implement TARGET_REGISTER_MOVE_COST.
12304 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12305 it is typically more expensive than a single memory access. We set
12306 the cost to less than two memory accesses so that floating
12307 point to integer conversion does not go through memory. */
12310 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
12311 reg_class_t from
, reg_class_t to
)
12315 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
12316 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
12318 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
12319 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
12321 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
12328 if (from
== HI_REGS
|| to
== HI_REGS
)
12335 /* Implement TARGET_MEMORY_MOVE_COST. */
12338 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
12339 bool in ATTRIBUTE_UNUSED
)
12345 if (GET_MODE_SIZE (mode
) < 4)
12348 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
12352 /* Vectorizer cost model implementation. */
12354 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12356 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
12358 int misalign ATTRIBUTE_UNUSED
)
12362 switch (type_of_cost
)
12365 return current_tune
->vec_costs
->scalar_stmt_cost
;
12368 return current_tune
->vec_costs
->scalar_load_cost
;
12371 return current_tune
->vec_costs
->scalar_store_cost
;
12374 return current_tune
->vec_costs
->vec_stmt_cost
;
12377 return current_tune
->vec_costs
->vec_align_load_cost
;
12380 return current_tune
->vec_costs
->vec_store_cost
;
12382 case vec_to_scalar
:
12383 return current_tune
->vec_costs
->vec_to_scalar_cost
;
12385 case scalar_to_vec
:
12386 return current_tune
->vec_costs
->scalar_to_vec_cost
;
12388 case unaligned_load
:
12389 case vector_gather_load
:
12390 return current_tune
->vec_costs
->vec_unalign_load_cost
;
12392 case unaligned_store
:
12393 case vector_scatter_store
:
12394 return current_tune
->vec_costs
->vec_unalign_store_cost
;
12396 case cond_branch_taken
:
12397 return current_tune
->vec_costs
->cond_taken_branch_cost
;
12399 case cond_branch_not_taken
:
12400 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
12403 case vec_promote_demote
:
12404 return current_tune
->vec_costs
->vec_stmt_cost
;
12406 case vec_construct
:
12407 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
12408 return elements
/ 2 + 1;
12411 gcc_unreachable ();
12415 /* Return true if and only if this insn can dual-issue only as older. */
12417 cortexa7_older_only (rtx_insn
*insn
)
12419 if (recog_memoized (insn
) < 0)
12422 switch (get_attr_type (insn
))
12424 case TYPE_ALU_DSP_REG
:
12425 case TYPE_ALU_SREG
:
12426 case TYPE_ALUS_SREG
:
12427 case TYPE_LOGIC_REG
:
12428 case TYPE_LOGICS_REG
:
12430 case TYPE_ADCS_REG
:
12435 case TYPE_SHIFT_IMM
:
12436 case TYPE_SHIFT_REG
:
12437 case TYPE_LOAD_BYTE
:
12440 case TYPE_FFARITHS
:
12442 case TYPE_FFARITHD
:
12460 case TYPE_F_STORES
:
12467 /* Return true if and only if this insn can dual-issue as younger. */
12469 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
12471 if (recog_memoized (insn
) < 0)
12474 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
12478 switch (get_attr_type (insn
))
12481 case TYPE_ALUS_IMM
:
12482 case TYPE_LOGIC_IMM
:
12483 case TYPE_LOGICS_IMM
:
12488 case TYPE_MOV_SHIFT
:
12489 case TYPE_MOV_SHIFT_REG
:
12499 /* Look for an instruction that can dual issue only as an older
12500 instruction, and move it in front of any instructions that can
12501 dual-issue as younger, while preserving the relative order of all
12502 other instructions in the ready list. This is a hueuristic to help
12503 dual-issue in later cycles, by postponing issue of more flexible
12504 instructions. This heuristic may affect dual issue opportunities
12505 in the current cycle. */
12507 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
12508 int *n_readyp
, int clock
)
12511 int first_older_only
= -1, first_younger
= -1;
12515 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12519 /* Traverse the ready list from the head (the instruction to issue
12520 first), and looking for the first instruction that can issue as
12521 younger and the first instruction that can dual-issue only as
12523 for (i
= *n_readyp
- 1; i
>= 0; i
--)
12525 rtx_insn
*insn
= ready
[i
];
12526 if (cortexa7_older_only (insn
))
12528 first_older_only
= i
;
12530 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
12533 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
12537 /* Nothing to reorder because either no younger insn found or insn
12538 that can dual-issue only as older appears before any insn that
12539 can dual-issue as younger. */
12540 if (first_younger
== -1)
12543 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
12547 /* Nothing to reorder because no older-only insn in the ready list. */
12548 if (first_older_only
== -1)
12551 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
12555 /* Move first_older_only insn before first_younger. */
12557 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
12558 INSN_UID(ready
[first_older_only
]),
12559 INSN_UID(ready
[first_younger
]));
12560 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
12561 for (i
= first_older_only
; i
< first_younger
; i
++)
12563 ready
[i
] = ready
[i
+1];
12566 ready
[i
] = first_older_only_insn
;
12570 /* Implement TARGET_SCHED_REORDER. */
12572 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
12577 case TARGET_CPU_cortexa7
:
12578 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
12581 /* Do nothing for other cores. */
12585 return arm_issue_rate ();
12588 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12589 It corrects the value of COST based on the relationship between
12590 INSN and DEP through the dependence LINK. It returns the new
12591 value. There is a per-core adjust_cost hook to adjust scheduler costs
12592 and the per-core hook can choose to completely override the generic
12593 adjust_cost function. Only put bits of code into arm_adjust_cost that
12594 are common across all cores. */
12596 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
12601 /* When generating Thumb-1 code, we want to place flag-setting operations
12602 close to a conditional branch which depends on them, so that we can
12603 omit the comparison. */
12606 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
12607 && recog_memoized (dep
) >= 0
12608 && get_attr_conds (dep
) == CONDS_SET
)
12611 if (current_tune
->sched_adjust_cost
!= NULL
)
12613 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
12617 /* XXX Is this strictly true? */
12618 if (dep_type
== REG_DEP_ANTI
12619 || dep_type
== REG_DEP_OUTPUT
)
12622 /* Call insns don't incur a stall, even if they follow a load. */
12627 if ((i_pat
= single_set (insn
)) != NULL
12628 && MEM_P (SET_SRC (i_pat
))
12629 && (d_pat
= single_set (dep
)) != NULL
12630 && MEM_P (SET_DEST (d_pat
)))
12632 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
12633 /* This is a load after a store, there is no conflict if the load reads
12634 from a cached area. Assume that loads from the stack, and from the
12635 constant pool are cached, and that others will miss. This is a
12638 if ((SYMBOL_REF_P (src_mem
)
12639 && CONSTANT_POOL_ADDRESS_P (src_mem
))
12640 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
12641 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
12642 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
12650 arm_max_conditional_execute (void)
12652 return max_insns_skipped
;
12656 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
12659 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
12661 return (optimize
> 0) ? 2 : 0;
12665 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
12667 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12670 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12671 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12672 sequences of non-executed instructions in IT blocks probably take the same
12673 amount of time as executed instructions (and the IT instruction itself takes
12674 space in icache). This function was experimentally determined to give good
12675 results on a popular embedded benchmark. */
12678 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
12680 return (TARGET_32BIT
&& speed_p
) ? 1
12681 : arm_default_branch_cost (speed_p
, predictable_p
);
12685 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
12687 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
12690 static bool fp_consts_inited
= false;
12692 static REAL_VALUE_TYPE value_fp0
;
12695 init_fp_table (void)
12699 r
= REAL_VALUE_ATOF ("0", DFmode
);
12701 fp_consts_inited
= true;
12704 /* Return TRUE if rtx X is a valid immediate FP constant. */
12706 arm_const_double_rtx (rtx x
)
12708 const REAL_VALUE_TYPE
*r
;
12710 if (!fp_consts_inited
)
12713 r
= CONST_DOUBLE_REAL_VALUE (x
);
12714 if (REAL_VALUE_MINUS_ZERO (*r
))
12717 if (real_equal (r
, &value_fp0
))
12723 /* VFPv3 has a fairly wide range of representable immediates, formed from
12724 "quarter-precision" floating-point values. These can be evaluated using this
12725 formula (with ^ for exponentiation):
12729 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12730 16 <= n <= 31 and 0 <= r <= 7.
12732 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12734 - A (most-significant) is the sign bit.
12735 - BCD are the exponent (encoded as r XOR 3).
12736 - EFGH are the mantissa (encoded as n - 16).
12739 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12740 fconst[sd] instruction, or -1 if X isn't suitable. */
12742 vfp3_const_double_index (rtx x
)
12744 REAL_VALUE_TYPE r
, m
;
12745 int sign
, exponent
;
12746 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
12747 unsigned HOST_WIDE_INT mask
;
12748 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12751 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
12754 r
= *CONST_DOUBLE_REAL_VALUE (x
);
12756 /* We can't represent these things, so detect them first. */
12757 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
12760 /* Extract sign, exponent and mantissa. */
12761 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
12762 r
= real_value_abs (&r
);
12763 exponent
= REAL_EXP (&r
);
12764 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12765 highest (sign) bit, with a fixed binary point at bit point_pos.
12766 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12767 bits for the mantissa, this may fail (low bits would be lost). */
12768 real_ldexp (&m
, &r
, point_pos
- exponent
);
12769 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12770 mantissa
= w
.elt (0);
12771 mant_hi
= w
.elt (1);
12773 /* If there are bits set in the low part of the mantissa, we can't
12774 represent this value. */
12778 /* Now make it so that mantissa contains the most-significant bits, and move
12779 the point_pos to indicate that the least-significant bits have been
12781 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12782 mantissa
= mant_hi
;
12784 /* We can permit four significant bits of mantissa only, plus a high bit
12785 which is always 1. */
12786 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
12787 if ((mantissa
& mask
) != 0)
12790 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12791 mantissa
>>= point_pos
- 5;
12793 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12794 floating-point immediate zero with Neon using an integer-zero load, but
12795 that case is handled elsewhere.) */
12799 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
12801 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12802 normalized significands are in the range [1, 2). (Our mantissa is shifted
12803 left 4 places at this point relative to normalized IEEE754 values). GCC
12804 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12805 REAL_EXP must be altered. */
12806 exponent
= 5 - exponent
;
12808 if (exponent
< 0 || exponent
> 7)
12811 /* Sign, mantissa and exponent are now in the correct form to plug into the
12812 formula described in the comment above. */
12813 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
12816 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12818 vfp3_const_double_rtx (rtx x
)
12823 return vfp3_const_double_index (x
) != -1;
12826 /* Recognize immediates which can be used in various Neon and MVE instructions.
12827 Legal immediates are described by the following table (for VMVN variants, the
12828 bitwise inverse of the constant shown is recognized. In either case, VMOV
12829 is output and the correct instruction to use for a given constant is chosen
12830 by the assembler). The constant shown is replicated across all elements of
12831 the destination vector.
12833 insn elems variant constant (binary)
12834 ---- ----- ------- -----------------
12835 vmov i32 0 00000000 00000000 00000000 abcdefgh
12836 vmov i32 1 00000000 00000000 abcdefgh 00000000
12837 vmov i32 2 00000000 abcdefgh 00000000 00000000
12838 vmov i32 3 abcdefgh 00000000 00000000 00000000
12839 vmov i16 4 00000000 abcdefgh
12840 vmov i16 5 abcdefgh 00000000
12841 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12842 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12843 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12844 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12845 vmvn i16 10 00000000 abcdefgh
12846 vmvn i16 11 abcdefgh 00000000
12847 vmov i32 12 00000000 00000000 abcdefgh 11111111
12848 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12849 vmov i32 14 00000000 abcdefgh 11111111 11111111
12850 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12851 vmov i8 16 abcdefgh
12852 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12853 eeeeeeee ffffffff gggggggg hhhhhhhh
12854 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12855 vmov f32 19 00000000 00000000 00000000 00000000
12857 For case 18, B = !b. Representable values are exactly those accepted by
12858 vfp3_const_double_index, but are output as floating-point numbers rather
12861 For case 19, we will change it to vmov.i32 when assembling.
12863 Variants 0-5 (inclusive) may also be used as immediates for the second
12864 operand of VORR/VBIC instructions.
12866 The INVERSE argument causes the bitwise inverse of the given operand to be
12867 recognized instead (used for recognizing legal immediates for the VAND/VORN
12868 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12869 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12870 output, rather than the real insns vbic/vorr).
12872 INVERSE makes no difference to the recognition of float vectors.
12874 The return value is the variant of immediate as shown in the above table, or
12875 -1 if the given value doesn't match any of the listed patterns.
12878 simd_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
12879 rtx
*modconst
, int *elementwidth
)
12881 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12883 for (i = 0; i < idx; i += (STRIDE)) \
12888 immtype = (CLASS); \
12889 elsize = (ELSIZE); \
12893 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
12894 unsigned int innersize
;
12895 unsigned char bytes
[16] = {};
12896 int immtype
= -1, matches
;
12897 unsigned int invmask
= inverse
? 0xff : 0;
12898 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
12901 n_elts
= CONST_VECTOR_NUNITS (op
);
12905 gcc_assert (mode
!= VOIDmode
);
12908 innersize
= GET_MODE_UNIT_SIZE (mode
);
12910 /* Only support 128-bit vectors for MVE. */
12911 if (TARGET_HAVE_MVE
12913 || (GET_MODE_CLASS (mode
) == MODE_VECTOR_BOOL
)
12914 || n_elts
* innersize
!= 16))
12917 if (!TARGET_HAVE_MVE
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_BOOL
)
12920 /* Vectors of float constants. */
12921 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
12923 rtx el0
= CONST_VECTOR_ELT (op
, 0);
12925 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
12928 /* FP16 vectors cannot be represented. */
12929 if (GET_MODE_INNER (mode
) == HFmode
)
12932 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12933 are distinct in this context. */
12934 if (!const_vec_duplicate_p (op
))
12938 *modconst
= CONST_VECTOR_ELT (op
, 0);
12943 if (el0
== CONST0_RTX (GET_MODE (el0
)))
12949 /* The tricks done in the code below apply for little-endian vector layout.
12950 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12951 FIXME: Implement logic for big-endian vectors. */
12952 if (BYTES_BIG_ENDIAN
&& vector
&& !const_vec_duplicate_p (op
))
12955 /* Splat vector constant out into a byte vector. */
12956 for (i
= 0; i
< n_elts
; i
++)
12958 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
12959 unsigned HOST_WIDE_INT elpart
;
12961 gcc_assert (CONST_INT_P (el
));
12962 elpart
= INTVAL (el
);
12964 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
12966 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
12967 elpart
>>= BITS_PER_UNIT
;
12971 /* Sanity check. */
12972 gcc_assert (idx
== GET_MODE_SIZE (mode
));
12976 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
12977 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12979 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
12980 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
12982 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12983 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
12985 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
12986 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
12988 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
12990 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
12992 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
12993 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12995 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
12996 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
12998 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
12999 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
13001 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
13002 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
13004 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
13006 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
13008 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
13009 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
13011 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
13012 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
13014 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
13015 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
13017 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
13018 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
13020 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
13022 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
13023 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
13031 *elementwidth
= elsize
;
13035 unsigned HOST_WIDE_INT imm
= 0;
13037 /* Un-invert bytes of recognized vector, if necessary. */
13039 for (i
= 0; i
< idx
; i
++)
13040 bytes
[i
] ^= invmask
;
13044 /* FIXME: Broken on 32-bit H_W_I hosts. */
13045 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
13047 for (i
= 0; i
< 8; i
++)
13048 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
13049 << (i
* BITS_PER_UNIT
);
13051 *modconst
= GEN_INT (imm
);
13055 unsigned HOST_WIDE_INT imm
= 0;
13057 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
13058 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
13060 *modconst
= GEN_INT (imm
);
13068 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13069 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13070 (or zero for float elements), and a modified constant (whatever should be
13071 output for a VMOV) in *MODCONST. "neon_immediate_valid_for_move" function is
13072 modified to "simd_immediate_valid_for_move" as this function will be used
13073 both by neon and mve. */
13075 simd_immediate_valid_for_move (rtx op
, machine_mode mode
,
13076 rtx
*modconst
, int *elementwidth
)
13080 int retval
= simd_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
13086 *modconst
= tmpconst
;
13089 *elementwidth
= tmpwidth
;
13094 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13095 the immediate is valid, write a constant suitable for using as an operand
13096 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13097 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13100 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
13101 rtx
*modconst
, int *elementwidth
)
13105 int retval
= simd_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
13107 if (retval
< 0 || retval
> 5)
13111 *modconst
= tmpconst
;
13114 *elementwidth
= tmpwidth
;
13119 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13120 the immediate is valid, write a constant suitable for using as an operand
13121 to VSHR/VSHL to *MODCONST and the corresponding element width to
13122 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
13123 because they have different limitations. */
13126 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
13127 rtx
*modconst
, int *elementwidth
,
13130 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
13131 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
13132 unsigned HOST_WIDE_INT last_elt
= 0;
13133 unsigned HOST_WIDE_INT maxshift
;
13135 /* Split vector constant out into a byte vector. */
13136 for (i
= 0; i
< n_elts
; i
++)
13138 rtx el
= CONST_VECTOR_ELT (op
, i
);
13139 unsigned HOST_WIDE_INT elpart
;
13141 if (CONST_INT_P (el
))
13142 elpart
= INTVAL (el
);
13143 else if (CONST_DOUBLE_P (el
))
13146 gcc_unreachable ();
13148 if (i
!= 0 && elpart
!= last_elt
)
13154 /* Shift less than element size. */
13155 maxshift
= innersize
* 8;
13159 /* Left shift immediate value can be from 0 to <size>-1. */
13160 if (last_elt
>= maxshift
)
13165 /* Right shift immediate value can be from 1 to <size>. */
13166 if (last_elt
== 0 || last_elt
> maxshift
)
13171 *elementwidth
= innersize
* 8;
13174 *modconst
= CONST_VECTOR_ELT (op
, 0);
13179 /* Return a string suitable for output of Neon immediate logic operation
13183 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
13184 int inverse
, int quad
)
13186 int width
, is_valid
;
13187 static char templ
[40];
13189 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
13191 gcc_assert (is_valid
!= 0);
13194 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
13196 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
13201 /* Return a string suitable for output of Neon immediate shift operation
13202 (VSHR or VSHL) MNEM. */
13205 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
13206 machine_mode mode
, int quad
,
13209 int width
, is_valid
;
13210 static char templ
[40];
13212 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
13213 gcc_assert (is_valid
!= 0);
13216 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
13218 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
13223 /* Output a sequence of pairwise operations to implement a reduction.
13224 NOTE: We do "too much work" here, because pairwise operations work on two
13225 registers-worth of operands in one go. Unfortunately we can't exploit those
13226 extra calculations to do the full operation in fewer steps, I don't think.
13227 Although all vector elements of the result but the first are ignored, we
13228 actually calculate the same result in each of the elements. An alternative
13229 such as initially loading a vector with zero to use as each of the second
13230 operands would use up an additional register and take an extra instruction,
13231 for no particular gain. */
13234 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
13235 rtx (*reduc
) (rtx
, rtx
, rtx
))
13237 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
13240 for (i
= parts
/ 2; i
>= 1; i
/= 2)
13242 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
13243 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
13248 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13249 loaded into a register using VDUP.
13251 If this is the case, and GENERATE is set, we also generate
13252 instructions to do this and return an RTX to assign to the register. */
13255 neon_vdup_constant (rtx vals
, bool generate
)
13257 machine_mode mode
= GET_MODE (vals
);
13258 machine_mode inner_mode
= GET_MODE_INNER (mode
);
13261 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
13264 if (!const_vec_duplicate_p (vals
, &x
))
13265 /* The elements are not all the same. We could handle repeating
13266 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13267 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13274 /* We can load this constant by using VDUP and a constant in a
13275 single ARM register. This will be cheaper than a vector
13278 x
= copy_to_mode_reg (inner_mode
, x
);
13279 return gen_vec_duplicate (mode
, x
);
13282 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13284 mve_bool_vec_to_const (rtx const_vec
)
13286 int n_elts
= GET_MODE_NUNITS ( GET_MODE (const_vec
));
13287 int repeat
= 16 / n_elts
;
13291 for (i
= 0; i
< n_elts
; i
++)
13293 rtx el
= CONST_VECTOR_ELT (const_vec
, i
);
13294 unsigned HOST_WIDE_INT elpart
;
13296 gcc_assert (CONST_INT_P (el
));
13297 elpart
= INTVAL (el
);
13299 for (int j
= 0; j
< repeat
; j
++)
13300 hi_val
|= elpart
<< (i
* repeat
+ j
);
13302 return gen_int_mode (hi_val
, HImode
);
13305 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13306 constants (for vec_init) or CONST_VECTOR, can be effeciently loaded
13309 If this is the case, and GENERATE is set, we also generate code to do
13310 this and return an RTX to copy into the register. */
13313 neon_make_constant (rtx vals
, bool generate
)
13315 machine_mode mode
= GET_MODE (vals
);
13317 rtx const_vec
= NULL_RTX
;
13318 int n_elts
= GET_MODE_NUNITS (mode
);
13322 if (GET_CODE (vals
) == CONST_VECTOR
)
13324 else if (GET_CODE (vals
) == PARALLEL
)
13326 /* A CONST_VECTOR must contain only CONST_INTs and
13327 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13328 Only store valid constants in a CONST_VECTOR. */
13329 for (i
= 0; i
< n_elts
; ++i
)
13331 rtx x
= XVECEXP (vals
, 0, i
);
13332 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
13335 if (n_const
== n_elts
)
13336 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
13339 gcc_unreachable ();
13341 if (const_vec
!= NULL
13342 && simd_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
13343 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13345 else if (TARGET_HAVE_MVE
&& (GET_MODE_CLASS (mode
) == MODE_VECTOR_BOOL
))
13346 return mve_bool_vec_to_const (const_vec
);
13347 else if ((target
= neon_vdup_constant (vals
, generate
)) != NULL_RTX
)
13348 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13349 pipeline cycle; creating the constant takes one or two ARM
13350 pipeline cycles. */
13352 else if (const_vec
!= NULL_RTX
)
13353 /* Load from constant pool. On Cortex-A8 this takes two cycles
13354 (for either double or quad vectors). We cannot take advantage
13355 of single-cycle VLD1 because we need a PC-relative addressing
13357 return arm_disable_literal_pool
? NULL_RTX
: const_vec
;
13359 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13360 We cannot construct an initializer. */
13364 /* Initialize vector TARGET to VALS. */
13367 neon_expand_vector_init (rtx target
, rtx vals
)
13369 machine_mode mode
= GET_MODE (target
);
13370 machine_mode inner_mode
= GET_MODE_INNER (mode
);
13371 int n_elts
= GET_MODE_NUNITS (mode
);
13372 int n_var
= 0, one_var
= -1;
13373 bool all_same
= true;
13377 for (i
= 0; i
< n_elts
; ++i
)
13379 x
= XVECEXP (vals
, 0, i
);
13380 if (!CONSTANT_P (x
))
13381 ++n_var
, one_var
= i
;
13383 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
13389 rtx constant
= neon_make_constant (vals
);
13390 if (constant
!= NULL_RTX
)
13392 emit_move_insn (target
, constant
);
13397 /* Splat a single non-constant element if we can. */
13398 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
13400 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
13401 emit_insn (gen_rtx_SET (target
, gen_vec_duplicate (mode
, x
)));
13405 /* One field is non-constant. Load constant then overwrite varying
13406 field. This is more efficient than using the stack. */
13409 rtx copy
= copy_rtx (vals
);
13410 rtx merge_mask
= GEN_INT (1 << one_var
);
13412 /* Load constant part of vector, substitute neighboring value for
13413 varying element. */
13414 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
13415 neon_expand_vector_init (target
, copy
);
13417 /* Insert variable. */
13418 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
13419 emit_insn (gen_vec_set_internal (mode
, target
, x
, merge_mask
, target
));
13423 /* Construct the vector in memory one field at a time
13424 and load the whole vector. */
13425 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
13426 for (i
= 0; i
< n_elts
; i
++)
13427 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
13428 i
* GET_MODE_SIZE (inner_mode
)),
13429 XVECEXP (vals
, 0, i
));
13430 emit_move_insn (target
, mem
);
13433 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13434 ERR if it doesn't. EXP indicates the source location, which includes the
13435 inlining history for intrinsics. */
13438 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
13439 const_tree exp
, const char *desc
)
13441 HOST_WIDE_INT lane
;
13443 gcc_assert (CONST_INT_P (operand
));
13445 lane
= INTVAL (operand
);
13447 if (lane
< low
|| lane
>= high
)
13450 error_at (EXPR_LOCATION (exp
),
13451 "%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
13453 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
13457 /* Bounds-check lanes. */
13460 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
13463 bounds_check (operand
, low
, high
, exp
, "lane");
13466 /* Bounds-check constants. */
13469 arm_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
13471 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
13475 neon_element_bits (machine_mode mode
)
13477 return GET_MODE_UNIT_BITSIZE (mode
);
13481 /* Predicates for `match_operand' and `match_operator'. */
13483 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13484 WB level is 2 if full writeback address modes are allowed, 1
13485 if limited writeback address modes (POST_INC and PRE_DEC) are
13486 allowed and 0 if no writeback at all is supported. */
13489 arm_coproc_mem_operand_wb (rtx op
, int wb_level
)
13491 gcc_assert (wb_level
== 0 || wb_level
== 1 || wb_level
== 2);
13494 /* Reject eliminable registers. */
13495 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
13496 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
13497 || reg_mentioned_p (arg_pointer_rtx
, op
)
13498 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13499 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13500 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13501 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13504 /* Constants are converted into offsets from labels. */
13508 ind
= XEXP (op
, 0);
13510 if (reload_completed
13511 && (LABEL_REF_P (ind
)
13512 || (GET_CODE (ind
) == CONST
13513 && GET_CODE (XEXP (ind
, 0)) == PLUS
13514 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13515 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13518 /* Match: (mem (reg)). */
13520 return arm_address_register_rtx_p (ind
, 0);
13522 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
13523 acceptable in any case (subject to verification by
13524 arm_address_register_rtx_p). We need full writeback to accept
13525 PRE_INC and POST_DEC, and at least restricted writeback for
13526 PRE_INC and POST_DEC. */
13528 && (GET_CODE (ind
) == POST_INC
13529 || GET_CODE (ind
) == PRE_DEC
13531 && (GET_CODE (ind
) == PRE_INC
13532 || GET_CODE (ind
) == POST_DEC
))))
13533 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13536 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
13537 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
13538 && GET_CODE (XEXP (ind
, 1)) == PLUS
13539 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
13540 ind
= XEXP (ind
, 1);
13546 The encoded immediate for 16-bit modes is multiplied by 2,
13547 while the encoded immediate for 32-bit and 64-bit modes is
13548 multiplied by 4. */
13549 int factor
= MIN (GET_MODE_SIZE (GET_MODE (op
)), 4);
13550 if (GET_CODE (ind
) == PLUS
13551 && REG_P (XEXP (ind
, 0))
13552 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
13553 && CONST_INT_P (XEXP (ind
, 1))
13554 && IN_RANGE (INTVAL (XEXP (ind
, 1)), -255 * factor
, 255 * factor
)
13555 && (INTVAL (XEXP (ind
, 1)) & (factor
- 1)) == 0)
13561 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13562 WB is true if full writeback address modes are allowed and is false
13563 if limited writeback address modes (POST_INC and PRE_DEC) are
13566 int arm_coproc_mem_operand (rtx op
, bool wb
)
13568 return arm_coproc_mem_operand_wb (op
, wb
? 2 : 1);
13571 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13572 context in which no writeback address modes are allowed. */
13575 arm_coproc_mem_operand_no_writeback (rtx op
)
13577 return arm_coproc_mem_operand_wb (op
, 0);
13580 /* This function returns TRUE on matching mode and op.
13581 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13582 2. For other modes, check for [Rn], return TRUE for Rn < R15 (expect R13). */
13584 mve_vector_mem_operand (machine_mode mode
, rtx op
, bool strict
)
13586 enum rtx_code code
;
13589 /* Match: (mem (reg)). */
13592 int reg_no
= REGNO (op
);
13593 return (((mode
== E_V8QImode
|| mode
== E_V4QImode
|| mode
== E_V4HImode
)
13594 ? reg_no
<= LAST_LO_REGNUM
13595 : reg_no
< LAST_ARM_REGNUM
)
13596 || (!strict
&& reg_no
>= FIRST_PSEUDO_REGISTER
));
13598 code
= GET_CODE (op
);
13600 if (code
== POST_INC
|| code
== PRE_DEC
13601 || code
== PRE_INC
|| code
== POST_DEC
)
13603 reg_no
= REGNO (XEXP (op
, 0));
13604 return (((mode
== E_V8QImode
|| mode
== E_V4QImode
|| mode
== E_V4HImode
)
13605 ? reg_no
<= LAST_LO_REGNUM
13606 :(reg_no
< LAST_ARM_REGNUM
&& reg_no
!= SP_REGNUM
))
13607 || (!strict
&& reg_no
>= FIRST_PSEUDO_REGISTER
));
13609 else if (((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
13610 && GET_CODE (XEXP (op
, 1)) == PLUS
13611 && XEXP (op
, 0) == XEXP (XEXP (op
, 1), 0)
13612 && REG_P (XEXP (op
, 0))
13613 && GET_CODE (XEXP (XEXP (op
, 1), 1)) == CONST_INT
)
13614 /* Make sure to only accept PLUS after reload_completed, otherwise
13615 this will interfere with auto_inc's pattern detection. */
13616 || (reload_completed
&& code
== PLUS
&& REG_P (XEXP (op
, 0))
13617 && GET_CODE (XEXP (op
, 1)) == CONST_INT
))
13619 reg_no
= REGNO (XEXP (op
, 0));
13621 val
= INTVAL (XEXP (op
, 1));
13623 val
= INTVAL (XEXP(XEXP (op
, 1), 1));
13630 if (abs (val
) > 127)
13637 if (val
% 2 != 0 || abs (val
) > 254)
13642 if (val
% 4 != 0 || abs (val
) > 508)
13648 return ((!strict
&& reg_no
>= FIRST_PSEUDO_REGISTER
)
13649 || (MVE_STN_LDW_MODE (mode
)
13650 ? reg_no
<= LAST_LO_REGNUM
13651 : (reg_no
< LAST_ARM_REGNUM
13652 && (code
== PLUS
|| reg_no
!= SP_REGNUM
))));
13657 /* Return TRUE if OP is a memory operand which we can load or store a vector
13658 to/from. TYPE is one of the following values:
13659 0 - Vector load/stor (vldr)
13660 1 - Core registers (ldm)
13661 2 - Element/structure loads (vld1)
13664 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
13668 /* Reject eliminable registers. */
13669 if (strict
&& ! (reload_in_progress
|| reload_completed
)
13670 && (reg_mentioned_p (frame_pointer_rtx
, op
)
13671 || reg_mentioned_p (arg_pointer_rtx
, op
)
13672 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13673 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13674 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13675 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13678 /* Constants are converted into offsets from labels. */
13682 ind
= XEXP (op
, 0);
13684 if (reload_completed
13685 && (LABEL_REF_P (ind
)
13686 || (GET_CODE (ind
) == CONST
13687 && GET_CODE (XEXP (ind
, 0)) == PLUS
13688 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13689 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13692 /* Match: (mem (reg)). */
13694 return arm_address_register_rtx_p (ind
, 0);
13696 /* Allow post-increment with Neon registers. */
13697 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
13698 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
13699 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13701 /* Allow post-increment by register for VLDn */
13702 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
13703 && GET_CODE (XEXP (ind
, 1)) == PLUS
13704 && REG_P (XEXP (XEXP (ind
, 1), 1))
13705 && REG_P (XEXP (ind
, 0))
13706 && rtx_equal_p (XEXP (ind
, 0), XEXP (XEXP (ind
, 1), 0)))
13713 && GET_CODE (ind
) == PLUS
13714 && REG_P (XEXP (ind
, 0))
13715 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
13716 && CONST_INT_P (XEXP (ind
, 1))
13717 && INTVAL (XEXP (ind
, 1)) > -1024
13718 /* For quad modes, we restrict the constant offset to be slightly less
13719 than what the instruction format permits. We have no such constraint
13720 on double mode offsets. (This must match arm_legitimate_index_p.) */
13721 && (INTVAL (XEXP (ind
, 1))
13722 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
13723 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
13729 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13732 neon_struct_mem_operand (rtx op
)
13736 /* Reject eliminable registers. */
13737 if (! (reload_in_progress
|| reload_completed
)
13738 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
13739 || reg_mentioned_p (arg_pointer_rtx
, op
)
13740 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
13741 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
13742 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
13743 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
13746 /* Constants are converted into offsets from labels. */
13750 ind
= XEXP (op
, 0);
13752 if (reload_completed
13753 && (LABEL_REF_P (ind
)
13754 || (GET_CODE (ind
) == CONST
13755 && GET_CODE (XEXP (ind
, 0)) == PLUS
13756 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
13757 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
13760 /* Match: (mem (reg)). */
13762 return arm_address_register_rtx_p (ind
, 0);
13764 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13765 if (GET_CODE (ind
) == POST_INC
13766 || GET_CODE (ind
) == PRE_DEC
)
13767 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
13772 /* Prepares the operands for the VCMLA by lane instruction such that the right
13773 register number is selected. This instruction is special in that it always
13774 requires a D register, however there is a choice to be made between Dn[0],
13775 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13777 The VCMLA by lane function always selects two values. For instance given D0
13778 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13779 used by the instruction. However given V4SF then index 0 and 1 are valid as
13780 D0[0] or D1[0] are both valid.
13782 This function centralizes that information based on OPERANDS, OPERANDS[3]
13783 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13784 updated to contain the right index. */
13787 neon_vcmla_lane_prepare_operands (rtx
*operands
)
13789 int lane
= INTVAL (operands
[4]);
13790 machine_mode constmode
= SImode
;
13791 machine_mode mode
= GET_MODE (operands
[3]);
13792 int regno
= REGNO (operands
[3]);
13793 regno
= ((regno
- FIRST_VFP_REGNUM
) >> 1);
13794 if (lane
> 0 && lane
>= GET_MODE_NUNITS (mode
) / 4)
13796 operands
[3] = gen_int_mode (regno
+ 1, constmode
);
13798 = gen_int_mode (lane
- GET_MODE_NUNITS (mode
) / 4, constmode
);
13802 operands
[3] = gen_int_mode (regno
, constmode
);
13803 operands
[4] = gen_int_mode (lane
, constmode
);
13809 /* Return true if X is a register that will be eliminated later on. */
13811 arm_eliminable_register (rtx x
)
13813 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
13814 || REGNO (x
) == ARG_POINTER_REGNUM
13815 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
13816 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
13819 /* Return GENERAL_REGS if a scratch register required to reload x to/from
13820 coprocessor registers. Otherwise return NO_REGS. */
13823 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
13825 if (mode
== HFmode
)
13827 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
13828 return GENERAL_REGS
;
13829 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
13831 return GENERAL_REGS
;
13834 /* The neon move patterns handle all legitimate vector and struct
13837 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
13838 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
13839 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
13840 || VALID_NEON_STRUCT_MODE (mode
)))
13843 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
13846 return GENERAL_REGS
;
13849 /* Values which must be returned in the most-significant end of the return
13853 arm_return_in_msb (const_tree valtype
)
13855 return (TARGET_AAPCS_BASED
13856 && BYTES_BIG_ENDIAN
13857 && (AGGREGATE_TYPE_P (valtype
)
13858 || TREE_CODE (valtype
) == COMPLEX_TYPE
13859 || FIXED_POINT_TYPE_P (valtype
)));
13862 /* Return TRUE if X references a SYMBOL_REF. */
13864 symbol_mentioned_p (rtx x
)
13869 if (SYMBOL_REF_P (x
))
13872 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13873 are constant offsets, not symbols. */
13874 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13877 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13879 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13885 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13886 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
13889 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
13896 /* Return TRUE if X references a LABEL_REF. */
13898 label_mentioned_p (rtx x
)
13903 if (LABEL_REF_P (x
))
13906 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13907 instruction, but they are constant offsets, not symbols. */
13908 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
13911 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
13912 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
13918 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
13919 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
13922 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
13930 tls_mentioned_p (rtx x
)
13932 switch (GET_CODE (x
))
13935 return tls_mentioned_p (XEXP (x
, 0));
13938 if (XINT (x
, 1) == UNSPEC_TLS
)
13941 /* Fall through. */
/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  Implements TARGET_CANNOT_COPY_INSN_P.  */
static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  /* Scan the whole pattern for pc-relative PIC base computations.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_PIC_BASE
	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
	return true;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
	src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
	  && (XINT (src, 1) == VUNSPEC_LL
	      || XINT (src, 1) == VUNSPEC_LAX))
	return true;
    }

  return false;
}
/* Return the comparison code that, together with a conditional move,
   implements the min/max operation X.  NOTE(review): the individual
   switch cases were lost in extraction and have been reconstructed from
   the standard SMIN/SMAX/UMIN/UMAX mapping — confirm against upstream.  */
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Match pair of min/max operators that can be implemented via usat/ssat.
   LO_BOUND and HI_BOUND are the CONST_INT clamp limits.  On success,
   store the saturation bit-width in *MASK and whether the saturation is
   signed in *SIGNED_SAT, and return true.  */
bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
	*mask = log;
      if (signed_sat)
	*signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
	*mask = log + 1;	/* ssat width includes the sign bit.  */
      if (signed_sat)
	*signed_sat = true;

      return true;
    }

  return false;
}
/* Return 1 if memory locations A and B are adjacent (exactly 4 bytes
   apart off the same base register), making them candidates for an
   ldm/stm pairing.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  /* Both addresses must be a plain register or reg+const.  */
  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank.  RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
   where
   1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
   2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
   3.  If consecutive is TRUE, then for kth register being loaded,
       REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
		     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
	 popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
	  || !REG_P (XEXP (SET_SRC (elt), 0))
	  || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
	  || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
	  || INTVAL (XEXP (SET_SRC (elt), 1)) !=
	     ((count - 1 - offset_adj) * reg_increment))
	return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below.  If only one reg is
     loaded, success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
    return false;

  /* Examine the first transfer to establish the base address register
     and starting offset.  */
  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      offset = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (!REG_P (addr))
    return false;

  /* Don't allow SP to be loaded unless it is also the base register.  It
     guarantees that SP is reset correctly when an LDM instruction
     is interrupted.  Otherwise, we might end up with a corrupt stack.  */
  if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
    return false;

  if (regno == REGNO (addr))
    addr_reg_in_reglist = true;

  /* Verify each remaining transfer against the expected register order
     and memory layout.  */
  for (; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);
      if (GET_CODE (elt) != SET)
	return false;

      if (load)
	{
	  reg = SET_DEST (elt);
	  mem = SET_SRC (elt);
	}
      else
	{
	  reg = SET_SRC (elt);
	  mem = SET_DEST (elt);
	}

      if (!REG_P (reg)
	  || GET_MODE (reg) != mode
	  || REGNO (reg) <= regno
	  || (consecutive
	      && (REGNO (reg) !=
		  (unsigned int) (first_regno + regs_per_val * (i - base))))
	  /* Don't allow SP to be loaded unless it is also the base register.
	     It guarantees that SP is reset correctly when an LDM instruction
	     is interrupted.  Otherwise, we might end up with a corrupt
	     stack.  */
	  || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
	  || !MEM_P (mem)
	  || GET_MODE (mem) != mode
	  || ((GET_CODE (XEXP (mem, 0)) != PLUS
	       || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
	       || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
	       || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
		   offset + (i - base) * reg_increment))
	      && (!REG_P (XEXP (mem, 0))
		  || offset + (i - base) * reg_increment != 0)))
	return false;

      regno = REGNO (reg);
      if (regno == REGNO (addr))
	addr_reg_in_reglist = true;
    }

  if (load)
    {
      if (update && addr_reg_in_reglist)
	return false;

      /* For Thumb-1, address register is always modified - either by
	 write-back or by explicit load.  If the pattern does not describe
	 an update, then the address register must be in the list of loaded
	 registers.  */
      if (TARGET_THUMB1)
	return update || addr_reg_in_reglist;
    }

  return true;
}
/* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
   or VSCCLRM (otherwise) insn.  To be a valid CLRM pattern, OP must have the
   following form:

   [(set (reg:SI <N>) (const_int 0))
    (set (reg:SI <M>) (const_int 0))
    ...
    (unspec_volatile [(const_int 0)]
		     VUNSPEC_CLRM_APSR)
    (clobber (reg:CC CC_REGNUM))
   ]

   Any number (including 0) of set expressions is valid, the volatile unspec is
   optional.  All registers but SP and PC are allowed and registers must be in
   strict increasing order.

   To be a valid VSCCLRM pattern, OP must have the following form:

   [(unspec_volatile [(const_int 0)]
		     VUNSPEC_VSCCLRM_VPR)
    (set (reg:SF <N>) (const_int 0))
    (set (reg:SF <M>) (const_int 0))
    ...
   ]

   As with CLRM, any number (including 0) of set expressions is valid, however
   the volatile unspec is mandatory here.  Any VFP single-precision register is
   accepted but all registers must be consecutive and in increasing order.  */
bool
clear_operation_p (rtx op, bool vfp)
{
  unsigned regno;
  unsigned last_regno = INVALID_REGNUM;
  rtx elt, reg, zero;
  int count = XVECLEN (op, 0);
  /* VSCCLRM's leading element is the mandatory VPR unspec, so register
     sets start at index 1.  */
  int first_set = vfp ? 1 : 0;
  machine_mode expected_mode = vfp ? E_SFmode : E_SImode;

  for (int i = first_set; i < count; i++)
    {
      elt = XVECEXP (op, 0, i);

      if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
	{
	  /* The optional APSR-clearing unspec must be exactly
	     (unspec_volatile [(const_int 0)] VUNSPEC_CLRM_APSR).
	     NOTE(review): a positional constraint on where this element may
	     appear existed in the original but is not visible here —
	     confirm against upstream arm.cc.  */
	  if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
	      || XVECLEN (elt, 0) != 1
	      || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode))
	    return false;

	  continue;
	}

      if (GET_CODE (elt) == CLOBBER)
	continue;

      if (GET_CODE (elt) != SET)
	return false;

      reg = SET_DEST (elt);
      zero = SET_SRC (elt);

      if (!REG_P (reg)
	  || GET_MODE (reg) != expected_mode
	  || zero != CONST0_RTX (SImode))
	return false;

      regno = REGNO (reg);

      if (vfp)
	{
	  /* VSCCLRM: registers must be consecutive.  */
	  if (i != first_set && regno != last_regno + 1)
	    return false;
	}
      else
	{
	  /* CLRM: SP and PC excluded; strictly increasing order.  */
	  if (regno == SP_REGNUM || regno == PC_REGNUM)
	    return false;
	  if (i != first_set && regno <= last_regno)
	    return false;
	}

      last_regno = regno;
    }

  return true;
}
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */
bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

	ldr	rd1, [rbase + offset]
	ldr	rd2, [rbase + offset + 4]

     to

	add	rd1, rbase, offset
	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */
static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      /* Sentinel: order[i] stays equal to order[i - 1] until the unique
	 successor offset is found.  */
      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      /* No successor offset at all: the offsets are not contiguous.  */
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.
   Returns the ldm addressing-mode case number (1..5), or 0 on failure.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      /* Address must be a (possibly subreg'd) register, or reg+const.  */
      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (SUBREG_P (reg)
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (SUBREG_P (reg)
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (REG_P (operands[i])
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  /* Don't allow SP to be loaded unless it is also the base
	     register.  It guarantees that SP is reset correctly when
	     an LDM instruction is interrupted.  Otherwise, we might
	     end up with a corrupt stack.  */
	  if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  /* Classify the addressing mode implied by the lowest/highest offsets.  */
  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5; /* base needs a separate add first */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.
   Returns the stm addressing-mode case number (1..4), or 0 on failure.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Write back of base register is currently only supported for Thumb 1.  */
  int base_writeback = TARGET_THUMB1;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i), true);

      gcc_assert (MEM_P (operands[nops + i]));

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      /* Address must be a (possibly subreg'd) register, or reg+const.  */
      if ((REG_P (reg = XEXP (operands[nops + i], 0))
	   || (SUBREG_P (reg)
	       && REG_P (reg = SUBREG_REG (reg))))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
		  || (SUBREG_P (reg)
		      && REG_P (reg = SUBREG_REG (reg))))
	      && (CONST_INT_P (offset
			       = XEXP (XEXP (operands[nops + i], 0), 1)))))
	{
	  unsorted_reg_rtxs[i] = (REG_P (operands[i])
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      /* The effects are unpredictable if the base register is
		 both updated and stored.  */
	      || (base_writeback && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  /* Thumb-1 stm always writes back the base; require it to be dead
     afterwards.  NOTE(review): guard reconstructed as TARGET_THUMB1 —
     confirm against upstream arm.cc.  */
  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  /* Classify the addressing mode implied by the lowest/highest offsets.  */
  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  Returns either a PARALLEL describing the ldm, or (when an ldm
   is not profitable) a sequence of separate moves.  */
static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      /* Emit the loads as individual moves instead.  */
      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      /* Element 0 is the base-register update.  */
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  Returns either a PARALLEL describing the stm, or (when an stm
   is not profitable) a sequence of separate moves.  */
static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  /* Strip a constant offset so BASEREG is the bare base register.  */
  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      /* Emit the stores as individual moves instead.  */
      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      /* Element 0 is the base-register update.  */
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  /* Strip a constant offset so BASEREG is the bare base register.  */
  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  /* Build the memory operands at 4-byte increments from BASEMEM.  */
  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (Pmode, basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				     write_back ? 4 * count : 0);
}
/* External entry point: generate a load-multiple.  Thin wrapper around
   arm_gen_multiple_op with IS_LOAD true; see that function for the
   parameter contract.  */
rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
/* External entry point: generate a store-multiple.  Thin wrapper around
   arm_gen_multiple_op with IS_LOAD false; see that function for the
   parameter contract.  */
rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */
bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    /* Simple insertion sort of the register numbers; NOPS <=
       MAX_LDM_STM_OPS so quadratic cost is irrelevant.  */
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])
	  {
	    int t = regs[i];
	    regs[i] = regs[j];
	    regs[j] = t;
	  }
  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (ldm_case == 1 || ldm_case == 5);

      /* Thumb-1 ldm uses writeback except if the base is loaded.  */
      write_back = TRUE;
      for (i = 0; i < nops; i++)
	if (base_reg == regs[i])
	  write_back = false;

      /* Ensure the base is dead if it is updated.  */
      if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
	return false;
    }

  if (ldm_case == 5)
    {
      /* The offset is out of range for a plain ldm; compute the address
	 into a scratch register first.  */
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
	base_reg_rtx = newbase;
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */
bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      /* Thumb-1 stm always updates the base register.  */
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      /* Offset out of range for a plain stm; fold it into the (dead)
	 base register first.  */
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (Pmode, base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
15072 /* Called from a peephole2 expander to turn a sequence of stores that are
15073 preceded by constant loads into an STM instruction. OPERANDS are the
15074 operands found by the peephole matcher; NOPS indicates how many
15075 separate stores we are trying to combine; there are 2 * NOPS
15076 instructions in the peephole.
15077 Returns true iff we could generate a new instruction. */
15080 gen_const_stm_seq (rtx
*operands
, int nops
)
15082 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
15083 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
15084 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
15085 rtx mems
[MAX_LDM_STM_OPS
];
15088 HOST_WIDE_INT offset
;
15089 int write_back
= FALSE
;
15092 bool base_reg_dies
;
15094 HARD_REG_SET allocated
;
15096 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
15097 mem_order
, &base_reg
, &offset
, false);
15102 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
15104 /* If the same register is used more than once, try to find a free
15106 CLEAR_HARD_REG_SET (allocated
);
15107 for (i
= 0; i
< nops
; i
++)
15109 for (j
= i
+ 1; j
< nops
; j
++)
15110 if (regs
[i
] == regs
[j
])
15112 rtx t
= peep2_find_free_register (0, nops
* 2,
15113 TARGET_THUMB1
? "l" : "r",
15114 SImode
, &allocated
);
15118 regs
[i
] = REGNO (t
);
15122 /* Compute an ordering that maps the register numbers to an ascending
15125 for (i
= 0; i
< nops
; i
++)
15126 if (regs
[i
] < regs
[reg_order
[0]])
15129 for (i
= 1; i
< nops
; i
++)
15131 int this_order
= reg_order
[i
- 1];
15132 for (j
= 0; j
< nops
; j
++)
15133 if (regs
[j
] > regs
[reg_order
[i
- 1]]
15134 && (this_order
== reg_order
[i
- 1]
15135 || regs
[j
] < regs
[this_order
]))
15137 reg_order
[i
] = this_order
;
15140 /* Ensure that registers that must be live after the instruction end
15141 up with the correct value. */
15142 for (i
= 0; i
< nops
; i
++)
15144 int this_order
= reg_order
[i
];
15145 if ((this_order
!= mem_order
[i
]
15146 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
15147 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
15151 /* Load the constants. */
15152 for (i
= 0; i
< nops
; i
++)
15154 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
15155 sorted_regs
[i
] = regs
[reg_order
[i
]];
15156 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
15159 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
15161 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
15164 gcc_assert (base_reg_dies
);
15170 gcc_assert (base_reg_dies
);
15171 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
15175 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
15177 for (i
= 0; i
< nops
; i
++)
15179 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
15180 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
15183 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
15184 write_back
? offset
+ i
* 4 : 0));
15188 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15189 unaligned copies on processors which support unaligned semantics for those
15190 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15191 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15192 An interleave factor of 1 (the minimum) will perform no interleaving.
15193 Load/store multiple are used for aligned addresses where possible. */
15196 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
15197 HOST_WIDE_INT length
,
15198 unsigned int interleave_factor
)
15200 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
15201 int *regnos
= XALLOCAVEC (int, interleave_factor
);
15202 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
15203 HOST_WIDE_INT i
, j
;
15204 HOST_WIDE_INT remaining
= length
, words
;
15205 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
15207 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
15208 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
15209 HOST_WIDE_INT srcoffset
, dstoffset
;
15210 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
15213 gcc_assert (interleave_factor
>= 1 && interleave_factor
<= 4);
15215 /* Use hard registers if we have aligned source or destination so we can use
15216 load/store multiple with contiguous registers. */
15217 if (dst_aligned
|| src_aligned
)
15218 for (i
= 0; i
< interleave_factor
; i
++)
15219 regs
[i
] = gen_rtx_REG (SImode
, i
);
15221 for (i
= 0; i
< interleave_factor
; i
++)
15222 regs
[i
] = gen_reg_rtx (SImode
);
15224 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
15225 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
15227 srcoffset
= dstoffset
= 0;
15229 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15230 For copying the last bytes we want to subtract this offset again. */
15231 src_autoinc
= dst_autoinc
= 0;
15233 for (i
= 0; i
< interleave_factor
; i
++)
15236 /* Copy BLOCK_SIZE_BYTES chunks. */
15238 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
15241 if (src_aligned
&& interleave_factor
> 1)
15243 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
15244 TRUE
, srcbase
, &srcoffset
));
15245 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
15249 for (j
= 0; j
< interleave_factor
; j
++)
15251 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
15253 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
15254 srcoffset
+ j
* UNITS_PER_WORD
);
15255 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
15257 srcoffset
+= block_size_bytes
;
15261 if (dst_aligned
&& interleave_factor
> 1)
15263 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
15264 TRUE
, dstbase
, &dstoffset
));
15265 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
15269 for (j
= 0; j
< interleave_factor
; j
++)
15271 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
15273 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
15274 dstoffset
+ j
* UNITS_PER_WORD
);
15275 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
15277 dstoffset
+= block_size_bytes
;
15280 remaining
-= block_size_bytes
;
15283 /* Copy any whole words left (note these aren't interleaved with any
15284 subsequent halfword/byte load/stores in the interests of simplicity). */
15286 words
= remaining
/ UNITS_PER_WORD
;
15288 gcc_assert (words
< interleave_factor
);
15290 if (src_aligned
&& words
> 1)
15292 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
15294 src_autoinc
+= UNITS_PER_WORD
* words
;
15298 for (j
= 0; j
< words
; j
++)
15300 addr
= plus_constant (Pmode
, src
,
15301 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
15302 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
15303 srcoffset
+ j
* UNITS_PER_WORD
);
15305 emit_move_insn (regs
[j
], mem
);
15307 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
15309 srcoffset
+= words
* UNITS_PER_WORD
;
15312 if (dst_aligned
&& words
> 1)
15314 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
15316 dst_autoinc
+= words
* UNITS_PER_WORD
;
15320 for (j
= 0; j
< words
; j
++)
15322 addr
= plus_constant (Pmode
, dst
,
15323 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
15324 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
15325 dstoffset
+ j
* UNITS_PER_WORD
);
15327 emit_move_insn (mem
, regs
[j
]);
15329 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
15331 dstoffset
+= words
* UNITS_PER_WORD
;
15334 remaining
-= words
* UNITS_PER_WORD
;
15336 gcc_assert (remaining
< 4);
15338 /* Copy a halfword if necessary. */
15340 if (remaining
>= 2)
15342 halfword_tmp
= gen_reg_rtx (SImode
);
15344 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
15345 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
15346 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
15348 /* Either write out immediately, or delay until we've loaded the last
15349 byte, depending on interleave factor. */
15350 if (interleave_factor
== 1)
15352 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15353 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
15354 emit_insn (gen_unaligned_storehi (mem
,
15355 gen_lowpart (HImode
, halfword_tmp
)));
15356 halfword_tmp
= NULL
;
15364 gcc_assert (remaining
< 2);
15366 /* Copy last byte. */
15368 if ((remaining
& 1) != 0)
15370 byte_tmp
= gen_reg_rtx (SImode
);
15372 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
15373 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
15374 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
15376 if (interleave_factor
== 1)
15378 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15379 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
15380 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
15389 /* Store last halfword if we haven't done so already. */
15393 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15394 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
15395 emit_insn (gen_unaligned_storehi (mem
,
15396 gen_lowpart (HImode
, halfword_tmp
)));
15400 /* Likewise for last byte. */
15404 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
15405 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
15406 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
15410 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
15413 /* From mips_adjust_block_mem:
15415 Helper function for doing a loop-based block operation on memory
15416 reference MEM. Each iteration of the loop will operate on LENGTH
15419 Create a new base register for use within the loop and point it to
15420 the start of MEM. Create a new memory reference that uses this
15421 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15424 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
15427 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
15429 /* Although the new mem does not refer to a known location,
15430 it does keep up to LENGTH bytes of alignment. */
15431 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
15432 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
15435 /* From mips_block_move_loop:
15437 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15438 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15439 the memory regions do not overlap. */
15442 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
15443 unsigned int interleave_factor
,
15444 HOST_WIDE_INT bytes_per_iter
)
15446 rtx src_reg
, dest_reg
, final_src
, test
;
15447 HOST_WIDE_INT leftover
;
15449 leftover
= length
% bytes_per_iter
;
15450 length
-= leftover
;
15452 /* Create registers and memory references for use within the loop. */
15453 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
15454 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
15456 /* Calculate the value that SRC_REG should have after the last iteration of
15458 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
15459 0, 0, OPTAB_WIDEN
);
15461 /* Emit the start of the loop. */
15462 rtx_code_label
*label
= gen_label_rtx ();
15463 emit_label (label
);
15465 /* Emit the loop body. */
15466 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
15467 interleave_factor
);
15469 /* Move on to the next block. */
15470 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
15471 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
15473 /* Emit the loop condition. */
15474 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
15475 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
15477 /* Mop up any left-over bytes. */
15479 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
15482 /* Emit a block move when either the source or destination is unaligned (not
15483 aligned to a four-byte boundary). This may need further tuning depending on
15484 core type, optimize_size setting, etc. */
15487 arm_cpymemqi_unaligned (rtx
*operands
)
15489 HOST_WIDE_INT length
= INTVAL (operands
[2]);
15493 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
15494 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
15495 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15496 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15497 or dst_aligned though: allow more interleaving in those cases since the
15498 resulting code can be smaller. */
15499 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
15500 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
15503 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
15504 interleave_factor
, bytes_per_iter
);
15506 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
15507 interleave_factor
);
15511 /* Note that the loop created by arm_block_move_unaligned_loop may be
15512 subject to loop unrolling, which makes tuning this condition a little
15515 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
15517 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
15524 arm_gen_cpymemqi (rtx
*operands
)
15526 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
15527 HOST_WIDE_INT srcoffset
, dstoffset
;
15528 rtx src
, dst
, srcbase
, dstbase
;
15529 rtx part_bytes_reg
= NULL
;
15532 if (!CONST_INT_P (operands
[2])
15533 || !CONST_INT_P (operands
[3])
15534 || INTVAL (operands
[2]) > 64)
15537 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
15538 return arm_cpymemqi_unaligned (operands
);
15540 if (INTVAL (operands
[3]) & 3)
15543 dstbase
= operands
[0];
15544 srcbase
= operands
[1];
15546 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
15547 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
15549 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
15550 out_words_to_go
= INTVAL (operands
[2]) / 4;
15551 last_bytes
= INTVAL (operands
[2]) & 3;
15552 dstoffset
= srcoffset
= 0;
15554 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
15555 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
15557 while (in_words_to_go
>= 2)
15559 if (in_words_to_go
> 4)
15560 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
15561 TRUE
, srcbase
, &srcoffset
));
15563 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
15564 src
, FALSE
, srcbase
,
15567 if (out_words_to_go
)
15569 if (out_words_to_go
> 4)
15570 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
15571 TRUE
, dstbase
, &dstoffset
));
15572 else if (out_words_to_go
!= 1)
15573 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
15574 out_words_to_go
, dst
,
15577 dstbase
, &dstoffset
));
15580 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
15581 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
15582 if (last_bytes
!= 0)
15584 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
15590 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
15591 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
15594 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15595 if (out_words_to_go
)
15599 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
15600 sreg
= copy_to_reg (mem
);
15602 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
15603 emit_move_insn (mem
, sreg
);
15606 gcc_assert (!in_words_to_go
); /* Sanity check */
15609 if (in_words_to_go
)
15611 gcc_assert (in_words_to_go
> 0);
15613 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
15614 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
15617 gcc_assert (!last_bytes
|| part_bytes_reg
);
15619 if (BYTES_BIG_ENDIAN
&& last_bytes
)
15621 rtx tmp
= gen_reg_rtx (SImode
);
15623 /* The bytes we want are in the top end of the word. */
15624 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
15625 GEN_INT (8 * (4 - last_bytes
))));
15626 part_bytes_reg
= tmp
;
15630 mem
= adjust_automodify_address (dstbase
, QImode
,
15631 plus_constant (Pmode
, dst
,
15633 dstoffset
+ last_bytes
- 1);
15634 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
15638 tmp
= gen_reg_rtx (SImode
);
15639 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
15640 part_bytes_reg
= tmp
;
15647 if (last_bytes
> 1)
15649 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
15650 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
15654 rtx tmp
= gen_reg_rtx (SImode
);
15655 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
15656 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
15657 part_bytes_reg
= tmp
;
15664 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
15665 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
15672 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
15675 next_consecutive_mem (rtx mem
)
15677 machine_mode mode
= GET_MODE (mem
);
15678 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
15679 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
15681 return adjust_automodify_address (mem
, mode
, addr
, offset
);
15684 /* Copy using LDRD/STRD instructions whenever possible.
15685 Returns true upon success. */
15687 gen_cpymem_ldrd_strd (rtx
*operands
)
15689 unsigned HOST_WIDE_INT len
;
15690 HOST_WIDE_INT align
;
15691 rtx src
, dst
, base
;
15693 bool src_aligned
, dst_aligned
;
15694 bool src_volatile
, dst_volatile
;
15696 gcc_assert (CONST_INT_P (operands
[2]));
15697 gcc_assert (CONST_INT_P (operands
[3]));
15699 len
= UINTVAL (operands
[2]);
15703 /* Maximum alignment we can assume for both src and dst buffers. */
15704 align
= INTVAL (operands
[3]);
15706 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
15709 /* Place src and dst addresses in registers
15710 and update the corresponding mem rtx. */
15712 dst_volatile
= MEM_VOLATILE_P (dst
);
15713 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
15714 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
15715 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
15718 src_volatile
= MEM_VOLATILE_P (src
);
15719 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
15720 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
15721 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
15723 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
15726 if (src_volatile
|| dst_volatile
)
15729 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15730 if (!(dst_aligned
|| src_aligned
))
15731 return arm_gen_cpymemqi (operands
);
15733 /* If the either src or dst is unaligned we'll be accessing it as pairs
15734 of unaligned SImode accesses. Otherwise we can generate DImode
15735 ldrd/strd instructions. */
15736 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
15737 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
15742 reg0
= gen_reg_rtx (DImode
);
15743 rtx first_reg
= NULL_RTX
;
15744 rtx second_reg
= NULL_RTX
;
15746 if (!src_aligned
|| !dst_aligned
)
15748 if (BYTES_BIG_ENDIAN
)
15750 second_reg
= gen_lowpart (SImode
, reg0
);
15751 first_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
15755 first_reg
= gen_lowpart (SImode
, reg0
);
15756 second_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
15759 if (MEM_ALIGN (src
) >= 2 * BITS_PER_WORD
)
15760 emit_move_insn (reg0
, src
);
15761 else if (src_aligned
)
15762 emit_insn (gen_unaligned_loaddi (reg0
, src
));
15765 emit_insn (gen_unaligned_loadsi (first_reg
, src
));
15766 src
= next_consecutive_mem (src
);
15767 emit_insn (gen_unaligned_loadsi (second_reg
, src
));
15770 if (MEM_ALIGN (dst
) >= 2 * BITS_PER_WORD
)
15771 emit_move_insn (dst
, reg0
);
15772 else if (dst_aligned
)
15773 emit_insn (gen_unaligned_storedi (dst
, reg0
));
15776 emit_insn (gen_unaligned_storesi (dst
, first_reg
));
15777 dst
= next_consecutive_mem (dst
);
15778 emit_insn (gen_unaligned_storesi (dst
, second_reg
));
15781 src
= next_consecutive_mem (src
);
15782 dst
= next_consecutive_mem (dst
);
15785 gcc_assert (len
< 8);
15788 /* More than a word but less than a double-word to copy. Copy a word. */
15789 reg0
= gen_reg_rtx (SImode
);
15790 src
= adjust_address (src
, SImode
, 0);
15791 dst
= adjust_address (dst
, SImode
, 0);
15793 emit_move_insn (reg0
, src
);
15795 emit_insn (gen_unaligned_loadsi (reg0
, src
));
15798 emit_move_insn (dst
, reg0
);
15800 emit_insn (gen_unaligned_storesi (dst
, reg0
));
15802 src
= next_consecutive_mem (src
);
15803 dst
= next_consecutive_mem (dst
);
15810 /* Copy the remaining bytes. */
15813 dst
= adjust_address (dst
, HImode
, 0);
15814 src
= adjust_address (src
, HImode
, 0);
15815 reg0
= gen_reg_rtx (SImode
);
15817 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
15819 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
15822 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
15824 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
15826 src
= next_consecutive_mem (src
);
15827 dst
= next_consecutive_mem (dst
);
15832 dst
= adjust_address (dst
, QImode
, 0);
15833 src
= adjust_address (src
, QImode
, 0);
15834 reg0
= gen_reg_rtx (QImode
);
15835 emit_move_insn (reg0
, src
);
15836 emit_move_insn (dst
, reg0
);
15840 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15841 into its component 32-bit subregs. OP2 may be an immediate
15842 constant and we want to simplify it in that case. */
15844 arm_decompose_di_binop (rtx op1
, rtx op2
, rtx
*lo_op1
, rtx
*hi_op1
,
15845 rtx
*lo_op2
, rtx
*hi_op2
)
15847 *lo_op1
= gen_lowpart (SImode
, op1
);
15848 *hi_op1
= gen_highpart (SImode
, op1
);
15849 *lo_op2
= simplify_gen_subreg (SImode
, op2
, DImode
,
15850 subreg_lowpart_offset (SImode
, DImode
));
15851 *hi_op2
= simplify_gen_subreg (SImode
, op2
, DImode
,
15852 subreg_highpart_offset (SImode
, DImode
));
15855 /* Select a dominance comparison mode if possible for a test of the general
15856 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15857 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15858 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15859 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15860 In all cases OP will be either EQ or NE, but we don't need to know which
15861 here. If we are unable to support a dominance comparison we return
15862 CC mode. This will then fail to match for the RTL expressions that
15863 generate this call. */
15865 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
15867 enum rtx_code cond1
, cond2
;
15870 /* Currently we will probably get the wrong result if the individual
15871 comparisons are not simple. This also ensures that it is safe to
15872 reverse a comparison if necessary. */
15873 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
15875 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
15879 /* The if_then_else variant of this tests the second condition if the
15880 first passes, but is true if the first fails. Reverse the first
15881 condition to get a true "inclusive-or" expression. */
15882 if (cond_or
== DOM_CC_NX_OR_Y
)
15883 cond1
= reverse_condition (cond1
);
15885 /* If the comparisons are not equal, and one doesn't dominate the other,
15886 then we can't do this. */
15888 && !comparison_dominates_p (cond1
, cond2
)
15889 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
15893 std::swap (cond1
, cond2
);
15898 if (cond_or
== DOM_CC_X_AND_Y
)
15903 case EQ
: return CC_DEQmode
;
15904 case LE
: return CC_DLEmode
;
15905 case LEU
: return CC_DLEUmode
;
15906 case GE
: return CC_DGEmode
;
15907 case GEU
: return CC_DGEUmode
;
15908 default: gcc_unreachable ();
15912 if (cond_or
== DOM_CC_X_AND_Y
)
15924 gcc_unreachable ();
15928 if (cond_or
== DOM_CC_X_AND_Y
)
15940 gcc_unreachable ();
15944 if (cond_or
== DOM_CC_X_AND_Y
)
15945 return CC_DLTUmode
;
15950 return CC_DLTUmode
;
15952 return CC_DLEUmode
;
15956 gcc_unreachable ();
15960 if (cond_or
== DOM_CC_X_AND_Y
)
15961 return CC_DGTUmode
;
15966 return CC_DGTUmode
;
15968 return CC_DGEUmode
;
15972 gcc_unreachable ();
15975 /* The remaining cases only occur when both comparisons are the
15978 gcc_assert (cond1
== cond2
);
15982 gcc_assert (cond1
== cond2
);
15986 gcc_assert (cond1
== cond2
);
15990 gcc_assert (cond1
== cond2
);
15991 return CC_DLEUmode
;
15994 gcc_assert (cond1
== cond2
);
15995 return CC_DGEUmode
;
15998 gcc_unreachable ();
16003 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
16005 /* All floating point compares return CCFP if it is an equality
16006 comparison, and CCFPE otherwise. */
16007 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
16030 gcc_unreachable ();
16034 /* A compare with a shifted operand. Because of canonicalization, the
16035 comparison will have to be swapped when we emit the assembler. */
16036 if (GET_MODE (y
) == SImode
16037 && (REG_P (y
) || (SUBREG_P (y
)))
16038 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
16039 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
16040 || GET_CODE (x
) == ROTATERT
))
16043 /* A widened compare of the sum of a value plus a carry against a
16044 constant. This is a representation of RSC. We want to swap the
16045 result of the comparison at output. Not valid if the Z bit is
16047 if (GET_MODE (x
) == DImode
16048 && GET_CODE (x
) == PLUS
16049 && arm_borrow_operation (XEXP (x
, 1), DImode
)
16051 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
16052 && (op
== LE
|| op
== GT
))
16053 || (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
16054 && (op
== LEU
|| op
== GTU
))))
16057 /* If X is a constant we want to use CC_RSBmode. This is
16058 non-canonical, but arm_gen_compare_reg uses this to generate the
16059 correct canonical form. */
16060 if (GET_MODE (y
) == SImode
16061 && (REG_P (y
) || SUBREG_P (y
))
16062 && CONST_INT_P (x
))
16065 /* This operation is performed swapped, but since we only rely on the Z
16066 flag we don't need an additional mode. */
16067 if (GET_MODE (y
) == SImode
16068 && (REG_P (y
) || (SUBREG_P (y
)))
16069 && GET_CODE (x
) == NEG
16070 && (op
== EQ
|| op
== NE
))
16073 /* This is a special case that is used by combine to allow a
16074 comparison of a shifted byte load to be split into a zero-extend
16075 followed by a comparison of the shifted integer (only valid for
16076 equalities and unsigned inequalities). */
16077 if (GET_MODE (x
) == SImode
16078 && GET_CODE (x
) == ASHIFT
16079 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
16080 && GET_CODE (XEXP (x
, 0)) == SUBREG
16081 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
16082 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
16083 && (op
== EQ
|| op
== NE
16084 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
16085 && CONST_INT_P (y
))
16088 /* A construct for a conditional compare, if the false arm contains
16089 0, then both conditions must be true, otherwise either condition
16090 must be true. Not all conditions are possible, so CCmode is
16091 returned if it can't be done. */
16092 if (GET_CODE (x
) == IF_THEN_ELSE
16093 && (XEXP (x
, 2) == const0_rtx
16094 || XEXP (x
, 2) == const1_rtx
)
16095 && COMPARISON_P (XEXP (x
, 0))
16096 && COMPARISON_P (XEXP (x
, 1)))
16097 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
16098 INTVAL (XEXP (x
, 2)));
16100 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16101 if (GET_CODE (x
) == AND
16102 && (op
== EQ
|| op
== NE
)
16103 && COMPARISON_P (XEXP (x
, 0))
16104 && COMPARISON_P (XEXP (x
, 1)))
16105 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
16108 if (GET_CODE (x
) == IOR
16109 && (op
== EQ
|| op
== NE
)
16110 && COMPARISON_P (XEXP (x
, 0))
16111 && COMPARISON_P (XEXP (x
, 1)))
16112 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
16115 /* An operation (on Thumb) where we want to test for a single bit.
16116 This is done by shifting that bit up into the top bit of a
16117 scratch register; we can then branch on the sign bit. */
16119 && GET_MODE (x
) == SImode
16120 && (op
== EQ
|| op
== NE
)
16121 && GET_CODE (x
) == ZERO_EXTRACT
16122 && XEXP (x
, 1) == const1_rtx
)
16125 /* An operation that sets the condition codes as a side-effect, the
16126 V flag is not set correctly, so we can only use comparisons where
16127 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16129 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16130 if (GET_MODE (x
) == SImode
16132 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
16133 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
16134 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
16135 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
16136 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
16137 || GET_CODE (x
) == LSHIFTRT
16138 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
16139 || GET_CODE (x
) == ROTATERT
16140 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
16143 /* A comparison of ~reg with a const is really a special
16144 canoncialization of compare (~const, reg), which is a reverse
16145 subtract operation. We may not get here if CONST is 0, but that
16146 doesn't matter because ~0 isn't a valid immediate for RSB. */
16147 if (GET_MODE (x
) == SImode
16148 && GET_CODE (x
) == NOT
16149 && CONST_INT_P (y
))
16152 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
16155 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
16156 && GET_CODE (x
) == PLUS
16157 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
16160 if (GET_MODE (x
) == DImode
16161 && GET_CODE (x
) == PLUS
16162 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
16164 && UINTVAL (y
) == 0x800000000
16165 && (op
== GEU
|| op
== LTU
))
16168 if (GET_MODE (x
) == DImode
16169 && (op
== GE
|| op
== LT
)
16170 && GET_CODE (x
) == SIGN_EXTEND
16171 && ((GET_CODE (y
) == PLUS
16172 && arm_borrow_operation (XEXP (y
, 0), DImode
))
16173 || arm_borrow_operation (y
, DImode
)))
16176 if (GET_MODE (x
) == DImode
16177 && (op
== GEU
|| op
== LTU
)
16178 && GET_CODE (x
) == ZERO_EXTEND
16179 && ((GET_CODE (y
) == PLUS
16180 && arm_borrow_operation (XEXP (y
, 0), DImode
))
16181 || arm_borrow_operation (y
, DImode
)))
16184 if (GET_MODE (x
) == DImode
16185 && (op
== EQ
|| op
== NE
)
16186 && (GET_CODE (x
) == PLUS
16187 || GET_CODE (x
) == MINUS
)
16188 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
16189 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
16190 && GET_CODE (y
) == SIGN_EXTEND
16191 && GET_CODE (XEXP (y
, 0)) == GET_CODE (x
))
16194 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
16195 return GET_MODE (x
);
16200 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16201 the sequence of instructions needed to generate a suitable condition
16202 code register. Return the CC register result. */
16204 arm_gen_dicompare_reg (rtx_code code
, rtx x
, rtx y
, rtx scratch
)
16209 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16210 gcc_assert (TARGET_32BIT
);
16211 gcc_assert (!CONST_INT_P (x
));
16213 rtx x_lo
= simplify_gen_subreg (SImode
, x
, DImode
,
16214 subreg_lowpart_offset (SImode
, DImode
));
16215 rtx x_hi
= simplify_gen_subreg (SImode
, x
, DImode
,
16216 subreg_highpart_offset (SImode
, DImode
));
16217 rtx y_lo
= simplify_gen_subreg (SImode
, y
, DImode
,
16218 subreg_lowpart_offset (SImode
, DImode
));
16219 rtx y_hi
= simplify_gen_subreg (SImode
, y
, DImode
,
16220 subreg_highpart_offset (SImode
, DImode
));
16226 if (y_lo
== const0_rtx
|| y_hi
== const0_rtx
)
16228 if (y_lo
!= const0_rtx
)
16230 rtx scratch2
= scratch
? scratch
: gen_reg_rtx (SImode
);
16232 gcc_assert (y_hi
== const0_rtx
);
16233 y_lo
= gen_int_mode (-INTVAL (y_lo
), SImode
);
16234 if (!arm_add_operand (y_lo
, SImode
))
16235 y_lo
= force_reg (SImode
, y_lo
);
16236 emit_insn (gen_addsi3 (scratch2
, x_lo
, y_lo
));
16239 else if (y_hi
!= const0_rtx
)
16241 rtx scratch2
= scratch
? scratch
: gen_reg_rtx (SImode
);
16243 y_hi
= gen_int_mode (-INTVAL (y_hi
), SImode
);
16244 if (!arm_add_operand (y_hi
, SImode
))
16245 y_hi
= force_reg (SImode
, y_hi
);
16246 emit_insn (gen_addsi3 (scratch2
, x_hi
, y_hi
));
16252 gcc_assert (!reload_completed
);
16253 scratch
= gen_rtx_SCRATCH (SImode
);
16256 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
16257 cc_reg
= gen_rtx_REG (CC_NZmode
, CC_REGNUM
);
16260 = gen_rtx_SET (cc_reg
,
16261 gen_rtx_COMPARE (CC_NZmode
,
16262 gen_rtx_IOR (SImode
, x_lo
, x_hi
),
16264 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
,
16269 if (!arm_add_operand (y_lo
, SImode
))
16270 y_lo
= force_reg (SImode
, y_lo
);
16272 if (!arm_add_operand (y_hi
, SImode
))
16273 y_hi
= force_reg (SImode
, y_hi
);
16275 rtx cmp1
= gen_rtx_NE (SImode
, x_lo
, y_lo
);
16276 rtx cmp2
= gen_rtx_NE (SImode
, x_hi
, y_hi
);
16277 rtx conjunction
= gen_rtx_IOR (SImode
, cmp1
, cmp2
);
16278 mode
= SELECT_CC_MODE (code
, conjunction
, const0_rtx
);
16279 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
16281 emit_insn (gen_rtx_SET (cc_reg
,
16282 gen_rtx_COMPARE (mode
, conjunction
,
16290 if (y_lo
== const0_rtx
)
16292 /* If the low word of y is 0, then this is simply a normal
16293 compare of the upper words. */
16294 if (!arm_add_operand (y_hi
, SImode
))
16295 y_hi
= force_reg (SImode
, y_hi
);
16297 return arm_gen_compare_reg (code
, x_hi
, y_hi
, NULL_RTX
);
16300 if (!arm_add_operand (y_lo
, SImode
))
16301 y_lo
= force_reg (SImode
, y_lo
);
16304 = gen_rtx_LTU (DImode
,
16305 arm_gen_compare_reg (LTU
, x_lo
, y_lo
, NULL_RTX
),
16309 scratch
= gen_rtx_SCRATCH (SImode
);
16311 if (!arm_not_operand (y_hi
, SImode
))
16312 y_hi
= force_reg (SImode
, y_hi
);
16315 if (y_hi
== const0_rtx
)
16316 insn
= emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch
, x_hi
,
16318 else if (CONST_INT_P (y_hi
))
16319 insn
= emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch
, x_hi
,
16322 insn
= emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch
, x_hi
, y_hi
,
16324 return SET_DEST (single_set (insn
));
16330 /* During expansion, we only expect to get here if y is a
16331 constant that we want to handle, otherwise we should have
16332 swapped the operands already. */
16333 gcc_assert (arm_const_double_prefer_rsbs_rsc (y
));
16335 if (!const_ok_for_arm (INTVAL (y_lo
)))
16336 y_lo
= force_reg (SImode
, y_lo
);
16338 /* Perform a reverse subtract and compare. */
16340 = gen_rtx_LTU (DImode
,
16341 arm_gen_compare_reg (LTU
, y_lo
, x_lo
, scratch
),
16343 rtx_insn
*insn
= emit_insn (gen_rscsi3_CC_NVout_scratch (scratch
, y_hi
,
16345 return SET_DEST (single_set (insn
));
16351 if (y_lo
== const0_rtx
)
16353 /* If the low word of y is 0, then this is simply a normal
16354 compare of the upper words. */
16355 if (!arm_add_operand (y_hi
, SImode
))
16356 y_hi
= force_reg (SImode
, y_hi
);
16358 return arm_gen_compare_reg (code
, x_hi
, y_hi
, NULL_RTX
);
16361 if (!arm_add_operand (y_lo
, SImode
))
16362 y_lo
= force_reg (SImode
, y_lo
);
16365 = gen_rtx_LTU (DImode
,
16366 arm_gen_compare_reg (LTU
, x_lo
, y_lo
, NULL_RTX
),
16370 scratch
= gen_rtx_SCRATCH (SImode
);
16371 if (!arm_not_operand (y_hi
, SImode
))
16372 y_hi
= force_reg (SImode
, y_hi
);
16375 if (y_hi
== const0_rtx
)
16376 insn
= emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch
, x_hi
,
16378 else if (CONST_INT_P (y_hi
))
16380 /* Constant is viewed as unsigned when zero-extended. */
16381 y_hi
= GEN_INT (UINTVAL (y_hi
) & 0xffffffffULL
);
16382 insn
= emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch
, x_hi
,
16386 insn
= emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch
, x_hi
, y_hi
,
16388 return SET_DEST (single_set (insn
));
16394 /* During expansion, we only expect to get here if y is a
16395 constant that we want to handle, otherwise we should have
16396 swapped the operands already. */
16397 gcc_assert (arm_const_double_prefer_rsbs_rsc (y
));
16399 if (!const_ok_for_arm (INTVAL (y_lo
)))
16400 y_lo
= force_reg (SImode
, y_lo
);
16402 /* Perform a reverse subtract and compare. */
16404 = gen_rtx_LTU (DImode
,
16405 arm_gen_compare_reg (LTU
, y_lo
, x_lo
, scratch
),
16407 y_hi
= GEN_INT (0xffffffff & UINTVAL (y_hi
));
16408 rtx_insn
*insn
= emit_insn (gen_rscsi3_CC_Bout_scratch (scratch
, y_hi
,
16410 return SET_DEST (single_set (insn
));
16414 gcc_unreachable ();
16418 /* X and Y are two things to compare using CODE. Emit the compare insn and
16419 return the rtx for register 0 in the proper mode. */
16421 arm_gen_compare_reg (rtx_code code
, rtx x
, rtx y
, rtx scratch
)
16423 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
16424 return arm_gen_dicompare_reg (code
, x
, y
, scratch
);
16426 machine_mode mode
= SELECT_CC_MODE (code
, x
, y
);
16427 rtx cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
16428 if (mode
== CC_RSBmode
)
16431 scratch
= gen_rtx_SCRATCH (SImode
);
16432 emit_insn (gen_rsb_imm_compare_scratch (scratch
,
16433 GEN_INT (~UINTVAL (x
)), y
));
16436 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
16441 /* Generate a sequence of insns that will generate the correct return
16442 address mask depending on the physical architecture that the program
16445 arm_gen_return_addr_mask (void)
16447 rtx reg
= gen_reg_rtx (Pmode
);
16449 emit_insn (gen_return_addr_mask (reg
));
16454 arm_reload_in_hi (rtx
*operands
)
16456 rtx ref
= operands
[1];
16458 HOST_WIDE_INT offset
= 0;
16460 if (SUBREG_P (ref
))
16462 offset
= SUBREG_BYTE (ref
);
16463 ref
= SUBREG_REG (ref
);
16468 /* We have a pseudo which has been spilt onto the stack; there
16469 are two cases here: the first where there is a simple
16470 stack-slot replacement and a second where the stack-slot is
16471 out of range, or is used as a subreg. */
16472 if (reg_equiv_mem (REGNO (ref
)))
16474 ref
= reg_equiv_mem (REGNO (ref
));
16475 base
= find_replacement (&XEXP (ref
, 0));
16478 /* The slot is out of range, or was dressed up in a SUBREG. */
16479 base
= reg_equiv_address (REGNO (ref
));
16481 /* PR 62554: If there is no equivalent memory location then just move
16482 the value as an SImode register move. This happens when the target
16483 architecture variant does not have an HImode register move. */
16486 gcc_assert (REG_P (operands
[0]));
16487 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16488 gen_rtx_SUBREG (SImode
, ref
, 0)));
16493 base
= find_replacement (&XEXP (ref
, 0));
16495 /* Handle the case where the address is too complex to be offset by 1. */
16496 if (GET_CODE (base
) == MINUS
16497 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
16499 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16501 emit_set_insn (base_plus
, base
);
16504 else if (GET_CODE (base
) == PLUS
)
16506 /* The addend must be CONST_INT, or we would have dealt with it above. */
16507 HOST_WIDE_INT hi
, lo
;
16509 offset
+= INTVAL (XEXP (base
, 1));
16510 base
= XEXP (base
, 0);
16512 /* Rework the address into a legal sequence of insns. */
16513 /* Valid range for lo is -4095 -> 4095 */
16516 : -((-offset
) & 0xfff));
16518 /* Corner case, if lo is the max offset then we would be out of range
16519 once we have added the additional 1 below, so bump the msb into the
16520 pre-loading insn(s). */
16524 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
16525 ^ (HOST_WIDE_INT
) 0x80000000)
16526 - (HOST_WIDE_INT
) 0x80000000);
16528 gcc_assert (hi
+ lo
== offset
);
16532 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16534 /* Get the base address; addsi3 knows how to handle constants
16535 that require more than one insn. */
16536 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
16542 /* Operands[2] may overlap operands[0] (though it won't overlap
16543 operands[1]), that's why we asked for a DImode reg -- so we can
16544 use the bit that does not overlap. */
16545 if (REGNO (operands
[2]) == REGNO (operands
[0]))
16546 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16548 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
16550 emit_insn (gen_zero_extendqisi2 (scratch
,
16551 gen_rtx_MEM (QImode
,
16552 plus_constant (Pmode
, base
,
16554 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16555 gen_rtx_MEM (QImode
,
16556 plus_constant (Pmode
, base
,
16558 if (!BYTES_BIG_ENDIAN
)
16559 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16560 gen_rtx_IOR (SImode
,
16563 gen_rtx_SUBREG (SImode
, operands
[0], 0),
16567 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
16568 gen_rtx_IOR (SImode
,
16569 gen_rtx_ASHIFT (SImode
, scratch
,
16571 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
16574 /* Handle storing a half-word to memory during reload by synthesizing as two
16575 byte stores. Take care not to clobber the input values until after we
16576 have moved them somewhere safe. This code assumes that if the DImode
16577 scratch in operands[2] overlaps either the input value or output address
16578 in some way, then that value must die in this insn (we absolutely need
16579 two scratch registers for some corner cases). */
16581 arm_reload_out_hi (rtx
*operands
)
16583 rtx ref
= operands
[0];
16584 rtx outval
= operands
[1];
16586 HOST_WIDE_INT offset
= 0;
16588 if (SUBREG_P (ref
))
16590 offset
= SUBREG_BYTE (ref
);
16591 ref
= SUBREG_REG (ref
);
16596 /* We have a pseudo which has been spilt onto the stack; there
16597 are two cases here: the first where there is a simple
16598 stack-slot replacement and a second where the stack-slot is
16599 out of range, or is used as a subreg. */
16600 if (reg_equiv_mem (REGNO (ref
)))
16602 ref
= reg_equiv_mem (REGNO (ref
));
16603 base
= find_replacement (&XEXP (ref
, 0));
16606 /* The slot is out of range, or was dressed up in a SUBREG. */
16607 base
= reg_equiv_address (REGNO (ref
));
16609 /* PR 62254: If there is no equivalent memory location then just move
16610 the value as an SImode register move. This happens when the target
16611 architecture variant does not have an HImode register move. */
16614 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
16616 if (REG_P (outval
))
16618 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
16619 gen_rtx_SUBREG (SImode
, outval
, 0)));
16621 else /* SUBREG_P (outval) */
16623 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
16624 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
16625 SUBREG_REG (outval
)));
16627 /* FIXME: Handle other cases ? */
16628 gcc_unreachable ();
16634 base
= find_replacement (&XEXP (ref
, 0));
16636 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
16638 /* Handle the case where the address is too complex to be offset by 1. */
16639 if (GET_CODE (base
) == MINUS
16640 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
16642 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16644 /* Be careful not to destroy OUTVAL. */
16645 if (reg_overlap_mentioned_p (base_plus
, outval
))
16647 /* Updating base_plus might destroy outval, see if we can
16648 swap the scratch and base_plus. */
16649 if (!reg_overlap_mentioned_p (scratch
, outval
))
16650 std::swap (scratch
, base_plus
);
16653 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
16655 /* Be conservative and copy OUTVAL into the scratch now,
16656 this should only be necessary if outval is a subreg
16657 of something larger than a word. */
16658 /* XXX Might this clobber base? I can't see how it can,
16659 since scratch is known to overlap with OUTVAL, and
16660 must be wider than a word. */
16661 emit_insn (gen_movhi (scratch_hi
, outval
));
16662 outval
= scratch_hi
;
16666 emit_set_insn (base_plus
, base
);
16669 else if (GET_CODE (base
) == PLUS
)
16671 /* The addend must be CONST_INT, or we would have dealt with it above. */
16672 HOST_WIDE_INT hi
, lo
;
16674 offset
+= INTVAL (XEXP (base
, 1));
16675 base
= XEXP (base
, 0);
16677 /* Rework the address into a legal sequence of insns. */
16678 /* Valid range for lo is -4095 -> 4095 */
16681 : -((-offset
) & 0xfff));
16683 /* Corner case, if lo is the max offset then we would be out of range
16684 once we have added the additional 1 below, so bump the msb into the
16685 pre-loading insn(s). */
16689 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
16690 ^ (HOST_WIDE_INT
) 0x80000000)
16691 - (HOST_WIDE_INT
) 0x80000000);
16693 gcc_assert (hi
+ lo
== offset
);
16697 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
16699 /* Be careful not to destroy OUTVAL. */
16700 if (reg_overlap_mentioned_p (base_plus
, outval
))
16702 /* Updating base_plus might destroy outval, see if we
16703 can swap the scratch and base_plus. */
16704 if (!reg_overlap_mentioned_p (scratch
, outval
))
16705 std::swap (scratch
, base_plus
);
16708 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
16710 /* Be conservative and copy outval into scratch now,
16711 this should only be necessary if outval is a
16712 subreg of something larger than a word. */
16713 /* XXX Might this clobber base? I can't see how it
16714 can, since scratch is known to overlap with
16716 emit_insn (gen_movhi (scratch_hi
, outval
));
16717 outval
= scratch_hi
;
16721 /* Get the base address; addsi3 knows how to handle constants
16722 that require more than one insn. */
16723 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
16729 if (BYTES_BIG_ENDIAN
)
16731 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
16732 plus_constant (Pmode
, base
,
16734 gen_lowpart (QImode
, outval
)));
16735 emit_insn (gen_lshrsi3 (scratch
,
16736 gen_rtx_SUBREG (SImode
, outval
, 0),
16738 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
16740 gen_lowpart (QImode
, scratch
)));
16744 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
16746 gen_lowpart (QImode
, outval
)));
16747 emit_insn (gen_lshrsi3 (scratch
,
16748 gen_rtx_SUBREG (SImode
, outval
, 0),
16750 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
16751 plus_constant (Pmode
, base
,
16753 gen_lowpart (QImode
, scratch
)));
16757 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16758 (padded to the size of a word) should be passed in a register. */
16761 arm_must_pass_in_stack (const function_arg_info
&arg
)
16763 if (TARGET_AAPCS_BASED
)
16764 return must_pass_in_stack_var_size (arg
);
16766 return must_pass_in_stack_var_size_or_pad (arg
);
16770 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16771 byte of a stack argument has useful data. For legacy APCS ABIs we use
16772 the default. For AAPCS based ABIs small aggregate types are placed
16773 in the lowest memory address. */
16775 static pad_direction
16776 arm_function_arg_padding (machine_mode mode
, const_tree type
)
16778 if (!TARGET_AAPCS_BASED
)
16779 return default_function_arg_padding (mode
, type
);
16781 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
16782 return PAD_DOWNWARD
;
16788 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16789 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16790 register has useful data, and return the opposite if the most
16791 significant byte does. */
16794 arm_pad_reg_upward (machine_mode mode
,
16795 tree type
, int first ATTRIBUTE_UNUSED
)
16797 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
16799 /* For AAPCS, small aggregates, small fixed-point types,
16800 and small complex types are always padded upwards. */
16803 if ((AGGREGATE_TYPE_P (type
)
16804 || TREE_CODE (type
) == COMPLEX_TYPE
16805 || FIXED_POINT_TYPE_P (type
))
16806 && int_size_in_bytes (type
) <= 4)
16811 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
16812 && GET_MODE_SIZE (mode
) <= 4)
16817 /* Otherwise, use default padding. */
16818 return !BYTES_BIG_ENDIAN
;
16821 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16822 assuming that the address in the base register is word aligned. */
16824 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
16826 HOST_WIDE_INT max_offset
;
16828 /* Offset must be a multiple of 4 in Thumb mode. */
16829 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
16834 else if (TARGET_ARM
)
16839 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
16842 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16843 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16844 Assumes that the address in the base register RN is word aligned. Pattern
16845 guarantees that both memory accesses use the same base register,
16846 the offsets are constants within the range, and the gap between the offsets is 4.
16847 If preload complete then check that registers are legal. WBACK indicates whether
16848 address is updated. LOAD indicates whether memory access is load or store. */
16850 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
16851 bool wback
, bool load
)
16853 unsigned int t
, t2
, n
;
16855 if (!reload_completed
)
16858 if (!offset_ok_for_ldrd_strd (offset
))
16865 if ((TARGET_THUMB2
)
16866 && ((wback
&& (n
== t
|| n
== t2
))
16867 || (t
== SP_REGNUM
)
16868 || (t
== PC_REGNUM
)
16869 || (t2
== SP_REGNUM
)
16870 || (t2
== PC_REGNUM
)
16871 || (!load
&& (n
== PC_REGNUM
))
16872 || (load
&& (t
== t2
))
16873 /* Triggers Cortex-M3 LDRD errata. */
16874 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
16878 && ((wback
&& (n
== t
|| n
== t2
))
16879 || (t2
== PC_REGNUM
)
16880 || (t
% 2 != 0) /* First destination register is not even. */
16882 /* PC can be used as base register (for offset addressing only),
16883 but it is depricated. */
16884 || (n
== PC_REGNUM
)))
16890 /* Return true if a 64-bit access with alignment ALIGN and with a
16891 constant offset OFFSET from the base pointer is permitted on this
16894 align_ok_ldrd_strd (HOST_WIDE_INT align
, HOST_WIDE_INT offset
)
16896 return (unaligned_access
16897 ? (align
>= BITS_PER_WORD
&& (offset
& 3) == 0)
16898 : (align
>= 2 * BITS_PER_WORD
&& (offset
& 7) == 0));
16901 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16902 operand MEM's address contains an immediate offset from the base
16903 register and has no side effects, in which case it sets BASE,
16904 OFFSET and ALIGN accordingly. */
16906 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
, HOST_WIDE_INT
*align
)
16910 gcc_assert (base
!= NULL
&& offset
!= NULL
);
16912 /* TODO: Handle more general memory operand patterns, such as
16913 PRE_DEC and PRE_INC. */
16915 if (side_effects_p (mem
))
16918 /* Can't deal with subregs. */
16919 if (SUBREG_P (mem
))
16922 gcc_assert (MEM_P (mem
));
16924 *offset
= const0_rtx
;
16925 *align
= MEM_ALIGN (mem
);
16927 addr
= XEXP (mem
, 0);
16929 /* If addr isn't valid for DImode, then we can't handle it. */
16930 if (!arm_legitimate_address_p (DImode
, addr
,
16931 reload_in_progress
|| reload_completed
))
16939 else if (GET_CODE (addr
) == PLUS
)
16941 *base
= XEXP (addr
, 0);
16942 *offset
= XEXP (addr
, 1);
16943 return (REG_P (*base
) && CONST_INT_P (*offset
));
16949 /* Called from a peephole2 to replace two word-size accesses with a
16950 single LDRD/STRD instruction. Returns true iff we can generate a
16951 new instruction sequence. That is, both accesses use the same base
16952 register and the gap between constant offsets is 4. This function
16953 may reorder its operands to match ldrd/strd RTL templates.
16954 OPERANDS are the operands found by the peephole matcher;
16955 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16956 corresponding memory operands. LOAD indicaates whether the access
16957 is load or store. CONST_STORE indicates a store of constant
16958 integer values held in OPERANDS[4,5] and assumes that the pattern
16959 is of length 4 insn, for the purpose of checking dead registers.
16960 COMMUTE indicates that register operands may be reordered. */
16962 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
16963 bool const_store
, bool commute
)
16966 HOST_WIDE_INT offsets
[2], offset
, align
[2];
16967 rtx base
= NULL_RTX
;
16968 rtx cur_base
, cur_offset
, tmp
;
16970 HARD_REG_SET regset
;
16972 gcc_assert (!const_store
|| !load
);
16973 /* Check that the memory references are immediate offsets from the
16974 same base register. Extract the base register, the destination
16975 registers, and the corresponding memory offsets. */
16976 for (i
= 0; i
< nops
; i
++)
16978 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
,
16984 else if (REGNO (base
) != REGNO (cur_base
))
16987 offsets
[i
] = INTVAL (cur_offset
);
16988 if (GET_CODE (operands
[i
]) == SUBREG
)
16990 tmp
= SUBREG_REG (operands
[i
]);
16991 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
16996 /* Make sure there is no dependency between the individual loads. */
16997 if (load
&& REGNO (operands
[0]) == REGNO (base
))
16998 return false; /* RAW */
17000 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
17001 return false; /* WAW */
17003 /* If the same input register is used in both stores
17004 when storing different constants, try to find a free register.
17005 For example, the code
17010 can be transformed into
17014 in Thumb mode assuming that r1 is free.
17015 For ARM mode do the same but only if the starting register
17016 can be made to be even. */
17018 && REGNO (operands
[0]) == REGNO (operands
[1])
17019 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
17023 CLEAR_HARD_REG_SET (regset
);
17024 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
17025 if (tmp
== NULL_RTX
)
17028 /* Use the new register in the first load to ensure that
17029 if the original input register is not dead after peephole,
17030 then it will have the correct constant value. */
17033 else if (TARGET_ARM
)
17035 int regno
= REGNO (operands
[0]);
17036 if (!peep2_reg_dead_p (4, operands
[0]))
17038 /* When the input register is even and is not dead after the
17039 pattern, it has to hold the second constant but we cannot
17040 form a legal STRD in ARM mode with this register as the second
17042 if (regno
% 2 == 0)
17045 /* Is regno-1 free? */
17046 SET_HARD_REG_SET (regset
);
17047 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
17048 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
17049 if (tmp
== NULL_RTX
)
17056 /* Find a DImode register. */
17057 CLEAR_HARD_REG_SET (regset
);
17058 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
17059 if (tmp
!= NULL_RTX
)
17061 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
17062 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
17066 /* Can we use the input register to form a DI register? */
17067 SET_HARD_REG_SET (regset
);
17068 CLEAR_HARD_REG_BIT(regset
,
17069 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
17070 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
17071 if (tmp
== NULL_RTX
)
17073 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
17077 gcc_assert (operands
[0] != NULL_RTX
);
17078 gcc_assert (operands
[1] != NULL_RTX
);
17079 gcc_assert (REGNO (operands
[0]) % 2 == 0);
17080 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
17084 /* Make sure the instructions are ordered with lower memory access first. */
17085 if (offsets
[0] > offsets
[1])
17087 gap
= offsets
[0] - offsets
[1];
17088 offset
= offsets
[1];
17090 /* Swap the instructions such that lower memory is accessed first. */
17091 std::swap (operands
[0], operands
[1]);
17092 std::swap (operands
[2], operands
[3]);
17093 std::swap (align
[0], align
[1]);
17095 std::swap (operands
[4], operands
[5]);
17099 gap
= offsets
[1] - offsets
[0];
17100 offset
= offsets
[0];
17103 /* Make sure accesses are to consecutive memory locations. */
17104 if (gap
!= GET_MODE_SIZE (SImode
))
17107 if (!align_ok_ldrd_strd (align
[0], offset
))
17110 /* Make sure we generate legal instructions. */
17111 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
17115 /* In Thumb state, where registers are almost unconstrained, there
17116 is little hope to fix it. */
17120 if (load
&& commute
)
17122 /* Try reordering registers. */
17123 std::swap (operands
[0], operands
[1]);
17124 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
17131 /* If input registers are dead after this pattern, they can be
17132 reordered or replaced by other registers that are free in the
17133 current pattern. */
17134 if (!peep2_reg_dead_p (4, operands
[0])
17135 || !peep2_reg_dead_p (4, operands
[1]))
17138 /* Try to reorder the input registers. */
17139 /* For example, the code
17144 can be transformed into
17149 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
17152 std::swap (operands
[0], operands
[1]);
17156 /* Try to find a free DI register. */
17157 CLEAR_HARD_REG_SET (regset
);
17158 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
17159 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
17162 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
17163 if (tmp
== NULL_RTX
)
17166 /* DREG must be an even-numbered register in DImode.
17167 Split it into SI registers. */
17168 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
17169 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
17170 gcc_assert (operands
[0] != NULL_RTX
);
17171 gcc_assert (operands
[1] != NULL_RTX
);
17172 gcc_assert (REGNO (operands
[0]) % 2 == 0);
17173 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
17175 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
17185 /* Return true if parallel execution of the two word-size accesses provided
17186 could be satisfied with a single LDRD/STRD instruction. Two word-size
17187 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17188 register operands and OPERANDS[2,3] are the corresponding memory operands.
17191 valid_operands_ldrd_strd (rtx
*operands
, bool load
)
17194 HOST_WIDE_INT offsets
[2], offset
, align
[2];
17195 rtx base
= NULL_RTX
;
17196 rtx cur_base
, cur_offset
;
17199 /* Check that the memory references are immediate offsets from the
17200 same base register. Extract the base register, the destination
17201 registers, and the corresponding memory offsets. */
17202 for (i
= 0; i
< nops
; i
++)
17204 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
,
17210 else if (REGNO (base
) != REGNO (cur_base
))
17213 offsets
[i
] = INTVAL (cur_offset
);
17214 if (GET_CODE (operands
[i
]) == SUBREG
)
17218 if (offsets
[0] > offsets
[1])
17221 gap
= offsets
[1] - offsets
[0];
17222 offset
= offsets
[0];
17224 /* Make sure accesses are to consecutive memory locations. */
17225 if (gap
!= GET_MODE_SIZE (SImode
))
17228 if (!align_ok_ldrd_strd (align
[0], offset
))
17231 return operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
17236 /* Print a symbolic form of X to the debug file, F. */
17238 arm_print_value (FILE *f
, rtx x
)
17240 switch (GET_CODE (x
))
17243 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
17249 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
17250 sizeof (fpstr
), 0, 1);
17260 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
17262 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
17263 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
17271 fprintf (f
, "\"%s\"", XSTR (x
, 0));
17275 fprintf (f
, "`%s'", XSTR (x
, 0));
17279 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
17283 arm_print_value (f
, XEXP (x
, 0));
17287 arm_print_value (f
, XEXP (x
, 0));
17289 arm_print_value (f
, XEXP (x
, 1));
17297 fprintf (f
, "????");
17302 /* Routines for manipulation of the constant pool. */
17304 /* Arm instructions cannot load a large constant directly into a
17305 register; they have to come from a pc relative load. The constant
17306 must therefore be placed in the addressable range of the pc
17307 relative load. Depending on the precise pc relative load
17308 instruction the range is somewhere between 256 bytes and 4k. This
17309 means that we often have to dump a constant inside a function, and
17310 generate code to branch around it.
17312 It is important to minimize this, since the branches will slow
17313 things down and make the code larger.
17315 Normally we can hide the table after an existing unconditional
17316 branch so that there is no interruption of the flow, but in the
17317 worst case the code looks like this:
17335 We fix this by performing a scan after scheduling, which notices
17336 which instructions need to have their operands fetched from the
17337 constant table and builds the table.
17339 The algorithm starts by building a table of all the constants that
17340 need fixing up and all the natural barriers in the function (places
17341 where a constant table can be dropped without breaking the flow).
17342 For each fixup we note how far the pc-relative replacement will be
17343 able to reach and the offset of the instruction into the function.
17345 Having built the table we then group the fixes together to form
17346 tables that are as large as possible (subject to addressing
17347 constraints) and emit each table of constants after the last
17348 barrier that is within range of all the instructions in the group.
17349 If a group does not contain a barrier, then we forcibly create one
17350 by inserting a jump instruction into the flow. Once the table has
17351 been inserted, the insns are then modified to reference the
17352 relevant entry in the pool.
17354 Possible enhancements to the algorithm (not implemented) are:
17356 1) For some processors and object formats, there may be benefit in
17357 aligning the pools to the start of cache lines; this alignment
17358 would need to be taken into account when calculating addressability
17361 /* These typedefs are located at the start of this file, so that
17362 they can be used in the prototypes there. This comment is to
17363 remind readers of that fact so that the following structures
17364 can be understood more easily.
17366 typedef struct minipool_node Mnode;
17367 typedef struct minipool_fixup Mfix; */
17369 struct minipool_node
17371 /* Doubly linked chain of entries. */
17374 /* The maximum offset into the code that this entry can be placed. While
17375 pushing fixes for forward references, all entries are sorted in order
17376 of increasing max_address. */
17377 HOST_WIDE_INT max_address
;
17378 /* Similarly for an entry inserted for a backwards ref. */
17379 HOST_WIDE_INT min_address
;
17380 /* The number of fixes referencing this entry. This can become zero
17381 if we "unpush" an entry. In this case we ignore the entry when we
17382 come to emit the code. */
17384 /* The offset from the start of the minipool. */
17385 HOST_WIDE_INT offset
;
17386 /* The value in table. */
17388 /* The mode of value. */
17390 /* The size of the value. With iWMMXt enabled
17391 sizes > 4 also imply an alignment of 8-bytes. */
17395 struct minipool_fixup
17399 HOST_WIDE_INT address
;
17405 HOST_WIDE_INT forwards
;
17406 HOST_WIDE_INT backwards
;
17409 /* Fixes less than a word need padding out to a word boundary. */
17410 #define MINIPOOL_FIX_SIZE(mode) \
17411 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
17413 static Mnode
* minipool_vector_head
;
17414 static Mnode
* minipool_vector_tail
;
17415 static rtx_code_label
*minipool_vector_label
;
17416 static int minipool_pad
;
17418 /* The linked list of all minipool fixes required for this function. */
17419 Mfix
* minipool_fix_head
;
17420 Mfix
* minipool_fix_tail
;
17421 /* The fix entry for the current minipool, once it has been placed. */
17422 Mfix
* minipool_barrier
;
17424 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17425 #define JUMP_TABLES_IN_TEXT_SECTION 0
17428 static HOST_WIDE_INT
17429 get_jump_table_size (rtx_jump_table_data
*insn
)
17431 /* ADDR_VECs only take room if read-only data does into the text
17433 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
17435 rtx body
= PATTERN (insn
);
17436 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
17437 HOST_WIDE_INT size
;
17438 HOST_WIDE_INT modesize
;
17440 modesize
= GET_MODE_SIZE (GET_MODE (body
));
17441 size
= modesize
* XVECLEN (body
, elt
);
17445 /* Round up size of TBB table to a halfword boundary. */
17446 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
17449 /* No padding necessary for TBH. */
17452 /* Add two bytes for alignment on Thumb. */
17457 gcc_unreachable ();
17465 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17466 function descriptor) into a register and the GOT address into the
17467 FDPIC register, returning an rtx for the register holding the
17468 function address. */
17471 arm_load_function_descriptor (rtx funcdesc
)
17473 rtx fnaddr_reg
= gen_reg_rtx (Pmode
);
17474 rtx pic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
17475 rtx fnaddr
= gen_rtx_MEM (Pmode
, funcdesc
);
17476 rtx gotaddr
= gen_rtx_MEM (Pmode
, plus_constant (Pmode
, funcdesc
, 4));
17478 emit_move_insn (fnaddr_reg
, fnaddr
);
17480 /* The ABI requires the entry point address to be loaded first, but
17481 since we cannot support lazy binding for lack of atomic load of
17482 two 32-bits values, we do not need to bother to prevent the
17483 previous load from being moved after that of the GOT address. */
17484 emit_insn (gen_restore_pic_register_after_call (pic_reg
, gotaddr
));
17489 /* Return the maximum amount of padding that will be inserted before
17491 static HOST_WIDE_INT
17492 get_label_padding (rtx label
)
17494 HOST_WIDE_INT align
, min_insn_size
;
17496 align
= 1 << label_to_alignment (label
).levels
[0].log
;
17497 min_insn_size
= TARGET_THUMB
? 2 : 4;
17498 return align
> min_insn_size
? align
- min_insn_size
: 0;
17501 /* Move a minipool fix MP from its current location to before MAX_MP.
17502 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17503 constraints may need updating. */
17505 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
17506 HOST_WIDE_INT max_address
)
17508 /* The code below assumes these are different. */
17509 gcc_assert (mp
!= max_mp
);
17511 if (max_mp
== NULL
)
17513 if (max_address
< mp
->max_address
)
17514 mp
->max_address
= max_address
;
17518 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
17519 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
17521 mp
->max_address
= max_address
;
17523 /* Unlink MP from its current position. Since max_mp is non-null,
17524 mp->prev must be non-null. */
17525 mp
->prev
->next
= mp
->next
;
17526 if (mp
->next
!= NULL
)
17527 mp
->next
->prev
= mp
->prev
;
17529 minipool_vector_tail
= mp
->prev
;
17531 /* Re-insert it before MAX_MP. */
17533 mp
->prev
= max_mp
->prev
;
17536 if (mp
->prev
!= NULL
)
17537 mp
->prev
->next
= mp
;
17539 minipool_vector_head
= mp
;
17542 /* Save the new entry. */
17545 /* Scan over the preceding entries and adjust their addresses as
17547 while (mp
->prev
!= NULL
17548 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
17550 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
17557 /* Add a constant to the minipool for a forward reference. Returns the
17558 node added or NULL if the constant will not fit in this pool. */
17560 add_minipool_forward_ref (Mfix
*fix
)
17562 /* If set, max_mp is the first pool_entry that has a lower
17563 constraint than the one we are trying to add. */
17564 Mnode
* max_mp
= NULL
;
17565 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
17568 /* If the minipool starts before the end of FIX->INSN then this FIX
17569 cannot be placed into the current pool. Furthermore, adding the
17570 new constant pool entry may cause the pool to start FIX_SIZE bytes
17572 if (minipool_vector_head
&&
17573 (fix
->address
+ get_attr_length (fix
->insn
)
17574 >= minipool_vector_head
->max_address
- fix
->fix_size
))
17577 /* Scan the pool to see if a constant with the same value has
17578 already been added. While we are doing this, also note the
17579 location where we must insert the constant if it doesn't already
17581 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
17583 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
17584 && fix
->mode
== mp
->mode
17585 && (!LABEL_P (fix
->value
)
17586 || (CODE_LABEL_NUMBER (fix
->value
)
17587 == CODE_LABEL_NUMBER (mp
->value
)))
17588 && rtx_equal_p (fix
->value
, mp
->value
))
17590 /* More than one fix references this entry. */
17592 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
17595 /* Note the insertion point if necessary. */
17597 && mp
->max_address
> max_address
)
17600 /* If we are inserting an 8-bytes aligned quantity and
17601 we have not already found an insertion point, then
17602 make sure that all such 8-byte aligned quantities are
17603 placed at the start of the pool. */
17604 if (ARM_DOUBLEWORD_ALIGN
17606 && fix
->fix_size
>= 8
17607 && mp
->fix_size
< 8)
17610 max_address
= mp
->max_address
;
17614 /* The value is not currently in the minipool, so we need to create
17615 a new entry for it. If MAX_MP is NULL, the entry will be put on
17616 the end of the list since the placement is less constrained than
17617 any existing entry. Otherwise, we insert the new fix before
17618 MAX_MP and, if necessary, adjust the constraints on the other
17621 mp
->fix_size
= fix
->fix_size
;
17622 mp
->mode
= fix
->mode
;
17623 mp
->value
= fix
->value
;
17625 /* Not yet required for a backwards ref. */
17626 mp
->min_address
= -65536;
17628 if (max_mp
== NULL
)
17630 mp
->max_address
= max_address
;
17632 mp
->prev
= minipool_vector_tail
;
17634 if (mp
->prev
== NULL
)
17636 minipool_vector_head
= mp
;
17637 minipool_vector_label
= gen_label_rtx ();
17640 mp
->prev
->next
= mp
;
17642 minipool_vector_tail
= mp
;
17646 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
17647 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
17649 mp
->max_address
= max_address
;
17652 mp
->prev
= max_mp
->prev
;
17654 if (mp
->prev
!= NULL
)
17655 mp
->prev
->next
= mp
;
17657 minipool_vector_head
= mp
;
17660 /* Save the new entry. */
17663 /* Scan over the preceding entries and adjust their addresses as
17665 while (mp
->prev
!= NULL
17666 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
17668 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
17676 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
17677 HOST_WIDE_INT min_address
)
17679 HOST_WIDE_INT offset
;
17681 /* The code below assumes these are different. */
17682 gcc_assert (mp
!= min_mp
);
17684 if (min_mp
== NULL
)
17686 if (min_address
> mp
->min_address
)
17687 mp
->min_address
= min_address
;
17691 /* We will adjust this below if it is too loose. */
17692 mp
->min_address
= min_address
;
17694 /* Unlink MP from its current position. Since min_mp is non-null,
17695 mp->next must be non-null. */
17696 mp
->next
->prev
= mp
->prev
;
17697 if (mp
->prev
!= NULL
)
17698 mp
->prev
->next
= mp
->next
;
17700 minipool_vector_head
= mp
->next
;
17702 /* Reinsert it after MIN_MP. */
17704 mp
->next
= min_mp
->next
;
17706 if (mp
->next
!= NULL
)
17707 mp
->next
->prev
= mp
;
17709 minipool_vector_tail
= mp
;
17715 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
17717 mp
->offset
= offset
;
17718 if (mp
->refcount
> 0)
17719 offset
+= mp
->fix_size
;
17721 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
17722 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
17728 /* Add a constant to the minipool for a backward reference. Returns the
17729 node added or NULL if the constant will not fit in this pool.
17731 Note that the code for insertion for a backwards reference can be
17732 somewhat confusing because the calculated offsets for each fix do
17733 not take into account the size of the pool (which is still under
17736 add_minipool_backward_ref (Mfix
*fix
)
17738 /* If set, min_mp is the last pool_entry that has a lower constraint
17739 than the one we are trying to add. */
17740 Mnode
*min_mp
= NULL
;
17741 /* This can be negative, since it is only a constraint. */
17742 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
17745 /* If we can't reach the current pool from this insn, or if we can't
17746 insert this entry at the end of the pool without pushing other
17747 fixes out of range, then we don't try. This ensures that we
17748 can't fail later on. */
17749 if (min_address
>= minipool_barrier
->address
17750 || (minipool_vector_tail
->min_address
+ fix
->fix_size
17751 >= minipool_barrier
->address
))
17754 /* Scan the pool to see if a constant with the same value has
17755 already been added. While we are doing this, also note the
17756 location where we must insert the constant if it doesn't already
17758 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
17760 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
17761 && fix
->mode
== mp
->mode
17762 && (!LABEL_P (fix
->value
)
17763 || (CODE_LABEL_NUMBER (fix
->value
)
17764 == CODE_LABEL_NUMBER (mp
->value
)))
17765 && rtx_equal_p (fix
->value
, mp
->value
)
17766 /* Check that there is enough slack to move this entry to the
17767 end of the table (this is conservative). */
17768 && (mp
->max_address
17769 > (minipool_barrier
->address
17770 + minipool_vector_tail
->offset
17771 + minipool_vector_tail
->fix_size
)))
17774 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
17777 if (min_mp
!= NULL
)
17778 mp
->min_address
+= fix
->fix_size
;
17781 /* Note the insertion point if necessary. */
17782 if (mp
->min_address
< min_address
)
17784 /* For now, we do not allow the insertion of 8-byte alignment
17785 requiring nodes anywhere but at the start of the pool. */
17786 if (ARM_DOUBLEWORD_ALIGN
17787 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
17792 else if (mp
->max_address
17793 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
17795 /* Inserting before this entry would push the fix beyond
17796 its maximum address (which can happen if we have
17797 re-located a forwards fix); force the new fix to come
17799 if (ARM_DOUBLEWORD_ALIGN
17800 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
17805 min_address
= mp
->min_address
+ fix
->fix_size
;
17808 /* Do not insert a non-8-byte aligned quantity before 8-byte
17809 aligned quantities. */
17810 else if (ARM_DOUBLEWORD_ALIGN
17811 && fix
->fix_size
< 8
17812 && mp
->fix_size
>= 8)
17815 min_address
= mp
->min_address
+ fix
->fix_size
;
17820 /* We need to create a new entry. */
17822 mp
->fix_size
= fix
->fix_size
;
17823 mp
->mode
= fix
->mode
;
17824 mp
->value
= fix
->value
;
17826 mp
->max_address
= minipool_barrier
->address
+ 65536;
17828 mp
->min_address
= min_address
;
17830 if (min_mp
== NULL
)
17833 mp
->next
= minipool_vector_head
;
17835 if (mp
->next
== NULL
)
17837 minipool_vector_tail
= mp
;
17838 minipool_vector_label
= gen_label_rtx ();
17841 mp
->next
->prev
= mp
;
17843 minipool_vector_head
= mp
;
17847 mp
->next
= min_mp
->next
;
17851 if (mp
->next
!= NULL
)
17852 mp
->next
->prev
= mp
;
17854 minipool_vector_tail
= mp
;
17857 /* Save the new entry. */
17865 /* Scan over the following entries and adjust their offsets. */
17866 while (mp
->next
!= NULL
)
17868 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
17869 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
17872 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
17874 mp
->next
->offset
= mp
->offset
;
17883 assign_minipool_offsets (Mfix
*barrier
)
17885 HOST_WIDE_INT offset
= 0;
17888 minipool_barrier
= barrier
;
17890 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
17892 mp
->offset
= offset
;
17894 if (mp
->refcount
> 0)
17895 offset
+= mp
->fix_size
;
17899 /* Output the literal table */
17901 dump_minipool (rtx_insn
*scan
)
17907 if (ARM_DOUBLEWORD_ALIGN
)
17908 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
17909 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
17916 fprintf (dump_file
,
17917 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17918 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
17920 scan
= emit_label_after (gen_label_rtx (), scan
);
17921 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
17922 scan
= emit_label_after (minipool_vector_label
, scan
);
17924 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
17926 if (mp
->refcount
> 0)
17930 fprintf (dump_file
,
17931 ";; Offset %u, min %ld, max %ld ",
17932 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
17933 (unsigned long) mp
->max_address
);
17934 arm_print_value (dump_file
, mp
->value
);
17935 fputc ('\n', dump_file
);
17938 rtx val
= copy_rtx (mp
->value
);
17940 switch (GET_MODE_SIZE (mp
->mode
))
17942 #ifdef HAVE_consttable_1
17944 scan
= emit_insn_after (gen_consttable_1 (val
), scan
);
17948 #ifdef HAVE_consttable_2
17950 scan
= emit_insn_after (gen_consttable_2 (val
), scan
);
17954 #ifdef HAVE_consttable_4
17956 scan
= emit_insn_after (gen_consttable_4 (val
), scan
);
17960 #ifdef HAVE_consttable_8
17962 scan
= emit_insn_after (gen_consttable_8 (val
), scan
);
17966 #ifdef HAVE_consttable_16
17968 scan
= emit_insn_after (gen_consttable_16 (val
), scan
);
17973 gcc_unreachable ();
17981 minipool_vector_head
= minipool_vector_tail
= NULL
;
17982 scan
= emit_insn_after (gen_consttable_end (), scan
);
17983 scan
= emit_barrier_after (scan
);
17986 /* Return the cost of forcibly inserting a barrier after INSN. */
17988 arm_barrier_cost (rtx_insn
*insn
)
17990 /* Basing the location of the pool on the loop depth is preferable,
17991 but at the moment, the basic block information seems to be
17992 corrupt by this stage of the compilation. */
17993 int base_cost
= 50;
17994 rtx_insn
*next
= next_nonnote_insn (insn
);
17996 if (next
!= NULL
&& LABEL_P (next
))
17999 switch (GET_CODE (insn
))
18002 /* It will always be better to place the table before the label, rather
18011 return base_cost
- 10;
18014 return base_cost
+ 10;
18018 /* Find the best place in the insn stream in the range
18019 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18020 Create the barrier by inserting a jump and add a new fix entry for
18023 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
18025 HOST_WIDE_INT count
= 0;
18026 rtx_barrier
*barrier
;
18027 rtx_insn
*from
= fix
->insn
;
18028 /* The instruction after which we will insert the jump. */
18029 rtx_insn
*selected
= NULL
;
18031 /* The address at which the jump instruction will be placed. */
18032 HOST_WIDE_INT selected_address
;
18034 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
18035 rtx_code_label
*label
= gen_label_rtx ();
18037 selected_cost
= arm_barrier_cost (from
);
18038 selected_address
= fix
->address
;
18040 while (from
&& count
< max_count
)
18042 rtx_jump_table_data
*tmp
;
18045 /* This code shouldn't have been called if there was a natural barrier
18047 gcc_assert (!BARRIER_P (from
));
18049 /* Count the length of this insn. This must stay in sync with the
18050 code that pushes minipool fixes. */
18051 if (LABEL_P (from
))
18052 count
+= get_label_padding (from
);
18054 count
+= get_attr_length (from
);
18056 /* If there is a jump table, add its length. */
18057 if (tablejump_p (from
, NULL
, &tmp
))
18059 count
+= get_jump_table_size (tmp
);
18061 /* Jump tables aren't in a basic block, so base the cost on
18062 the dispatch insn. If we select this location, we will
18063 still put the pool after the table. */
18064 new_cost
= arm_barrier_cost (from
);
18066 if (count
< max_count
18067 && (!selected
|| new_cost
<= selected_cost
))
18070 selected_cost
= new_cost
;
18071 selected_address
= fix
->address
+ count
;
18074 /* Continue after the dispatch table. */
18075 from
= NEXT_INSN (tmp
);
18079 new_cost
= arm_barrier_cost (from
);
18081 if (count
< max_count
18082 && (!selected
|| new_cost
<= selected_cost
))
18085 selected_cost
= new_cost
;
18086 selected_address
= fix
->address
+ count
;
18089 from
= NEXT_INSN (from
);
18092 /* Make sure that we found a place to insert the jump. */
18093 gcc_assert (selected
);
18095 /* Create a new JUMP_INSN that branches around a barrier. */
18096 from
= emit_jump_insn_after (gen_jump (label
), selected
);
18097 JUMP_LABEL (from
) = label
;
18098 barrier
= emit_barrier_after (from
);
18099 emit_label_after (label
, barrier
);
18101 /* Create a minipool barrier entry for the new barrier. */
18102 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
18103 new_fix
->insn
= barrier
;
18104 new_fix
->address
= selected_address
;
18105 new_fix
->next
= fix
->next
;
18106 fix
->next
= new_fix
;
18111 /* Record that there is a natural barrier in the insn stream at
18114 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
18116 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
18119 fix
->address
= address
;
18122 if (minipool_fix_head
!= NULL
)
18123 minipool_fix_tail
->next
= fix
;
18125 minipool_fix_head
= fix
;
18127 minipool_fix_tail
= fix
;
18130 /* Record INSN, which will need fixing up to load a value from the
18131 minipool. ADDRESS is the offset of the insn since the start of the
18132 function; LOC is a pointer to the part of the insn which requires
18133 fixing; VALUE is the constant that must be loaded, which is of type
18136 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
18137 machine_mode mode
, rtx value
)
18139 gcc_assert (!arm_disable_literal_pool
);
18140 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
18143 fix
->address
= address
;
18146 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
18147 fix
->value
= value
;
18148 fix
->forwards
= get_attr_pool_range (insn
);
18149 fix
->backwards
= get_attr_neg_pool_range (insn
);
18150 fix
->minipool
= NULL
;
18152 /* If an insn doesn't have a range defined for it, then it isn't
18153 expecting to be reworked by this code. Better to stop now than
18154 to generate duff assembly code. */
18155 gcc_assert (fix
->forwards
|| fix
->backwards
);
18157 /* If an entry requires 8-byte alignment then assume all constant pools
18158 require 4 bytes of padding. Trying to do this later on a per-pool
18159 basis is awkward because existing pool entries have to be modified. */
18160 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
18165 fprintf (dump_file
,
18166 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18167 GET_MODE_NAME (mode
),
18168 INSN_UID (insn
), (unsigned long) address
,
18169 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
18170 arm_print_value (dump_file
, fix
->value
);
18171 fprintf (dump_file
, "\n");
18174 /* Add it to the chain of fixes. */
18177 if (minipool_fix_head
!= NULL
)
18178 minipool_fix_tail
->next
= fix
;
18180 minipool_fix_head
= fix
;
18182 minipool_fix_tail
= fix
;
18185 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
18186 Returns the number of insns needed, or 99 if we always want to synthesize
18189 arm_max_const_double_inline_cost ()
18191 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
18194 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18195 Returns the number of insns needed, or 99 if we don't know how to
18198 arm_const_double_inline_cost (rtx val
)
18200 rtx lowpart
, highpart
;
18203 mode
= GET_MODE (val
);
18205 if (mode
== VOIDmode
)
18208 gcc_assert (GET_MODE_SIZE (mode
) == 8);
18210 lowpart
= gen_lowpart (SImode
, val
);
18211 highpart
= gen_highpart_mode (SImode
, mode
, val
);
18213 gcc_assert (CONST_INT_P (lowpart
));
18214 gcc_assert (CONST_INT_P (highpart
));
18216 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
18217 NULL_RTX
, NULL_RTX
, 0, 0)
18218 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
18219 NULL_RTX
, NULL_RTX
, 0, 0));
18222 /* Cost of loading a SImode constant. */
18224 arm_const_inline_cost (enum rtx_code code
, rtx val
)
18226 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
18227 NULL_RTX
, NULL_RTX
, 1, 0);
18230 /* Return true if it is worthwhile to split a 64-bit constant into two
18231 32-bit operations. This is the case if optimizing for size, or
18232 if we have load delay slots, or if one 32-bit part can be done with
18233 a single data operation. */
18235 arm_const_double_by_parts (rtx val
)
18237 machine_mode mode
= GET_MODE (val
);
18240 if (optimize_size
|| arm_ld_sched
)
18243 if (mode
== VOIDmode
)
18246 part
= gen_highpart_mode (SImode
, mode
, val
);
18248 gcc_assert (CONST_INT_P (part
));
18250 if (const_ok_for_arm (INTVAL (part
))
18251 || const_ok_for_arm (~INTVAL (part
)))
18254 part
= gen_lowpart (SImode
, val
);
18256 gcc_assert (CONST_INT_P (part
));
18258 if (const_ok_for_arm (INTVAL (part
))
18259 || const_ok_for_arm (~INTVAL (part
)))
18265 /* Return true if it is possible to inline both the high and low parts
18266 of a 64-bit constant into 32-bit data processing instructions. */
18268 arm_const_double_by_immediates (rtx val
)
18270 machine_mode mode
= GET_MODE (val
);
18273 if (mode
== VOIDmode
)
18276 part
= gen_highpart_mode (SImode
, mode
, val
);
18278 gcc_assert (CONST_INT_P (part
));
18280 if (!const_ok_for_arm (INTVAL (part
)))
18283 part
= gen_lowpart (SImode
, val
);
18285 gcc_assert (CONST_INT_P (part
));
18287 if (!const_ok_for_arm (INTVAL (part
)))
18293 /* Scan INSN and note any of its operands that need fixing.
18294 If DO_PUSHES is false we do not actually push any of the fixups
18297 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
18301 extract_constrain_insn (insn
);
18303 if (recog_data
.n_alternatives
== 0)
18306 /* Fill in recog_op_alt with information about the constraints of
18308 preprocess_constraints (insn
);
18310 const operand_alternative
*op_alt
= which_op_alt ();
18311 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
18313 /* Things we need to fix can only occur in inputs. */
18314 if (recog_data
.operand_type
[opno
] != OP_IN
)
18317 /* If this alternative is a memory reference, then any mention
18318 of constants in this alternative is really to fool reload
18319 into allowing us to accept one there. We need to fix them up
18320 now so that we output the right code. */
18321 if (op_alt
[opno
].memory_ok
)
18323 rtx op
= recog_data
.operand
[opno
];
18325 if (CONSTANT_P (op
))
18328 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
18329 recog_data
.operand_mode
[opno
], op
);
18331 else if (MEM_P (op
)
18332 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
18333 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
18337 rtx cop
= avoid_constant_pool_reference (op
);
18339 /* Casting the address of something to a mode narrower
18340 than a word can cause avoid_constant_pool_reference()
18341 to return the pool reference itself. That's no good to
18342 us here. Lets just hope that we can use the
18343 constant pool value directly. */
18345 cop
= get_pool_constant (XEXP (op
, 0));
18347 push_minipool_fix (insn
, address
,
18348 recog_data
.operand_loc
[opno
],
18349 recog_data
.operand_mode
[opno
], cop
);
18359 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18360 and unions in the context of ARMv8-M Security Extensions. It is used as a
18361 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18362 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
18363 or four masks, depending on whether it is being computed for a
18364 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18365 respectively. The tree for the type of the argument or a field within an
18366 argument is passed in ARG_TYPE, the current register this argument or field
18367 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18368 argument or field starts at is passed in STARTING_BIT and the last used bit
18369 is kept in LAST_USED_BIT which is also updated accordingly. */
18371 static unsigned HOST_WIDE_INT
18372 comp_not_to_clear_mask_str_un (tree arg_type
, int * regno
,
18373 uint32_t * padding_bits_to_clear
,
18374 unsigned starting_bit
, int * last_used_bit
)
18377 unsigned HOST_WIDE_INT not_to_clear_reg_mask
= 0;
18379 if (TREE_CODE (arg_type
) == RECORD_TYPE
)
18381 unsigned current_bit
= starting_bit
;
18383 long int offset
, size
;
18386 field
= TYPE_FIELDS (arg_type
);
18389 /* The offset within a structure is always an offset from
18390 the start of that structure. Make sure we take that into the
18391 calculation of the register based offset that we use here. */
18392 offset
= starting_bit
;
18393 offset
+= TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field
), 0);
18396 /* This is the actual size of the field, for bitfields this is the
18397 bitfield width and not the container size. */
18398 size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
18400 if (*last_used_bit
!= offset
)
18402 if (offset
< *last_used_bit
)
18404 /* This field's offset is before the 'last_used_bit', that
18405 means this field goes on the next register. So we need to
18406 pad the rest of the current register and increase the
18407 register number. */
18409 mask
= ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit
);
18412 padding_bits_to_clear
[*regno
] |= mask
;
18413 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
18418 /* Otherwise we pad the bits between the last field's end and
18419 the start of the new field. */
18422 mask
= ((uint32_t)-1) >> (32 - offset
);
18423 mask
-= ((uint32_t) 1 << *last_used_bit
) - 1;
18424 padding_bits_to_clear
[*regno
] |= mask
;
18426 current_bit
= offset
;
18429 /* Calculate further padding bits for inner structs/unions too. */
18430 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field
)))
18432 *last_used_bit
= current_bit
;
18433 not_to_clear_reg_mask
18434 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field
), regno
,
18435 padding_bits_to_clear
, offset
,
18440 /* Update 'current_bit' with this field's size. If the
18441 'current_bit' lies in a subsequent register, update 'regno' and
18442 reset 'current_bit' to point to the current bit in that new
18444 current_bit
+= size
;
18445 while (current_bit
>= 32)
18448 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
18451 *last_used_bit
= current_bit
;
18454 field
= TREE_CHAIN (field
);
18456 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
18458 else if (TREE_CODE (arg_type
) == UNION_TYPE
)
18460 tree field
, field_t
;
18461 int i
, regno_t
, field_size
;
18465 uint32_t padding_bits_to_clear_res
[NUM_ARG_REGS
]
18466 = {-1, -1, -1, -1};
18468 /* To compute the padding bits in a union we only consider bits as
18469 padding bits if they are always either a padding bit or fall outside a
18470 fields size for all fields in the union. */
18471 field
= TYPE_FIELDS (arg_type
);
18474 uint32_t padding_bits_to_clear_t
[NUM_ARG_REGS
]
18475 = {0U, 0U, 0U, 0U};
18476 int last_used_bit_t
= *last_used_bit
;
18478 field_t
= TREE_TYPE (field
);
18480 /* If the field's type is either a record or a union make sure to
18481 compute their padding bits too. */
18482 if (RECORD_OR_UNION_TYPE_P (field_t
))
18483 not_to_clear_reg_mask
18484 |= comp_not_to_clear_mask_str_un (field_t
, ®no_t
,
18485 &padding_bits_to_clear_t
[0],
18486 starting_bit
, &last_used_bit_t
);
18489 field_size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
18490 regno_t
= (field_size
/ 32) + *regno
;
18491 last_used_bit_t
= (starting_bit
+ field_size
) % 32;
18494 for (i
= *regno
; i
< regno_t
; i
++)
18496 /* For all but the last register used by this field only keep the
18497 padding bits that were padding bits in this field. */
18498 padding_bits_to_clear_res
[i
] &= padding_bits_to_clear_t
[i
];
18501 /* For the last register, keep all padding bits that were padding
18502 bits in this field and any padding bits that are still valid
18503 as padding bits but fall outside of this field's size. */
18504 mask
= (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t
)) + 1;
18505 padding_bits_to_clear_res
[regno_t
]
18506 &= padding_bits_to_clear_t
[regno_t
] | mask
;
18508 /* Update the maximum size of the fields in terms of registers used
18509 ('max_reg') and the 'last_used_bit' in said register. */
18510 if (max_reg
< regno_t
)
18513 max_bit
= last_used_bit_t
;
18515 else if (max_reg
== regno_t
&& max_bit
< last_used_bit_t
)
18516 max_bit
= last_used_bit_t
;
18518 field
= TREE_CHAIN (field
);
18521 /* Update the current padding_bits_to_clear using the intersection of the
18522 padding bits of all the fields. */
18523 for (i
=*regno
; i
< max_reg
; i
++)
18524 padding_bits_to_clear
[i
] |= padding_bits_to_clear_res
[i
];
18526 /* Do not keep trailing padding bits, we do not know yet whether this
18527 is the end of the argument. */
18528 mask
= ((uint32_t) 1 << max_bit
) - 1;
18529 padding_bits_to_clear
[max_reg
]
18530 |= padding_bits_to_clear_res
[max_reg
] & mask
;
18533 *last_used_bit
= max_bit
;
18536 /* This function should only be used for structs and unions. */
18537 gcc_unreachable ();
18539 return not_to_clear_reg_mask
;
18542 /* In the context of ARMv8-M Security Extensions, this function is used for both
18543 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18544 registers are used when returning or passing arguments, which is then
18545 returned as a mask. It will also compute a mask to indicate padding/unused
18546 bits for each of these registers, and passes this through the
18547 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18548 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18549 the starting register used to pass this argument or return value is passed
18550 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18551 for struct and union types. */
18553 static unsigned HOST_WIDE_INT
18554 compute_not_to_clear_mask (tree arg_type
, rtx arg_rtx
, int regno
,
18555 uint32_t * padding_bits_to_clear
)
18558 int last_used_bit
= 0;
18559 unsigned HOST_WIDE_INT not_to_clear_mask
;
18561 if (RECORD_OR_UNION_TYPE_P (arg_type
))
18564 = comp_not_to_clear_mask_str_un (arg_type
, ®no
,
18565 padding_bits_to_clear
, 0,
18569 /* If the 'last_used_bit' is not zero, that means we are still using a
18570 part of the last 'regno'. In such cases we must clear the trailing
18571 bits. Otherwise we are not using regno and we should mark it as to
18573 if (last_used_bit
!= 0)
18574 padding_bits_to_clear
[regno
]
18575 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit
) + 1;
18577 not_to_clear_mask
&= ~(HOST_WIDE_INT_1U
<< regno
);
18581 not_to_clear_mask
= 0;
18582 /* We are not dealing with structs nor unions. So these arguments may be
18583 passed in floating point registers too. In some cases a BLKmode is
18584 used when returning or passing arguments in multiple VFP registers. */
18585 if (GET_MODE (arg_rtx
) == BLKmode
)
18590 /* This should really only occur when dealing with the hard-float
18592 gcc_assert (TARGET_HARD_FLOAT_ABI
);
18594 for (i
= 0; i
< XVECLEN (arg_rtx
, 0); i
++)
18596 reg
= XEXP (XVECEXP (arg_rtx
, 0, i
), 0);
18597 gcc_assert (REG_P (reg
));
18599 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (reg
);
18601 /* If we are dealing with DF mode, make sure we don't
18602 clear either of the registers it addresses. */
18603 arg_regs
= ARM_NUM_REGS (GET_MODE (reg
));
18606 unsigned HOST_WIDE_INT mask
;
18607 mask
= HOST_WIDE_INT_1U
<< (REGNO (reg
) + arg_regs
);
18608 mask
-= HOST_WIDE_INT_1U
<< REGNO (reg
);
18609 not_to_clear_mask
|= mask
;
18615 /* Otherwise we can rely on the MODE to determine how many registers
18616 are being used by this argument. */
18617 int arg_regs
= ARM_NUM_REGS (GET_MODE (arg_rtx
));
18618 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
18621 unsigned HOST_WIDE_INT
18622 mask
= HOST_WIDE_INT_1U
<< (REGNO (arg_rtx
) + arg_regs
);
18623 mask
-= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
18624 not_to_clear_mask
|= mask
;
18629 return not_to_clear_mask
;
18632 /* Clear registers secret before doing a cmse_nonsecure_call or returning from
18633 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
18634 are to be fully cleared, using the value in register CLEARING_REG if more
18635 efficient. The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
18636 the bits that needs to be cleared in caller-saved core registers, with
18637 SCRATCH_REG used as a scratch register for that clearing.
18639 NOTE: one of three following assertions must hold:
18640 - SCRATCH_REG is a low register
18641 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18642 in TO_CLEAR_BITMAP)
18643 - CLEARING_REG is a low register. */
18646 cmse_clear_registers (sbitmap to_clear_bitmap
, uint32_t *padding_bits_to_clear
,
18647 int padding_bits_len
, rtx scratch_reg
, rtx clearing_reg
)
18649 bool saved_clearing
= false;
18650 rtx saved_clearing_reg
= NULL_RTX
;
18651 int i
, regno
, clearing_regno
, minregno
= R0_REGNUM
, maxregno
= minregno
- 1;
18653 gcc_assert (arm_arch_cmse
);
18655 if (!bitmap_empty_p (to_clear_bitmap
))
18657 minregno
= bitmap_first_set_bit (to_clear_bitmap
);
18658 maxregno
= bitmap_last_set_bit (to_clear_bitmap
);
18660 clearing_regno
= REGNO (clearing_reg
);
18662 /* Clear padding bits. */
18663 gcc_assert (padding_bits_len
<= NUM_ARG_REGS
);
18664 for (i
= 0, regno
= R0_REGNUM
; i
< padding_bits_len
; i
++, regno
++)
18667 rtx rtx16
, dest
, cleared_reg
= gen_rtx_REG (SImode
, regno
);
18669 if (padding_bits_to_clear
[i
] == 0)
18672 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18673 CLEARING_REG as scratch. */
18675 && REGNO (scratch_reg
) > LAST_LO_REGNUM
)
18677 /* clearing_reg is not to be cleared, copy its value into scratch_reg
18678 such that we can use clearing_reg to clear the unused bits in the
18680 if ((clearing_regno
> maxregno
18681 || !bitmap_bit_p (to_clear_bitmap
, clearing_regno
))
18682 && !saved_clearing
)
18684 gcc_assert (clearing_regno
<= LAST_LO_REGNUM
);
18685 emit_move_insn (scratch_reg
, clearing_reg
);
18686 saved_clearing
= true;
18687 saved_clearing_reg
= scratch_reg
;
18689 scratch_reg
= clearing_reg
;
18692 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18693 mask
= (~padding_bits_to_clear
[i
]) & 0xFFFF;
18694 emit_move_insn (scratch_reg
, gen_int_mode (mask
, SImode
));
18696 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18697 mask
= (~padding_bits_to_clear
[i
]) >> 16;
18698 rtx16
= gen_int_mode (16, SImode
);
18699 dest
= gen_rtx_ZERO_EXTRACT (SImode
, scratch_reg
, rtx16
, rtx16
);
18701 emit_insn (gen_rtx_SET (dest
, gen_int_mode (mask
, SImode
)));
18703 emit_insn (gen_andsi3 (cleared_reg
, cleared_reg
, scratch_reg
));
18705 if (saved_clearing
)
18706 emit_move_insn (clearing_reg
, saved_clearing_reg
);
18709 /* Clear full registers. */
18711 if (TARGET_HAVE_FPCXT_CMSE
)
18714 int i
, j
, k
, nb_regs
;
18715 rtx use_seq
, par
, reg
, set
, vunspec
;
18716 int to_clear_bitmap_size
= SBITMAP_SIZE (to_clear_bitmap
);
18717 auto_sbitmap
core_regs_bitmap (to_clear_bitmap_size
);
18718 auto_sbitmap
to_clear_core_bitmap (to_clear_bitmap_size
);
18720 for (i
= FIRST_VFP_REGNUM
; i
<= maxregno
; i
+= nb_regs
)
18722 /* Find next register to clear and exit if none. */
18723 for (; i
<= maxregno
&& !bitmap_bit_p (to_clear_bitmap
, i
); i
++);
18727 /* Compute number of consecutive registers to clear. */
18728 for (j
= i
; j
<= maxregno
&& bitmap_bit_p (to_clear_bitmap
, j
);
18732 /* Create VSCCLRM RTX pattern. */
18733 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nb_regs
+ 1));
18734 vunspec_vec
= gen_rtvec (1, gen_int_mode (0, SImode
));
18735 vunspec
= gen_rtx_UNSPEC_VOLATILE (SImode
, vunspec_vec
,
18736 VUNSPEC_VSCCLRM_VPR
);
18737 XVECEXP (par
, 0, 0) = vunspec
;
18739 /* Insert VFP register clearing RTX in the pattern. */
18741 for (k
= 1, j
= i
; j
<= maxregno
&& k
< nb_regs
+ 1; j
++)
18743 if (!bitmap_bit_p (to_clear_bitmap
, j
))
18746 reg
= gen_rtx_REG (SFmode
, j
);
18747 set
= gen_rtx_SET (reg
, const0_rtx
);
18748 XVECEXP (par
, 0, k
++) = set
;
18751 use_seq
= get_insns ();
18754 emit_insn_after (use_seq
, emit_insn (par
));
18757 /* Get set of core registers to clear. */
18758 bitmap_clear (core_regs_bitmap
);
18759 bitmap_set_range (core_regs_bitmap
, R0_REGNUM
,
18760 IP_REGNUM
- R0_REGNUM
+ 1);
18761 bitmap_and (to_clear_core_bitmap
, to_clear_bitmap
,
18763 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap
));
18765 if (bitmap_empty_p (to_clear_core_bitmap
))
18768 /* Create clrm RTX pattern. */
18769 nb_regs
= bitmap_count_bits (to_clear_core_bitmap
);
18770 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nb_regs
+ 2));
18772 /* Insert core register clearing RTX in the pattern. */
18774 for (j
= 0, i
= minregno
; j
< nb_regs
; i
++)
18776 if (!bitmap_bit_p (to_clear_core_bitmap
, i
))
18779 reg
= gen_rtx_REG (SImode
, i
);
18780 set
= gen_rtx_SET (reg
, const0_rtx
);
18781 XVECEXP (par
, 0, j
++) = set
;
18785 /* Insert APSR register clearing RTX in the pattern
18786 * along with clobbering CC. */
18787 vunspec_vec
= gen_rtvec (1, gen_int_mode (0, SImode
));
18788 vunspec
= gen_rtx_UNSPEC_VOLATILE (SImode
, vunspec_vec
,
18789 VUNSPEC_CLRM_APSR
);
18791 XVECEXP (par
, 0, j
++) = vunspec
;
18793 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
18794 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
18795 XVECEXP (par
, 0, j
) = clobber
;
18797 use_seq
= get_insns ();
18800 emit_insn_after (use_seq
, emit_insn (par
));
18804 /* If not marked for clearing, clearing_reg already does not contain
18806 if (clearing_regno
<= maxregno
18807 && bitmap_bit_p (to_clear_bitmap
, clearing_regno
))
18809 emit_move_insn (clearing_reg
, const0_rtx
);
18810 emit_use (clearing_reg
);
18811 bitmap_clear_bit (to_clear_bitmap
, clearing_regno
);
18814 for (regno
= minregno
; regno
<= maxregno
; regno
++)
18816 if (!bitmap_bit_p (to_clear_bitmap
, regno
))
18819 if (IS_VFP_REGNUM (regno
))
18821 /* If regno is an even vfp register and its successor is also to
18822 be cleared, use vmov. */
18823 if (TARGET_VFP_DOUBLE
18824 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
18825 && bitmap_bit_p (to_clear_bitmap
, regno
+ 1))
18827 emit_move_insn (gen_rtx_REG (DFmode
, regno
),
18828 CONST1_RTX (DFmode
));
18829 emit_use (gen_rtx_REG (DFmode
, regno
));
18834 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
18835 CONST1_RTX (SFmode
));
18836 emit_use (gen_rtx_REG (SFmode
, regno
));
18841 emit_move_insn (gen_rtx_REG (SImode
, regno
), clearing_reg
);
18842 emit_use (gen_rtx_REG (SImode
, regno
));
18848 /* Clear core and caller-saved VFP registers not used to pass arguments before
18849 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18850 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18851 libgcc/config/arm/cmse_nonsecure_call.S. */
18854 cmse_nonsecure_call_inline_register_clear (void)
18858 FOR_EACH_BB_FN (bb
, cfun
)
18862 FOR_BB_INSNS (bb
, insn
)
18864 bool clear_callee_saved
= TARGET_HAVE_FPCXT_CMSE
;
18865 /* frame = VFP regs + FPSCR + VPR. */
18866 unsigned lazy_store_stack_frame_size
18867 = (LAST_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1 + 2) * UNITS_PER_WORD
;
18868 unsigned long callee_saved_mask
18869 = ((1 << (LAST_HI_REGNUM
+ 1)) - 1)
18870 & ~((1 << (LAST_ARG_REGNUM
+ 1)) - 1);
18871 unsigned address_regnum
, regno
;
18872 unsigned max_int_regno
18873 = clear_callee_saved
? IP_REGNUM
: LAST_ARG_REGNUM
;
18874 unsigned max_fp_regno
18875 = TARGET_HAVE_FPCXT_CMSE
? LAST_VFP_REGNUM
: D7_VFP_REGNUM
;
18877 = TARGET_HARD_FLOAT_ABI
? max_fp_regno
: max_int_regno
;
18878 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
18880 rtx pat
, call
, unspec
, clearing_reg
, ip_reg
, shift
;
18882 CUMULATIVE_ARGS args_so_far_v
;
18883 cumulative_args_t args_so_far
;
18884 tree arg_type
, fntype
;
18885 bool first_param
= true, lazy_fpclear
= !TARGET_HARD_FLOAT_ABI
;
18886 function_args_iterator args_iter
;
18887 uint32_t padding_bits_to_clear
[4] = {0U, 0U, 0U, 0U};
18889 if (!NONDEBUG_INSN_P (insn
))
18892 if (!CALL_P (insn
))
18895 pat
= PATTERN (insn
);
18896 gcc_assert (GET_CODE (pat
) == PARALLEL
&& XVECLEN (pat
, 0) > 0);
18897 call
= XVECEXP (pat
, 0, 0);
18899 /* Get the real call RTX if the insn sets a value, ie. returns. */
18900 if (GET_CODE (call
) == SET
)
18901 call
= SET_SRC (call
);
18903 /* Check if it is a cmse_nonsecure_call. */
18904 unspec
= XEXP (call
, 0);
18905 if (GET_CODE (unspec
) != UNSPEC
18906 || XINT (unspec
, 1) != UNSPEC_NONSECURE_MEM
)
18909 /* Mark registers that needs to be cleared. Those that holds a
18910 parameter are removed from the set further below. */
18911 bitmap_clear (to_clear_bitmap
);
18912 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
,
18913 max_int_regno
- R0_REGNUM
+ 1);
18915 /* Only look at the caller-saved floating point registers in case of
18916 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
18917 lazy store and loads which clear both caller- and callee-saved
18921 auto_sbitmap
float_bitmap (maxregno
+ 1);
18923 bitmap_clear (float_bitmap
);
18924 bitmap_set_range (float_bitmap
, FIRST_VFP_REGNUM
,
18925 max_fp_regno
- FIRST_VFP_REGNUM
+ 1);
18926 bitmap_ior (to_clear_bitmap
, to_clear_bitmap
, float_bitmap
);
18929 /* Make sure the register used to hold the function address is not
18931 address
= RTVEC_ELT (XVEC (unspec
, 0), 0);
18932 gcc_assert (MEM_P (address
));
18933 gcc_assert (REG_P (XEXP (address
, 0)));
18934 address_regnum
= REGNO (XEXP (address
, 0));
18935 if (address_regnum
<= max_int_regno
)
18936 bitmap_clear_bit (to_clear_bitmap
, address_regnum
);
18938 /* Set basic block of call insn so that df rescan is performed on
18939 insns inserted here. */
18940 set_block_for_insn (insn
, bb
);
18941 df_set_flags (DF_DEFER_INSN_RESCAN
);
18944 /* Make sure the scheduler doesn't schedule other insns beyond
18946 emit_insn (gen_blockage ());
18948 /* Walk through all arguments and clear registers appropriately.
18950 fntype
= TREE_TYPE (MEM_EXPR (address
));
18951 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
,
18953 args_so_far
= pack_cumulative_args (&args_so_far_v
);
18954 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
18957 uint64_t to_clear_args_mask
;
18959 if (VOID_TYPE_P (arg_type
))
18962 function_arg_info
arg (arg_type
, /*named=*/true);
18964 /* ??? We should advance after processing the argument and pass
18965 the argument we're advancing past. */
18966 arm_function_arg_advance (args_so_far
, arg
);
18968 arg_rtx
= arm_function_arg (args_so_far
, arg
);
18969 gcc_assert (REG_P (arg_rtx
));
18971 = compute_not_to_clear_mask (arg_type
, arg_rtx
,
18973 &padding_bits_to_clear
[0]);
18974 if (to_clear_args_mask
)
18976 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
18978 if (to_clear_args_mask
& (1ULL << regno
))
18979 bitmap_clear_bit (to_clear_bitmap
, regno
);
18983 first_param
= false;
18986 /* We use right shift and left shift to clear the LSB of the address
18987 we jump to instead of using bic, to avoid having to use an extra
18988 register on Thumb-1. */
18989 clearing_reg
= XEXP (address
, 0);
18990 shift
= gen_rtx_LSHIFTRT (SImode
, clearing_reg
, const1_rtx
);
18991 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
18992 shift
= gen_rtx_ASHIFT (SImode
, clearing_reg
, const1_rtx
);
18993 emit_insn (gen_rtx_SET (clearing_reg
, shift
));
18995 if (clear_callee_saved
)
18998 emit_multi_reg_push (callee_saved_mask
, callee_saved_mask
);
18999 /* Disable frame debug info in push because it needs to be
19000 disabled for pop (see below). */
19001 RTX_FRAME_RELATED_P (push_insn
) = 0;
19003 /* Lazy store multiple. */
19007 rtx_insn
*add_insn
;
19009 imm
= gen_int_mode (- lazy_store_stack_frame_size
, SImode
);
19010 add_insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
19011 stack_pointer_rtx
, imm
));
19012 /* If we have the frame pointer, then it will be the
19013 CFA reg. Otherwise, the stack pointer is the CFA
19014 reg, so we need to emit a CFA adjust. */
19015 if (!frame_pointer_needed
)
19016 arm_add_cfa_adjust_cfa_note (add_insn
,
19017 - lazy_store_stack_frame_size
,
19019 stack_pointer_rtx
);
19020 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx
));
19022 /* Save VFP callee-saved registers. */
19025 vfp_emit_fstmd (D7_VFP_REGNUM
+ 1,
19026 (max_fp_regno
- D7_VFP_REGNUM
) / 2);
19027 /* Disable frame debug info in push because it needs to be
19028 disabled for vpop (see below). */
19029 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19033 /* Clear caller-saved registers that leak before doing a non-secure
19035 ip_reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
19036 cmse_clear_registers (to_clear_bitmap
, padding_bits_to_clear
,
19037 NUM_ARG_REGS
, ip_reg
, clearing_reg
);
19039 seq
= get_insns ();
19041 emit_insn_before (seq
, insn
);
19043 if (TARGET_HAVE_FPCXT_CMSE
)
19045 rtx_insn
*last
, *pop_insn
, *after
= insn
;
19049 /* Lazy load multiple done as part of libcall in Armv8-M. */
19052 rtx imm
= gen_int_mode (lazy_store_stack_frame_size
, SImode
);
19053 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx
));
19054 rtx_insn
*add_insn
=
19055 emit_insn (gen_addsi3 (stack_pointer_rtx
,
19056 stack_pointer_rtx
, imm
));
19057 if (!frame_pointer_needed
)
19058 arm_add_cfa_adjust_cfa_note (add_insn
,
19059 lazy_store_stack_frame_size
,
19061 stack_pointer_rtx
);
19063 /* Restore VFP callee-saved registers. */
19066 int nb_callee_saved_vfp_regs
=
19067 (max_fp_regno
- D7_VFP_REGNUM
) / 2;
19068 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM
+ 1,
19069 nb_callee_saved_vfp_regs
,
19070 stack_pointer_rtx
);
19071 /* Disable frame debug info in vpop because the SP adjustment
19072 is made using a CFA adjustment note while CFA used is
19073 sometimes R7. This then causes an assert failure in the
19074 CFI note creation code. */
19075 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19078 arm_emit_multi_reg_pop (callee_saved_mask
);
19079 pop_insn
= get_last_insn ();
19081 /* Disable frame debug info in pop because they reset the state
19082 of popped registers to what it was at the beginning of the
19083 function, before the prologue. This leads to incorrect state
19084 when doing the pop after the nonsecure call for registers that
19085 are pushed both in prologue and before the nonsecure call.
19087 It also occasionally triggers an assert failure in CFI note
19088 creation code when there are two codepaths to the epilogue,
19089 one of which does not go through the nonsecure call.
19090 Obviously this mean that debugging between the push and pop is
19092 RTX_FRAME_RELATED_P (pop_insn
) = 0;
19094 seq
= get_insns ();
19095 last
= get_last_insn ();
19098 emit_insn_after (seq
, after
);
19100 /* Skip pop we have just inserted after nonsecure call, we know
19101 it does not contain a nonsecure call. */
19108 /* Rewrite move insn into subtract of 0 if the condition codes will
19109 be useful in next conditional jump insn. */
19112 thumb1_reorg (void)
19116 FOR_EACH_BB_FN (bb
, cfun
)
19119 rtx cmp
, op0
, op1
, set
= NULL
;
19120 rtx_insn
*prev
, *insn
= BB_END (bb
);
19121 bool insn_clobbered
= false;
19123 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
19124 insn
= PREV_INSN (insn
);
19126 /* Find the last cbranchsi4_insn in basic block BB. */
19127 if (insn
== BB_HEAD (bb
)
19128 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
19131 /* Get the register with which we are comparing. */
19132 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
19133 op0
= XEXP (cmp
, 0);
19134 op1
= XEXP (cmp
, 1);
19136 /* Check that comparison is against ZERO. */
19137 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
19140 /* Find the first flag setting insn before INSN in basic block BB. */
19141 gcc_assert (insn
!= BB_HEAD (bb
));
19142 for (prev
= PREV_INSN (insn
);
19144 && prev
!= BB_HEAD (bb
)
19146 || DEBUG_INSN_P (prev
)
19147 || ((set
= single_set (prev
)) != NULL
19148 && get_attr_conds (prev
) == CONDS_NOCOND
)));
19149 prev
= PREV_INSN (prev
))
19151 if (reg_set_p (op0
, prev
))
19152 insn_clobbered
= true;
19155 /* Skip if op0 is clobbered by insn other than prev. */
19156 if (insn_clobbered
)
19162 dest
= SET_DEST (set
);
19163 src
= SET_SRC (set
);
19164 if (!low_register_operand (dest
, SImode
)
19165 || !low_register_operand (src
, SImode
))
19168 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19169 in INSN. Both src and dest of the move insn are checked. */
19170 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
19172 dest
= copy_rtx (dest
);
19173 src
= copy_rtx (src
);
19174 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
19175 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
19176 INSN_CODE (prev
) = -1;
19177 /* Set test register in INSN to dest. */
19178 XEXP (cmp
, 0) = copy_rtx (dest
);
19179 INSN_CODE (insn
) = -1;
19184 /* Convert instructions to their cc-clobbering variant if possible, since
19185 that allows us to use smaller encodings. */
19188 thumb2_reorg (void)
19193 INIT_REG_SET (&live
);
19195 /* We are freeing block_for_insn in the toplev to keep compatibility
19196 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19197 compute_bb_for_insn ();
19200 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
19202 FOR_EACH_BB_FN (bb
, cfun
)
19204 if ((current_tune
->disparage_flag_setting_t16_encodings
19205 == tune_params::DISPARAGE_FLAGS_ALL
)
19206 && optimize_bb_for_speed_p (bb
))
19210 Convert_Action action
= SKIP
;
19211 Convert_Action action_for_partial_flag_setting
19212 = ((current_tune
->disparage_flag_setting_t16_encodings
19213 != tune_params::DISPARAGE_FLAGS_NEITHER
)
19214 && optimize_bb_for_speed_p (bb
))
19217 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
19218 df_simulate_initialize_backwards (bb
, &live
);
19219 FOR_BB_INSNS_REVERSE (bb
, insn
)
19221 if (NONJUMP_INSN_P (insn
)
19222 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
19223 && GET_CODE (PATTERN (insn
)) == SET
)
19226 rtx pat
= PATTERN (insn
);
19227 rtx dst
= XEXP (pat
, 0);
19228 rtx src
= XEXP (pat
, 1);
19229 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
19231 if (UNARY_P (src
) || BINARY_P (src
))
19232 op0
= XEXP (src
, 0);
19234 if (BINARY_P (src
))
19235 op1
= XEXP (src
, 1);
19237 if (low_register_operand (dst
, SImode
))
19239 switch (GET_CODE (src
))
19242 /* Adding two registers and storing the result
19243 in the first source is already a 16-bit
19245 if (rtx_equal_p (dst
, op0
)
19246 && register_operand (op1
, SImode
))
19249 if (low_register_operand (op0
, SImode
))
19251 /* ADDS <Rd>,<Rn>,<Rm> */
19252 if (low_register_operand (op1
, SImode
))
19254 /* ADDS <Rdn>,#<imm8> */
19255 /* SUBS <Rdn>,#<imm8> */
19256 else if (rtx_equal_p (dst
, op0
)
19257 && CONST_INT_P (op1
)
19258 && IN_RANGE (INTVAL (op1
), -255, 255))
19260 /* ADDS <Rd>,<Rn>,#<imm3> */
19261 /* SUBS <Rd>,<Rn>,#<imm3> */
19262 else if (CONST_INT_P (op1
)
19263 && IN_RANGE (INTVAL (op1
), -7, 7))
19266 /* ADCS <Rd>, <Rn> */
19267 else if (GET_CODE (XEXP (src
, 0)) == PLUS
19268 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
19269 && low_register_operand (XEXP (XEXP (src
, 0), 1),
19271 && COMPARISON_P (op1
)
19272 && cc_register (XEXP (op1
, 0), VOIDmode
)
19273 && maybe_get_arm_condition_code (op1
) == ARM_CS
19274 && XEXP (op1
, 1) == const0_rtx
)
19279 /* RSBS <Rd>,<Rn>,#0
19280 Not handled here: see NEG below. */
19281 /* SUBS <Rd>,<Rn>,#<imm3>
19283 Not handled here: see PLUS above. */
19284 /* SUBS <Rd>,<Rn>,<Rm> */
19285 if (low_register_operand (op0
, SImode
)
19286 && low_register_operand (op1
, SImode
))
19291 /* MULS <Rdm>,<Rn>,<Rdm>
19292 As an exception to the rule, this is only used
19293 when optimizing for size since MULS is slow on all
19294 known implementations. We do not even want to use
19295 MULS in cold code, if optimizing for speed, so we
19296 test the global flag here. */
19297 if (!optimize_size
)
19299 /* Fall through. */
19303 /* ANDS <Rdn>,<Rm> */
19304 if (rtx_equal_p (dst
, op0
)
19305 && low_register_operand (op1
, SImode
))
19306 action
= action_for_partial_flag_setting
;
19307 else if (rtx_equal_p (dst
, op1
)
19308 && low_register_operand (op0
, SImode
))
19309 action
= action_for_partial_flag_setting
== SKIP
19310 ? SKIP
: SWAP_CONV
;
19316 /* ASRS <Rdn>,<Rm> */
19317 /* LSRS <Rdn>,<Rm> */
19318 /* LSLS <Rdn>,<Rm> */
19319 if (rtx_equal_p (dst
, op0
)
19320 && low_register_operand (op1
, SImode
))
19321 action
= action_for_partial_flag_setting
;
19322 /* ASRS <Rd>,<Rm>,#<imm5> */
19323 /* LSRS <Rd>,<Rm>,#<imm5> */
19324 /* LSLS <Rd>,<Rm>,#<imm5> */
19325 else if (low_register_operand (op0
, SImode
)
19326 && CONST_INT_P (op1
)
19327 && IN_RANGE (INTVAL (op1
), 0, 31))
19328 action
= action_for_partial_flag_setting
;
19332 /* RORS <Rdn>,<Rm> */
19333 if (rtx_equal_p (dst
, op0
)
19334 && low_register_operand (op1
, SImode
))
19335 action
= action_for_partial_flag_setting
;
19339 /* MVNS <Rd>,<Rm> */
19340 if (low_register_operand (op0
, SImode
))
19341 action
= action_for_partial_flag_setting
;
19345 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19346 if (low_register_operand (op0
, SImode
))
19351 /* MOVS <Rd>,#<imm8> */
19352 if (CONST_INT_P (src
)
19353 && IN_RANGE (INTVAL (src
), 0, 255))
19354 action
= action_for_partial_flag_setting
;
19358 /* MOVS and MOV<c> with registers have different
19359 encodings, so are not relevant here. */
19367 if (action
!= SKIP
)
19369 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
19370 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
19373 if (action
== SWAP_CONV
)
19375 src
= copy_rtx (src
);
19376 XEXP (src
, 0) = op1
;
19377 XEXP (src
, 1) = op0
;
19378 pat
= gen_rtx_SET (dst
, src
);
19379 vec
= gen_rtvec (2, pat
, clobber
);
19381 else /* action == CONV */
19382 vec
= gen_rtvec (2, pat
, clobber
);
19384 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
19385 INSN_CODE (insn
) = -1;
19389 if (NONDEBUG_INSN_P (insn
))
19390 df_simulate_one_insn_backwards (bb
, insn
, &live
);
19394 CLEAR_REG_SET (&live
);
19397 /* Gcc puts the pool in the wrong place for ARM, since we can only
19398 load addresses a limited distance around the pc. We do some
19399 special munging to move the constant pool values to the correct
19400 point in the code. */
19405 HOST_WIDE_INT address
= 0;
19409 cmse_nonsecure_call_inline_register_clear ();
19411 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19412 if (cfun
->is_thunk
)
19414 else if (TARGET_THUMB1
)
19416 else if (TARGET_THUMB2
)
19419 /* Ensure all insns that must be split have been split at this point.
19420 Otherwise, the pool placement code below may compute incorrect
19421 insn lengths. Note that when optimizing, all insns have already
19422 been split at this point. */
19424 split_all_insns_noflow ();
19426 /* Make sure we do not attempt to create a literal pool even though it should
19427 no longer be necessary to create any. */
19428 if (arm_disable_literal_pool
)
19431 minipool_fix_head
= minipool_fix_tail
= NULL
;
19433 /* The first insn must always be a note, or the code below won't
19434 scan it properly. */
19435 insn
= get_insns ();
19436 gcc_assert (NOTE_P (insn
));
19439 /* Scan all the insns and record the operands that will need fixing. */
19440 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
19442 if (BARRIER_P (insn
))
19443 push_minipool_barrier (insn
, address
);
19444 else if (INSN_P (insn
))
19446 rtx_jump_table_data
*table
;
19448 note_invalid_constants (insn
, address
, true);
19449 address
+= get_attr_length (insn
);
19451 /* If the insn is a vector jump, add the size of the table
19452 and skip the table. */
19453 if (tablejump_p (insn
, NULL
, &table
))
19455 address
+= get_jump_table_size (table
);
19459 else if (LABEL_P (insn
))
19460 /* Add the worst-case padding due to alignment. We don't add
19461 the _current_ padding because the minipool insertions
19462 themselves might change it. */
19463 address
+= get_label_padding (insn
);
19466 fix
= minipool_fix_head
;
19468 /* Now scan the fixups and perform the required changes. */
19473 Mfix
* last_added_fix
;
19474 Mfix
* last_barrier
= NULL
;
19477 /* Skip any further barriers before the next fix. */
19478 while (fix
&& BARRIER_P (fix
->insn
))
19481 /* No more fixes. */
19485 last_added_fix
= NULL
;
19487 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
19489 if (BARRIER_P (ftmp
->insn
))
19491 if (ftmp
->address
>= minipool_vector_head
->max_address
)
19494 last_barrier
= ftmp
;
19496 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
19499 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
19502 /* If we found a barrier, drop back to that; any fixes that we
19503 could have reached but come after the barrier will now go in
19504 the next mini-pool. */
19505 if (last_barrier
!= NULL
)
19507 /* Reduce the refcount for those fixes that won't go into this
19509 for (fdel
= last_barrier
->next
;
19510 fdel
&& fdel
!= ftmp
;
19513 fdel
->minipool
->refcount
--;
19514 fdel
->minipool
= NULL
;
19517 ftmp
= last_barrier
;
19521 /* ftmp is first fix that we can't fit into this pool and
19522 there no natural barriers that we could use. Insert a
19523 new barrier in the code somewhere between the previous
19524 fix and this one, and arrange to jump around it. */
19525 HOST_WIDE_INT max_address
;
19527 /* The last item on the list of fixes must be a barrier, so
19528 we can never run off the end of the list of fixes without
19529 last_barrier being set. */
19532 max_address
= minipool_vector_head
->max_address
;
19533 /* Check that there isn't another fix that is in range that
19534 we couldn't fit into this pool because the pool was
19535 already too large: we need to put the pool before such an
19536 instruction. The pool itself may come just after the
19537 fix because create_fix_barrier also allows space for a
19538 jump instruction. */
19539 if (ftmp
->address
< max_address
)
19540 max_address
= ftmp
->address
+ 1;
19542 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
19545 assign_minipool_offsets (last_barrier
);
19549 if (!BARRIER_P (ftmp
->insn
)
19550 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
19557 /* Scan over the fixes we have identified for this pool, fixing them
19558 up and adding the constants to the pool itself. */
19559 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
19560 this_fix
= this_fix
->next
)
19561 if (!BARRIER_P (this_fix
->insn
))
19564 = plus_constant (Pmode
,
19565 gen_rtx_LABEL_REF (VOIDmode
,
19566 minipool_vector_label
),
19567 this_fix
->minipool
->offset
);
19568 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
19571 dump_minipool (last_barrier
->insn
);
19575 /* From now on we must synthesize any constants that we can't handle
19576 directly. This can happen if the RTL gets split during final
19577 instruction generation. */
19578 cfun
->machine
->after_arm_reorg
= 1;
19580 /* Free the minipool memory. */
19581 obstack_free (&minipool_obstack
, minipool_startobj
);
19584 /* Routines to output assembly language. */
19586 /* Return string representation of passed in real value. */
19587 static const char *
19588 fp_const_from_val (REAL_VALUE_TYPE
*r
)
19590 if (!fp_consts_inited
)
19593 gcc_assert (real_equal (r
, &value_fp0
));
19597 /* OPERANDS[0] is the entire list of insns that constitute pop,
19598 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
19599 is in the list, UPDATE is true iff the list contains explicit
19600 update of base register. */
19602 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
19608 const char *conditional
;
19609 int num_saves
= XVECLEN (operands
[0], 0);
19610 unsigned int regno
;
19611 unsigned int regno_base
= REGNO (operands
[1]);
19612 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
19615 offset
+= update
? 1 : 0;
19616 offset
+= return_pc
? 1 : 0;
19618 /* Is the base register in the list? */
19619 for (i
= offset
; i
< num_saves
; i
++)
19621 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
19622 /* If SP is in the list, then the base register must be SP. */
19623 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
19624 /* If base register is in the list, there must be no explicit update. */
19625 if (regno
== regno_base
)
19626 gcc_assert (!update
);
19629 conditional
= reverse
? "%?%D0" : "%?%d0";
19630 /* Can't use POP if returning from an interrupt. */
19631 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
19632 sprintf (pattern
, "pop%s\t{", conditional
);
19635 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19636 It's just a convention, their semantics are identical. */
19637 if (regno_base
== SP_REGNUM
)
19638 sprintf (pattern
, "ldmfd%s\t", conditional
);
19640 sprintf (pattern
, "ldmia%s\t", conditional
);
19642 sprintf (pattern
, "ldm%s\t", conditional
);
19644 strcat (pattern
, reg_names
[regno_base
]);
19646 strcat (pattern
, "!, {");
19648 strcat (pattern
, ", {");
19651 /* Output the first destination register. */
19653 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
19655 /* Output the rest of the destination registers. */
19656 for (i
= offset
+ 1; i
< num_saves
; i
++)
19658 strcat (pattern
, ", ");
19660 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
19663 strcat (pattern
, "}");
19665 if (interrupt_p
&& return_pc
)
19666 strcat (pattern
, "^");
19668 output_asm_insn (pattern
, &cond
);
19672 /* Output the assembly for a store multiple. */
19675 vfp_output_vstmd (rtx
* operands
)
19681 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
19682 ? XEXP (operands
[0], 0)
19683 : XEXP (XEXP (operands
[0], 0), 0);
19684 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
19687 strcpy (pattern
, "vpush%?.64\t{%P1");
19689 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
19691 p
= strlen (pattern
);
19693 gcc_assert (REG_P (operands
[1]));
19695 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
19696 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
19698 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
19700 strcpy (&pattern
[p
], "}");
19702 output_asm_insn (pattern
, operands
);
19707 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19708 number of bytes pushed. */
19711 vfp_emit_fstmd (int base_reg
, int count
)
19718 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
19719 register pairs are stored by a store multiple insn. We avoid this
19720 by pushing an extra pair. */
19721 if (count
== 2 && !arm_arch6
)
19723 if (base_reg
== LAST_VFP_REGNUM
- 3)
19728 /* FSTMD may not store more than 16 doubleword registers at once. Split
19729 larger stores into multiple parts (up to a maximum of two, in
19734 /* NOTE: base_reg is an internal register number, so each D register
19736 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
19737 saved
+= vfp_emit_fstmd (base_reg
, 16);
19741 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
19742 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
19744 reg
= gen_rtx_REG (DFmode
, base_reg
);
19747 XVECEXP (par
, 0, 0)
19748 = gen_rtx_SET (gen_frame_mem
19750 gen_rtx_PRE_MODIFY (Pmode
,
19753 (Pmode
, stack_pointer_rtx
,
19756 gen_rtx_UNSPEC (BLKmode
,
19757 gen_rtvec (1, reg
),
19758 UNSPEC_PUSH_MULT
));
19760 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19761 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
19762 RTX_FRAME_RELATED_P (tmp
) = 1;
19763 XVECEXP (dwarf
, 0, 0) = tmp
;
19765 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
19766 RTX_FRAME_RELATED_P (tmp
) = 1;
19767 XVECEXP (dwarf
, 0, 1) = tmp
;
19769 for (i
= 1; i
< count
; i
++)
19771 reg
= gen_rtx_REG (DFmode
, base_reg
);
19773 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
19775 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
19776 plus_constant (Pmode
,
19780 RTX_FRAME_RELATED_P (tmp
) = 1;
19781 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
19784 par
= emit_insn (par
);
19785 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
19786 RTX_FRAME_RELATED_P (par
) = 1;
19791 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
19792 has the cmse_nonsecure_call attribute and returns false otherwise. */
19795 detect_cmse_nonsecure_call (tree addr
)
19800 tree fntype
= TREE_TYPE (addr
);
19801 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_call",
19802 TYPE_ATTRIBUTES (fntype
)))
19808 /* Emit a call instruction with pattern PAT. ADDR is the address of
19809 the call target. */
19812 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
19816 insn
= emit_call_insn (pat
);
19818 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19819 If the call might use such an entry, add a use of the PIC register
19820 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19821 if (TARGET_VXWORKS_RTP
19824 && SYMBOL_REF_P (addr
)
19825 && (SYMBOL_REF_DECL (addr
)
19826 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
19827 : !SYMBOL_REF_LOCAL_P (addr
)))
19829 require_pic_register (NULL_RTX
, false /*compute_now*/);
19830 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
19835 rtx fdpic_reg
= gen_rtx_REG (Pmode
, FDPIC_REGNUM
);
19836 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), fdpic_reg
);
19839 if (TARGET_AAPCS_BASED
)
19841 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19842 linker. We need to add an IP clobber to allow setting
19843 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19844 is not needed since it's a fixed register. */
19845 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
19846 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
19850 /* Output a 'call' insn. */
19852 output_call (rtx
*operands
)
19854 gcc_assert (!arm_arch5t
); /* Patterns should call blx <reg> directly. */
19856 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19857 if (REGNO (operands
[0]) == LR_REGNUM
)
19859 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
19860 output_asm_insn ("mov%?\t%0, %|lr", operands
);
19863 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
19865 if (TARGET_INTERWORK
|| arm_arch4t
)
19866 output_asm_insn ("bx%?\t%0", operands
);
19868 output_asm_insn ("mov%?\t%|pc, %0", operands
);
19873 /* Output a move from arm registers to arm registers of a long double
19874 OPERANDS[0] is the destination.
19875 OPERANDS[1] is the source. */
19877 output_mov_long_double_arm_from_arm (rtx
*operands
)
19879 /* We have to be careful here because the two might overlap. */
19880 int dest_start
= REGNO (operands
[0]);
19881 int src_start
= REGNO (operands
[1]);
19885 if (dest_start
< src_start
)
19887 for (i
= 0; i
< 3; i
++)
19889 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
19890 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
19891 output_asm_insn ("mov%?\t%0, %1", ops
);
19896 for (i
= 2; i
>= 0; i
--)
19898 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
19899 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
19900 output_asm_insn ("mov%?\t%0, %1", ops
);
19908 arm_emit_movpair (rtx dest
, rtx src
)
19910 /* If the src is an immediate, simplify it. */
19911 if (CONST_INT_P (src
))
19913 HOST_WIDE_INT val
= INTVAL (src
);
19914 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
19915 if ((val
>> 16) & 0x0000ffff)
19917 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
19919 GEN_INT ((val
>> 16) & 0x0000ffff));
19920 rtx_insn
*insn
= get_last_insn ();
19921 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
19925 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
19926 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
19927 rtx_insn
*insn
= get_last_insn ();
19928 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
19931 /* Output a move between double words. It must be REG<-MEM
19934 output_move_double (rtx
*operands
, bool emit
, int *count
)
19936 enum rtx_code code0
= GET_CODE (operands
[0]);
19937 enum rtx_code code1
= GET_CODE (operands
[1]);
19942 /* The only case when this might happen is when
19943 you are looking at the length of a DImode instruction
19944 that has an invalid constant in it. */
19945 if (code0
== REG
&& code1
!= MEM
)
19947 gcc_assert (!emit
);
19954 unsigned int reg0
= REGNO (operands
[0]);
19955 const bool can_ldrd
= TARGET_LDRD
&& (TARGET_THUMB2
|| (reg0
% 2 == 0));
19957 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
19959 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
19961 switch (GET_CODE (XEXP (operands
[1], 0)))
19968 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
19969 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
19971 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
19976 gcc_assert (can_ldrd
);
19978 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
19985 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
19987 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
19995 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
19997 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
20002 gcc_assert (can_ldrd
);
20004 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
20009 /* Autoicrement addressing modes should never have overlapping
20010 base and destination registers, and overlapping index registers
20011 are already prohibited, so this doesn't need to worry about
20013 otherops
[0] = operands
[0];
20014 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
20015 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
20017 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
20019 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
20021 /* Registers overlap so split out the increment. */
20024 gcc_assert (can_ldrd
);
20025 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
20026 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
20033 /* Use a single insn if we can.
20034 FIXME: IWMMXT allows offsets larger than ldrd can
20035 handle, fix these up with a pair of ldr. */
20038 || !CONST_INT_P (otherops
[2])
20039 || (INTVAL (otherops
[2]) > -256
20040 && INTVAL (otherops
[2]) < 256)))
20043 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
20049 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
20050 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
20060 /* Use a single insn if we can.
20061 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20062 fix these up with a pair of ldr. */
20065 || !CONST_INT_P (otherops
[2])
20066 || (INTVAL (otherops
[2]) > -256
20067 && INTVAL (otherops
[2]) < 256)))
20070 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
20076 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
20077 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
20087 /* We might be able to use ldrd %0, %1 here. However the range is
20088 different to ldr/adr, and it is broken on some ARMv7-M
20089 implementations. */
20090 /* Use the second register of the pair to avoid problematic
20092 otherops
[1] = operands
[1];
20094 output_asm_insn ("adr%?\t%0, %1", otherops
);
20095 operands
[1] = otherops
[0];
20099 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
20101 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
20108 /* ??? This needs checking for thumb2. */
20110 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
20111 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
20113 otherops
[0] = operands
[0];
20114 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
20115 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
20117 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
20119 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
20121 switch ((int) INTVAL (otherops
[2]))
20125 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
20131 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
20137 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
20141 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
20142 operands
[1] = otherops
[0];
20144 && (REG_P (otherops
[2])
20146 || (CONST_INT_P (otherops
[2])
20147 && INTVAL (otherops
[2]) > -256
20148 && INTVAL (otherops
[2]) < 256)))
20150 if (reg_overlap_mentioned_p (operands
[0],
20153 /* Swap base and index registers over to
20154 avoid a conflict. */
20155 std::swap (otherops
[1], otherops
[2]);
20157 /* If both registers conflict, it will usually
20158 have been fixed by a splitter. */
20159 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
20160 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
20164 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
20165 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
20172 otherops
[0] = operands
[0];
20174 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
20179 if (CONST_INT_P (otherops
[2]))
20183 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
20184 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
20186 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
20192 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
20198 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
20205 return "ldrd%?\t%0, [%1]";
20207 return "ldmia%?\t%1, %M0";
20211 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
20212 /* Take care of overlapping base/data reg. */
20213 if (reg_mentioned_p (operands
[0], operands
[1]))
20217 output_asm_insn ("ldr%?\t%0, %1", otherops
);
20218 output_asm_insn ("ldr%?\t%0, %1", operands
);
20228 output_asm_insn ("ldr%?\t%0, %1", operands
);
20229 output_asm_insn ("ldr%?\t%0, %1", otherops
);
20239 /* Constraints should ensure this. */
20240 gcc_assert (code0
== MEM
&& code1
== REG
);
20241 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
20242 || (TARGET_ARM
&& TARGET_LDRD
));
20244 /* For TARGET_ARM the first source register of an STRD
20245 must be even. This is usually the case for double-word
20246 values but user assembly constraints can force an odd
20247 starting register. */
20248 bool allow_strd
= TARGET_LDRD
20249 && !(TARGET_ARM
&& (REGNO (operands
[1]) & 1) == 1);
20250 switch (GET_CODE (XEXP (operands
[0], 0)))
20256 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
20258 output_asm_insn ("stm%?\t%m0, %M1", operands
);
20263 gcc_assert (allow_strd
);
20265 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
20272 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
20274 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
20282 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
20284 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
20289 gcc_assert (allow_strd
);
20291 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
20296 otherops
[0] = operands
[1];
20297 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
20298 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
20300 /* IWMMXT allows offsets larger than strd can handle,
20301 fix these up with a pair of str. */
20303 && CONST_INT_P (otherops
[2])
20304 && (INTVAL(otherops
[2]) <= -256
20305 || INTVAL(otherops
[2]) >= 256))
20307 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
20311 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
20312 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
20321 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
20322 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
20328 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
20331 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
20336 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
20341 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
20342 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
20344 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
20348 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
20355 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
20362 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
20367 && (REG_P (otherops
[2])
20369 || (CONST_INT_P (otherops
[2])
20370 && INTVAL (otherops
[2]) > -256
20371 && INTVAL (otherops
[2]) < 256)))
20373 otherops
[0] = operands
[1];
20374 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
20376 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
20382 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
20383 otherops
[1] = operands
[1];
20386 output_asm_insn ("str%?\t%1, %0", operands
);
20387 output_asm_insn ("str%?\t%H1, %0", otherops
);
20397 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20398 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20401 output_move_quad (rtx
*operands
)
20403 if (REG_P (operands
[0]))
20405 /* Load, or reg->reg move. */
20407 if (MEM_P (operands
[1]))
20409 switch (GET_CODE (XEXP (operands
[1], 0)))
20412 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
20417 output_asm_insn ("adr%?\t%0, %1", operands
);
20418 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
20422 gcc_unreachable ();
20430 gcc_assert (REG_P (operands
[1]));
20432 dest
= REGNO (operands
[0]);
20433 src
= REGNO (operands
[1]);
20435 /* This seems pretty dumb, but hopefully GCC won't try to do it
20438 for (i
= 0; i
< 4; i
++)
20440 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
20441 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
20442 output_asm_insn ("mov%?\t%0, %1", ops
);
20445 for (i
= 3; i
>= 0; i
--)
20447 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
20448 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
20449 output_asm_insn ("mov%?\t%0, %1", ops
);
20455 gcc_assert (MEM_P (operands
[0]));
20456 gcc_assert (REG_P (operands
[1]));
20457 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
20459 switch (GET_CODE (XEXP (operands
[0], 0)))
20462 output_asm_insn ("stm%?\t%m0, %M1", operands
);
20466 gcc_unreachable ();
20473 /* Output a VFP load or store instruction. */
20476 output_move_vfp (rtx
*operands
)
20478 rtx reg
, mem
, addr
, ops
[2];
20479 int load
= REG_P (operands
[0]);
20480 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
20481 int sp
= (!TARGET_VFP_FP16INST
20482 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
20483 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
20488 reg
= operands
[!load
];
20489 mem
= operands
[load
];
20491 mode
= GET_MODE (reg
);
20493 gcc_assert (REG_P (reg
));
20494 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
20495 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
20501 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
20502 gcc_assert (MEM_P (mem
));
20504 addr
= XEXP (mem
, 0);
20506 switch (GET_CODE (addr
))
20509 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20510 ops
[0] = XEXP (addr
, 0);
20515 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20516 ops
[0] = XEXP (addr
, 0);
20521 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
20527 sprintf (buff
, templ
,
20528 load
? "ld" : "st",
20529 dp
? "64" : sp
? "32" : "16",
20531 integer_p
? "\t%@ int" : "");
20532 output_asm_insn (buff
, ops
);
20537 /* Output a Neon double-word or quad-word load or store, or a load
20538 or store for larger structure modes.
20540 WARNING: The ordering of elements is weird in big-endian mode,
20541 because the EABI requires that vectors stored in memory appear
20542 as though they were stored by a VSTM, as required by the EABI.
20543 GCC RTL defines element ordering based on in-memory order.
20544 This can be different from the architectural ordering of elements
20545 within a NEON register. The intrinsics defined in arm_neon.h use the
20546 NEON register element ordering, not the GCC RTL element ordering.
20548 For example, the in-memory ordering of a big-endian a quadword
20549 vector with 16-bit elements when stored from register pair {d0,d1}
20550 will be (lowest address first, d0[N] is NEON register element N):
20552 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20554 When necessary, quadword registers (dN, dN+1) are moved to ARM
20555 registers from rN in the order:
20557 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20559 So that STM/LDM can be used on vectors in ARM registers, and the
20560 same memory layout will result as if VSTM/VLDM were used.
20562 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20563 possible, which allows use of appropriate alignment tags.
20564 Note that the choice of "64" is independent of the actual vector
20565 element size; this size simply ensures that the behavior is
20566 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20568 Due to limitations of those instructions, use of VST1.64/VLD1.64
20569 is not possible if:
20570 - the address contains PRE_DEC, or
20571 - the mode refers to more than 4 double-word registers
20573 In those cases, it would be possible to replace VSTM/VLDM by a
20574 sequence of instructions; this is not currently implemented since
20575 this is not certain to actually improve performance. */
20578 output_move_neon (rtx
*operands
)
20580 rtx reg
, mem
, addr
, ops
[2];
20581 int regno
, nregs
, load
= REG_P (operands
[0]);
20586 reg
= operands
[!load
];
20587 mem
= operands
[load
];
20589 mode
= GET_MODE (reg
);
20591 gcc_assert (REG_P (reg
));
20592 regno
= REGNO (reg
);
20593 nregs
= REG_NREGS (reg
) / 2;
20594 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
20595 || NEON_REGNO_OK_FOR_QUAD (regno
));
20596 gcc_assert (VALID_NEON_DREG_MODE (mode
)
20597 || VALID_NEON_QREG_MODE (mode
)
20598 || VALID_NEON_STRUCT_MODE (mode
));
20599 gcc_assert (MEM_P (mem
));
20601 addr
= XEXP (mem
, 0);
20603 /* Strip off const from addresses like (const (plus (...))). */
20604 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
20605 addr
= XEXP (addr
, 0);
20607 switch (GET_CODE (addr
))
20610 /* We have to use vldm / vstm for too-large modes. */
20611 if (nregs
> 4 || (TARGET_HAVE_MVE
&& nregs
>= 2))
20613 templ
= "v%smia%%?\t%%0!, %%h1";
20614 ops
[0] = XEXP (addr
, 0);
20618 templ
= "v%s1.64\t%%h1, %%A0";
20625 /* We have to use vldm / vstm in this case, since there is no
20626 pre-decrement form of the vld1 / vst1 instructions. */
20627 templ
= "v%smdb%%?\t%%0!, %%h1";
20628 ops
[0] = XEXP (addr
, 0);
20633 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20634 gcc_unreachable ();
20637 /* We have to use vldm / vstm for too-large modes. */
20640 if (nregs
> 4 || (TARGET_HAVE_MVE
&& nregs
>= 2))
20641 templ
= "v%smia%%?\t%%m0, %%h1";
20643 templ
= "v%s1.64\t%%h1, %%A0";
20649 /* Fall through. */
20651 if (GET_CODE (addr
) == PLUS
)
20652 addr
= XEXP (addr
, 0);
20653 /* Fall through. */
20658 for (i
= 0; i
< nregs
; i
++)
20660 /* We're only using DImode here because it's a convenient
20662 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
20663 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
20664 if (reg_overlap_mentioned_p (ops
[0], mem
))
20666 gcc_assert (overlap
== -1);
20671 if (TARGET_HAVE_MVE
&& LABEL_REF_P (addr
))
20672 sprintf (buff
, "v%sr.64\t%%P0, %%1", load
? "ld" : "st");
20674 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
20675 output_asm_insn (buff
, ops
);
20680 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
20681 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
20682 if (TARGET_HAVE_MVE
&& LABEL_REF_P (addr
))
20683 sprintf (buff
, "v%sr.32\t%%P0, %%1", load
? "ld" : "st");
20685 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
20686 output_asm_insn (buff
, ops
);
20693 gcc_unreachable ();
20696 sprintf (buff
, templ
, load
? "ld" : "st");
20697 output_asm_insn (buff
, ops
);
20702 /* Compute and return the length of neon_mov<mode>, where <mode> is
20703 one of VSTRUCT modes: EI, OI, CI or XI. */
20705 arm_attr_length_move_neon (rtx_insn
*insn
)
20707 rtx reg
, mem
, addr
;
20711 extract_insn_cached (insn
);
20713 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
20715 mode
= GET_MODE (recog_data
.operand
[0]);
20726 gcc_unreachable ();
20730 load
= REG_P (recog_data
.operand
[0]);
20731 reg
= recog_data
.operand
[!load
];
20732 mem
= recog_data
.operand
[load
];
20734 gcc_assert (MEM_P (mem
));
20736 addr
= XEXP (mem
, 0);
20738 /* Strip off const from addresses like (const (plus (...))). */
20739 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
20740 addr
= XEXP (addr
, 0);
20742 if (LABEL_REF_P (addr
) || GET_CODE (addr
) == PLUS
)
20744 int insns
= REG_NREGS (reg
) / 2;
20751 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20755 arm_address_offset_is_imm (rtx_insn
*insn
)
20759 extract_insn_cached (insn
);
20761 if (REG_P (recog_data
.operand
[0]))
20764 mem
= recog_data
.operand
[0];
20766 gcc_assert (MEM_P (mem
));
20768 addr
= XEXP (mem
, 0);
20771 || (GET_CODE (addr
) == PLUS
20772 && REG_P (XEXP (addr
, 0))
20773 && CONST_INT_P (XEXP (addr
, 1))))
20779 /* Output an ADD r, s, #n where n may be too big for one instruction.
20780 If adding zero to one register, output nothing. */
20782 output_add_immediate (rtx
*operands
)
20784 HOST_WIDE_INT n
= INTVAL (operands
[2]);
20786 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
20789 output_multi_immediate (operands
,
20790 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20793 output_multi_immediate (operands
,
20794 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20801 /* Output a multiple immediate operation.
20802 OPERANDS is the vector of operands referred to in the output patterns.
20803 INSTR1 is the output pattern to use for the first constant.
20804 INSTR2 is the output pattern to use for subsequent constants.
20805 IMMED_OP is the index of the constant slot in OPERANDS.
20806 N is the constant value. */
20807 static const char *
20808 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
20809 int immed_op
, HOST_WIDE_INT n
)
20811 #if HOST_BITS_PER_WIDE_INT > 32
20817 /* Quick and easy output. */
20818 operands
[immed_op
] = const0_rtx
;
20819 output_asm_insn (instr1
, operands
);
20824 const char * instr
= instr1
;
20826 /* Note that n is never zero here (which would give no output). */
20827 for (i
= 0; i
< 32; i
+= 2)
20831 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
20832 output_asm_insn (instr
, operands
);
20842 /* Return the name of a shifter operation. */
20843 static const char *
20844 arm_shift_nmem(enum rtx_code code
)
20849 return ARM_LSL_NAME
;
20865 /* Return the appropriate ARM instruction for the operation code.
20866 The returned result should not be overwritten. OP is the rtx of the
20867 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20870 arithmetic_instr (rtx op
, int shift_first_arg
)
20872 switch (GET_CODE (op
))
20878 return shift_first_arg
? "rsb" : "sub";
20893 return arm_shift_nmem(GET_CODE(op
));
20896 gcc_unreachable ();
20900 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20901 for the operation code. The returned result should not be overwritten.
20902 OP is the rtx code of the shift.
20903 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
20905 static const char *
20906 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
20909 enum rtx_code code
= GET_CODE (op
);
20914 if (!CONST_INT_P (XEXP (op
, 1)))
20916 output_operand_lossage ("invalid shift operand");
20921 *amountp
= 32 - INTVAL (XEXP (op
, 1));
20929 mnem
= arm_shift_nmem(code
);
20930 if (CONST_INT_P (XEXP (op
, 1)))
20932 *amountp
= INTVAL (XEXP (op
, 1));
20934 else if (REG_P (XEXP (op
, 1)))
20941 output_operand_lossage ("invalid shift operand");
20947 /* We never have to worry about the amount being other than a
20948 power of 2, since this case can never be reloaded from a reg. */
20949 if (!CONST_INT_P (XEXP (op
, 1)))
20951 output_operand_lossage ("invalid shift operand");
20955 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
20957 /* Amount must be a power of two. */
20958 if (*amountp
& (*amountp
- 1))
20960 output_operand_lossage ("invalid shift operand");
20964 *amountp
= exact_log2 (*amountp
);
20965 gcc_assert (IN_RANGE (*amountp
, 0, 31));
20966 return ARM_LSL_NAME
;
20969 output_operand_lossage ("invalid shift operand");
20973 /* This is not 100% correct, but follows from the desire to merge
20974 multiplication by a power of 2 with the recognizer for a
20975 shift. >=32 is not a valid shift for "lsl", so we must try and
20976 output a shift that produces the correct arithmetical result.
20977 Using lsr #32 is identical except for the fact that the carry bit
20978 is not set correctly if we set the flags; but we never use the
20979 carry bit from such an operation, so we can ignore that. */
20980 if (code
== ROTATERT
)
20981 /* Rotate is just modulo 32. */
20983 else if (*amountp
!= (*amountp
& 31))
20985 if (code
== ASHIFT
)
20990 /* Shifts of 0 are no-ops. */
20997 /* Output a .ascii pseudo-op, keeping track of lengths. This is
20998 because /bin/as is horribly restrictive. The judgement about
20999 whether or not each character is 'printable' (and can be output as
21000 is) or not (and must be printed with an octal escape) must be made
21001 with reference to the *host* character set -- the situation is
21002 similar to that discussed in the comments above pp_c_char in
21003 c-pretty-print.cc. */
21005 #define MAX_ASCII_LEN 51
21008 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
21011 int len_so_far
= 0;
21013 fputs ("\t.ascii\t\"", stream
);
21015 for (i
= 0; i
< len
; i
++)
21019 if (len_so_far
>= MAX_ASCII_LEN
)
21021 fputs ("\"\n\t.ascii\t\"", stream
);
21027 if (c
== '\\' || c
== '\"')
21029 putc ('\\', stream
);
21037 fprintf (stream
, "\\%03o", c
);
21042 fputs ("\"\n", stream
);
21046 /* Compute the register save mask for registers 0 through 12
21047 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21049 static unsigned long
21050 arm_compute_save_reg0_reg12_mask (void)
21052 unsigned long func_type
= arm_current_func_type ();
21053 unsigned long save_reg_mask
= 0;
21056 if (IS_INTERRUPT (func_type
))
21058 unsigned int max_reg
;
21059 /* Interrupt functions must not corrupt any registers,
21060 even call clobbered ones. If this is a leaf function
21061 we can just examine the registers used by the RTL, but
21062 otherwise we have to assume that whatever function is
21063 called might clobber anything, and so we have to save
21064 all the call-clobbered registers as well. */
21065 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
21066 /* FIQ handlers have registers r8 - r12 banked, so
21067 we only need to check r0 - r7, Normal ISRs only
21068 bank r14 and r15, so we must check up to r12.
21069 r13 is the stack pointer which is always preserved,
21070 so we do not need to consider it here. */
21075 for (reg
= 0; reg
<= max_reg
; reg
++)
21076 if (reg_needs_saving_p (reg
))
21077 save_reg_mask
|= (1 << reg
);
21079 /* Also save the pic base register if necessary. */
21080 if (PIC_REGISTER_MAY_NEED_SAVING
21081 && crtl
->uses_pic_offset_table
)
21082 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
21084 else if (IS_VOLATILE(func_type
))
21086 /* For noreturn functions we historically omitted register saves
21087 altogether. However this really messes up debugging. As a
21088 compromise save just the frame pointers. Combined with the link
21089 register saved elsewhere this should be sufficient to get
21091 if (frame_pointer_needed
)
21092 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
21093 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
21094 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
21095 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
21096 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
21100 /* In the normal case we only need to save those registers
21101 which are call saved and which are used by this function. */
21102 for (reg
= 0; reg
<= 11; reg
++)
21103 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
21104 save_reg_mask
|= (1 << reg
);
21106 /* Handle the frame pointer as a special case. */
21107 if (frame_pointer_needed
)
21108 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
21110 /* If we aren't loading the PIC register,
21111 don't stack it even though it may be live. */
21112 if (PIC_REGISTER_MAY_NEED_SAVING
21113 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
21114 || crtl
->uses_pic_offset_table
))
21115 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
21117 /* The prologue will copy SP into R0, so save it. */
21118 if (IS_STACKALIGN (func_type
))
21119 save_reg_mask
|= 1;
21122 /* Save registers so the exception handler can modify them. */
21123 if (crtl
->calls_eh_return
)
21129 reg
= EH_RETURN_DATA_REGNO (i
);
21130 if (reg
== INVALID_REGNUM
)
21132 save_reg_mask
|= 1 << reg
;
21136 return save_reg_mask
;
21139 /* Return true if r3 is live at the start of the function. */
21142 arm_r3_live_at_start_p (void)
21144 /* Just look at cfg info, which is still close enough to correct at this
21145 point. This gives false positives for broken functions that might use
21146 uninitialized data that happens to be allocated in r3, but who cares? */
21147 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
21150 /* Compute the number of bytes used to store the static chain register on the
21151 stack, above the stack frame. We need to know this accurately to get the
21152 alignment of the rest of the stack frame correct. */
21155 arm_compute_static_chain_stack_bytes (void)
21157 /* Once the value is updated from the init value of -1, do not
21159 if (cfun
->machine
->static_chain_stack_bytes
!= -1)
21160 return cfun
->machine
->static_chain_stack_bytes
;
21162 /* See the defining assertion in arm_expand_prologue. */
21163 if (IS_NESTED (arm_current_func_type ())
21164 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21165 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21166 || flag_stack_clash_protection
)
21167 && !df_regs_ever_live_p (LR_REGNUM
)))
21168 && arm_r3_live_at_start_p ()
21169 && crtl
->args
.pretend_args_size
== 0)
21175 /* Compute a bit mask of which core registers need to be
21176 saved on the stack for the current function.
21177 This is used by arm_compute_frame_layout, which may add extra registers. */
21179 static unsigned long
21180 arm_compute_save_core_reg_mask (void)
21182 unsigned int save_reg_mask
= 0;
21183 unsigned long func_type
= arm_current_func_type ();
21186 if (IS_NAKED (func_type
))
21187 /* This should never really happen. */
21190 /* If we are creating a stack frame, then we must save the frame pointer,
21191 IP (which will hold the old stack pointer), LR and the PC. */
21192 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21194 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
21197 | (1 << PC_REGNUM
);
21199 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
21201 /* Decide if we need to save the link register.
21202 Interrupt routines have their own banked link register,
21203 so they never need to save it.
21204 Otherwise if we do not use the link register we do not need to save
21205 it. If we are pushing other registers onto the stack however, we
21206 can save an instruction in the epilogue by pushing the link register
21207 now and then popping it back into the PC. This incurs extra memory
21208 accesses though, so we only do it when optimizing for size, and only
21209 if we know that we will not need a fancy return sequence. */
21210 if (df_regs_ever_live_p (LR_REGNUM
)
21213 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
21214 && !crtl
->tail_call_emit
21215 && !crtl
->calls_eh_return
))
21216 save_reg_mask
|= 1 << LR_REGNUM
;
21218 if (cfun
->machine
->lr_save_eliminated
)
21219 save_reg_mask
&= ~ (1 << LR_REGNUM
);
21221 if (TARGET_REALLY_IWMMXT
21222 && ((bit_count (save_reg_mask
)
21223 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
21224 arm_compute_static_chain_stack_bytes())
21227 /* The total number of registers that are going to be pushed
21228 onto the stack is odd. We need to ensure that the stack
21229 is 64-bit aligned before we start to save iWMMXt registers,
21230 and also before we start to create locals. (A local variable
21231 might be a double or long long which we will load/store using
21232 an iWMMXt instruction). Therefore we need to push another
21233 ARM register, so that the stack will be 64-bit aligned. We
21234 try to avoid using the arg registers (r0 -r3) as they might be
21235 used to pass values in a tail call. */
21236 for (reg
= 4; reg
<= 12; reg
++)
21237 if ((save_reg_mask
& (1 << reg
)) == 0)
21241 save_reg_mask
|= (1 << reg
);
21244 cfun
->machine
->sibcall_blocked
= 1;
21245 save_reg_mask
|= (1 << 3);
21249 /* We may need to push an additional register for use initializing the
21250 PIC base register. */
21251 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
21252 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
21254 reg
= thumb_find_work_register (1 << 4);
21255 if (!call_used_or_fixed_reg_p (reg
))
21256 save_reg_mask
|= (1 << reg
);
21259 return save_reg_mask
;
21262 /* Compute a bit mask of which core registers need to be
21263 saved on the stack for the current function. */
21264 static unsigned long
21265 thumb1_compute_save_core_reg_mask (void)
21267 unsigned long mask
;
21271 for (reg
= 0; reg
< 12; reg
++)
21272 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
21275 /* Handle the frame pointer as a special case. */
21276 if (frame_pointer_needed
)
21277 mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
21280 && !TARGET_SINGLE_PIC_BASE
21281 && arm_pic_register
!= INVALID_REGNUM
21282 && crtl
->uses_pic_offset_table
)
21283 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
21285 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21286 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
21287 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
21289 /* LR will also be pushed if any lo regs are pushed. */
21290 if (mask
& 0xff || thumb_force_lr_save ())
21291 mask
|= (1 << LR_REGNUM
);
21293 bool call_clobbered_scratch
21294 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21295 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21297 /* Make sure we have a low work register if we need one. We will
21298 need one if we are going to push a high register, but we are not
21299 currently intending to push a low register. However if both the
21300 prologue and epilogue have a spare call-clobbered low register,
21301 then we won't need to find an additional work register. It does
21302 not need to be the same register in the prologue and
21304 if ((mask
& 0xff) == 0
21305 && !call_clobbered_scratch
21306 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
21308 /* Use thumb_find_work_register to choose which register
21309 we will use. If the register is live then we will
21310 have to push it. Use LAST_LO_REGNUM as our fallback
21311 choice for the register to select. */
21312 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
21313 /* Make sure the register returned by thumb_find_work_register is
21314 not part of the return value. */
21315 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
21316 reg
= LAST_LO_REGNUM
;
21318 if (callee_saved_reg_p (reg
))
21322 /* The 504 below is 8 bytes less than 512 because there are two possible
21323 alignment words. We can't tell here if they will be present or not so we
21324 have to play it safe and assume that they are. */
21325 if ((CALLER_INTERWORKING_SLOT_SIZE
+
21326 ROUND_UP_WORD (get_frame_size ()) +
21327 crtl
->outgoing_args_size
) >= 504)
21329 /* This is the same as the code in thumb1_expand_prologue() which
21330 determines which register to use for stack decrement. */
21331 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
21332 if (mask
& (1 << reg
))
21335 if (reg
> LAST_LO_REGNUM
)
21337 /* Make sure we have a register available for stack decrement. */
21338 mask
|= 1 << LAST_LO_REGNUM
;
21345 /* Return the number of bytes required to save VFP registers. */
21347 arm_get_vfp_saved_size (void)
21349 unsigned int regno
;
21354 /* Space for saved VFP registers. */
21355 if (TARGET_VFP_BASE
)
21358 for (regno
= FIRST_VFP_REGNUM
;
21359 regno
< LAST_VFP_REGNUM
;
21362 if (!reg_needs_saving_p (regno
) && !reg_needs_saving_p (regno
+ 1))
21366 /* Workaround ARM10 VFPr1 bug. */
21367 if (count
== 2 && !arm_arch6
)
21369 saved
+= count
* 8;
21378 if (count
== 2 && !arm_arch6
)
21380 saved
+= count
* 8;
21387 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21388 everything bar the final return instruction. If simple_return is true,
21389 then do not output epilogue, because it has already been emitted in RTL.
21391 Note: do not forget to update length attribute of corresponding insn pattern
21392 when changing assembly output (eg. length attribute of
21393 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21394 register clearing sequences). */
21396 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
21397 bool simple_return
)
21399 char conditional
[10];
21402 unsigned long live_regs_mask
;
21403 unsigned long func_type
;
21404 arm_stack_offsets
*offsets
;
21406 func_type
= arm_current_func_type ();
21408 if (IS_NAKED (func_type
))
21411 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
21413 /* If this function was declared non-returning, and we have
21414 found a tail call, then we have to trust that the called
21415 function won't return. */
21420 /* Otherwise, trap an attempted return by aborting. */
21422 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
21424 assemble_external_libcall (ops
[1]);
21425 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
21431 gcc_assert (!cfun
->calls_alloca
|| really_return
);
21433 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
21435 cfun
->machine
->return_used_this_function
= 1;
21437 offsets
= arm_get_frame_offsets ();
21438 live_regs_mask
= offsets
->saved_regs_mask
;
21440 if (!simple_return
&& live_regs_mask
)
21442 const char * return_reg
;
21444 /* If we do not have any special requirements for function exit
21445 (e.g. interworking) then we can load the return address
21446 directly into the PC. Otherwise we must load it into LR. */
21448 && !IS_CMSE_ENTRY (func_type
)
21449 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
21450 return_reg
= reg_names
[PC_REGNUM
];
21452 return_reg
= reg_names
[LR_REGNUM
];
21454 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
21456 /* There are three possible reasons for the IP register
21457 being saved. 1) a stack frame was created, in which case
21458 IP contains the old stack pointer, or 2) an ISR routine
21459 corrupted it, or 3) it was saved to align the stack on
21460 iWMMXt. In case 1, restore IP into SP, otherwise just
21462 if (frame_pointer_needed
)
21464 live_regs_mask
&= ~ (1 << IP_REGNUM
);
21465 live_regs_mask
|= (1 << SP_REGNUM
);
21468 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
21471 /* On some ARM architectures it is faster to use LDR rather than
21472 LDM to load a single register. On other architectures, the
21473 cost is the same. In 26 bit mode, or for exception handlers,
21474 we have to use LDM to load the PC so that the CPSR is also
21476 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
21477 if (live_regs_mask
== (1U << reg
))
21480 if (reg
<= LAST_ARM_REGNUM
21481 && (reg
!= LR_REGNUM
21483 || ! IS_INTERRUPT (func_type
)))
21485 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
21486 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
21493 /* Generate the load multiple instruction to restore the
21494 registers. Note we can get here, even if
21495 frame_pointer_needed is true, but only if sp already
21496 points to the base of the saved core registers. */
21497 if (live_regs_mask
& (1 << SP_REGNUM
))
21499 unsigned HOST_WIDE_INT stack_adjust
;
21501 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
21502 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
21504 if (stack_adjust
&& arm_arch5t
&& TARGET_ARM
)
21505 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
21508 /* If we can't use ldmib (SA110 bug),
21509 then try to pop r3 instead. */
21511 live_regs_mask
|= 1 << 3;
21513 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
21516 /* For interrupt returns we have to use an LDM rather than
21517 a POP so that we can use the exception return variant. */
21518 else if (IS_INTERRUPT (func_type
))
21519 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
21521 sprintf (instr
, "pop%s\t{", conditional
);
21523 p
= instr
+ strlen (instr
);
21525 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
21526 if (live_regs_mask
& (1 << reg
))
21528 int l
= strlen (reg_names
[reg
]);
21534 memcpy (p
, ", ", 2);
21538 memcpy (p
, "%|", 2);
21539 memcpy (p
+ 2, reg_names
[reg
], l
);
21543 if (live_regs_mask
& (1 << LR_REGNUM
))
21545 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
21546 /* If returning from an interrupt, restore the CPSR. */
21547 if (IS_INTERRUPT (func_type
))
21554 output_asm_insn (instr
, & operand
);
21556 /* See if we need to generate an extra instruction to
21557 perform the actual function return. */
21559 && func_type
!= ARM_FT_INTERWORKED
21560 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
21562 /* The return has already been handled
21563 by loading the LR into the PC. */
21570 switch ((int) ARM_FUNC_TYPE (func_type
))
21574 /* ??? This is wrong for unified assembly syntax. */
21575 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
21578 case ARM_FT_INTERWORKED
:
21579 gcc_assert (arm_arch5t
|| arm_arch4t
);
21580 sprintf (instr
, "bx%s\t%%|lr", conditional
);
21583 case ARM_FT_EXCEPTION
:
21584 /* ??? This is wrong for unified assembly syntax. */
21585 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
21589 if (IS_CMSE_ENTRY (func_type
))
21591 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21592 emitted by cmse_nonsecure_entry_clear_before_return () and the
21593 VSTR/VLDR instructions in the prologue and epilogue. */
21594 if (!TARGET_HAVE_FPCXT_CMSE
)
21596 /* Check if we have to clear the 'GE bits' which is only used if
21597 parallel add and subtraction instructions are available. */
21598 if (TARGET_INT_SIMD
)
21599 snprintf (instr
, sizeof (instr
),
21600 "msr%s\tAPSR_nzcvqg, %%|lr", conditional
);
21602 snprintf (instr
, sizeof (instr
),
21603 "msr%s\tAPSR_nzcvq, %%|lr", conditional
);
21605 output_asm_insn (instr
, & operand
);
21606 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21608 if (TARGET_HARD_FLOAT
)
21610 /* Clear the cumulative exception-status bits (0-4,7) and
21611 the condition code bits (28-31) of the FPSCR. We need
21612 to remember to clear the first scratch register used
21613 (IP) and save and restore the second (r4).
21615 Important note: the length of the
21616 thumb2_cmse_entry_return insn pattern must account for
21617 the size of the below instructions. */
21618 output_asm_insn ("push\t{%|r4}", & operand
);
21619 output_asm_insn ("vmrs\t%|ip, fpscr", & operand
);
21620 output_asm_insn ("movw\t%|r4, #65376", & operand
);
21621 output_asm_insn ("movt\t%|r4, #4095", & operand
);
21622 output_asm_insn ("and\t%|ip, %|r4", & operand
);
21623 output_asm_insn ("vmsr\tfpscr, %|ip", & operand
);
21624 output_asm_insn ("pop\t{%|r4}", & operand
);
21625 output_asm_insn ("mov\t%|ip, %|lr", & operand
);
21628 snprintf (instr
, sizeof (instr
), "bxns\t%%|lr");
21630 /* Use bx if it's available. */
21631 else if (arm_arch5t
|| arm_arch4t
)
21632 sprintf (instr
, "bx%s\t%%|lr", conditional
);
21634 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
21638 output_asm_insn (instr
, & operand
);
21644 /* Output in FILE asm statements needed to declare the NAME of the function
21645 defined by its DECL node. */
21648 arm_asm_declare_function_name (FILE *file
, const char *name
, tree decl
)
21650 size_t cmse_name_len
;
21651 char *cmse_name
= 0;
21652 char cmse_prefix
[] = "__acle_se_";
21654 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21655 extra function label for each function with the 'cmse_nonsecure_entry'
21656 attribute. This extra function label should be prepended with
21657 '__acle_se_', telling the linker that it needs to create secure gateway
21658 veneers for this function. */
21659 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_entry",
21660 DECL_ATTRIBUTES (decl
)))
21662 cmse_name_len
= sizeof (cmse_prefix
) + strlen (name
);
21663 cmse_name
= XALLOCAVEC (char, cmse_name_len
);
21664 snprintf (cmse_name
, cmse_name_len
, "%s%s", cmse_prefix
, name
);
21665 targetm
.asm_out
.globalize_label (file
, cmse_name
);
21667 ARM_DECLARE_FUNCTION_NAME (file
, cmse_name
, decl
);
21668 ASM_OUTPUT_TYPE_DIRECTIVE (file
, cmse_name
, "function");
21671 ARM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
21672 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
21673 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
21674 ASM_OUTPUT_LABEL (file
, name
);
21677 ASM_OUTPUT_LABEL (file
, cmse_name
);
21679 ARM_OUTPUT_FN_UNWIND (file
, TRUE
);
21682 /* Write the function name into the code section, directly preceding
21683 the function prologue.
21685 Code will be output similar to this:
21687 .ascii "arm_poke_function_name", 0
21690 .word 0xff000000 + (t1 - t0)
21691 arm_poke_function_name
21693 stmfd sp!, {fp, ip, lr, pc}
21696 When performing a stack backtrace, code can inspect the value
21697 of 'pc' stored at 'fp' + 0. If the trace function then looks
21698 at location pc - 12 and the top 8 bits are set, then we know
21699 that there is a function name embedded immediately preceding this
21700 location and has length ((pc[-3]) & 0xff000000).
21702 We assume that pc is declared as a pointer to an unsigned long.
21704 It is of no benefit to output the function name if we are assembling
21705 a leaf function. These function types will not contain a stack
21706 backtrace structure, therefore it is not possible to determine the
21709 arm_poke_function_name (FILE *stream
, const char *name
)
21711 unsigned long alignlength
;
21712 unsigned long length
;
21715 length
= strlen (name
) + 1;
21716 alignlength
= ROUND_UP_WORD (length
);
21718 ASM_OUTPUT_ASCII (stream
, name
, length
);
21719 ASM_OUTPUT_ALIGN (stream
, 2);
21720 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
21721 assemble_aligned_integer (UNITS_PER_WORD
, x
);
21724 /* Place some comments into the assembler stream
21725 describing the current function. */
21727 arm_output_function_prologue (FILE *f
)
21729 unsigned long func_type
;
21731 /* Sanity check. */
21732 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
21734 func_type
= arm_current_func_type ();
21736 switch ((int) ARM_FUNC_TYPE (func_type
))
21739 case ARM_FT_NORMAL
:
21741 case ARM_FT_INTERWORKED
:
21742 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
21745 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
21748 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
21750 case ARM_FT_EXCEPTION
:
21751 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
21755 if (IS_NAKED (func_type
))
21756 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21758 if (IS_VOLATILE (func_type
))
21759 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
21761 if (IS_NESTED (func_type
))
21762 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
21763 if (IS_STACKALIGN (func_type
))
21764 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21765 if (IS_CMSE_ENTRY (func_type
))
21766 asm_fprintf (f
, "\t%@ Non-secure entry function: called from non-secure code.\n");
21768 asm_fprintf (f
, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21769 (HOST_WIDE_INT
) crtl
->args
.size
,
21770 crtl
->args
.pretend_args_size
,
21771 (HOST_WIDE_INT
) get_frame_size ());
21773 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21774 frame_pointer_needed
,
21775 cfun
->machine
->uses_anonymous_args
);
21777 if (cfun
->machine
->lr_save_eliminated
)
21778 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
21780 if (crtl
->calls_eh_return
)
21781 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
21786 arm_output_function_epilogue (FILE *)
21788 arm_stack_offsets
*offsets
;
21794 /* Emit any call-via-reg trampolines that are needed for v4t support
21795 of call_reg and call_value_reg type insns. */
21796 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
21798 rtx label
= cfun
->machine
->call_via
[regno
];
21802 switch_to_section (function_section (current_function_decl
));
21803 targetm
.asm_out
.internal_label (asm_out_file
, "L",
21804 CODE_LABEL_NUMBER (label
));
21805 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
21809 /* ??? Probably not safe to set this here, since it assumes that a
21810 function will be emitted as assembly immediately after we generate
21811 RTL for it. This does not happen for inline functions. */
21812 cfun
->machine
->return_used_this_function
= 0;
21814 else /* TARGET_32BIT */
21816 /* We need to take into account any stack-frame rounding. */
21817 offsets
= arm_get_frame_offsets ();
21819 gcc_assert (!use_return_insn (FALSE
, NULL
)
21820 || (cfun
->machine
->return_used_this_function
!= 0)
21821 || offsets
->saved_regs
== offsets
->outgoing_args
21822 || frame_pointer_needed
);
21826 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21827 STR and STRD. If an even number of registers are being pushed, one
21828 or more STRD patterns are created for each register pair. If an
21829 odd number of registers are pushed, emit an initial STR followed by
21830 as many STRD instructions as are needed. This works best when the
21831 stack is initially 64-bit aligned (the normal case), since it
21832 ensures that each STRD is also 64-bit aligned. */
21834 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
21839 rtx par
= NULL_RTX
;
21840 rtx dwarf
= NULL_RTX
;
21844 num_regs
= bit_count (saved_regs_mask
);
21846 /* Must be at least one register to save, and can't save SP or PC. */
21847 gcc_assert (num_regs
> 0 && num_regs
<= 14);
21848 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
21849 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
21851 /* Create sequence for DWARF info. All the frame-related data for
21852 debugging is held in this wrapper. */
21853 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
21855 /* Describe the stack adjustment. */
21856 tmp
= gen_rtx_SET (stack_pointer_rtx
,
21857 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
21858 RTX_FRAME_RELATED_P (tmp
) = 1;
21859 XVECEXP (dwarf
, 0, 0) = tmp
;
21861 /* Find the first register. */
21862 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
21867 /* If there's an odd number of registers to push. Start off by
21868 pushing a single register. This ensures that subsequent strd
21869 operations are dword aligned (assuming that SP was originally
21870 64-bit aligned). */
21871 if ((num_regs
& 1) != 0)
21873 rtx reg
, mem
, insn
;
21875 reg
= gen_rtx_REG (SImode
, regno
);
21877 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
21878 stack_pointer_rtx
));
21880 mem
= gen_frame_mem (Pmode
,
21882 (Pmode
, stack_pointer_rtx
,
21883 plus_constant (Pmode
, stack_pointer_rtx
,
21886 tmp
= gen_rtx_SET (mem
, reg
);
21887 RTX_FRAME_RELATED_P (tmp
) = 1;
21888 insn
= emit_insn (tmp
);
21889 RTX_FRAME_RELATED_P (insn
) = 1;
21890 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21891 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
21892 RTX_FRAME_RELATED_P (tmp
) = 1;
21895 XVECEXP (dwarf
, 0, i
) = tmp
;
21899 while (i
< num_regs
)
21900 if (saved_regs_mask
& (1 << regno
))
21902 rtx reg1
, reg2
, mem1
, mem2
;
21903 rtx tmp0
, tmp1
, tmp2
;
21906 /* Find the register to pair with this one. */
21907 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
21911 reg1
= gen_rtx_REG (SImode
, regno
);
21912 reg2
= gen_rtx_REG (SImode
, regno2
);
21919 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
21922 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
21924 -4 * (num_regs
- 1)));
21925 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
21926 plus_constant (Pmode
, stack_pointer_rtx
,
21928 tmp1
= gen_rtx_SET (mem1
, reg1
);
21929 tmp2
= gen_rtx_SET (mem2
, reg2
);
21930 RTX_FRAME_RELATED_P (tmp0
) = 1;
21931 RTX_FRAME_RELATED_P (tmp1
) = 1;
21932 RTX_FRAME_RELATED_P (tmp2
) = 1;
21933 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
21934 XVECEXP (par
, 0, 0) = tmp0
;
21935 XVECEXP (par
, 0, 1) = tmp1
;
21936 XVECEXP (par
, 0, 2) = tmp2
;
21937 insn
= emit_insn (par
);
21938 RTX_FRAME_RELATED_P (insn
) = 1;
21939 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21943 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
21946 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
21949 tmp1
= gen_rtx_SET (mem1
, reg1
);
21950 tmp2
= gen_rtx_SET (mem2
, reg2
);
21951 RTX_FRAME_RELATED_P (tmp1
) = 1;
21952 RTX_FRAME_RELATED_P (tmp2
) = 1;
21953 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
21954 XVECEXP (par
, 0, 0) = tmp1
;
21955 XVECEXP (par
, 0, 1) = tmp2
;
21959 /* Create unwind information. This is an approximation. */
21960 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
21961 plus_constant (Pmode
,
21965 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
21966 plus_constant (Pmode
,
21971 RTX_FRAME_RELATED_P (tmp1
) = 1;
21972 RTX_FRAME_RELATED_P (tmp2
) = 1;
21973 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
21974 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
21976 regno
= regno2
+ 1;
21984 /* STRD in ARM mode requires consecutive registers. This function emits STRD
21985 whenever possible, otherwise it emits single-word stores. The first store
21986 also allocates stack space for all saved registers, using writeback with
21987 post-addressing mode. All other stores use offset addressing. If no STRD
21988 can be emitted, this function emits a sequence of single-word stores,
21989 and not an STM as before, because single-word stores provide more freedom
21990 scheduling and can be turned into an STM by peephole optimizations. */
21992 arm_emit_strd_push (unsigned long saved_regs_mask
)
21995 int i
, j
, dwarf_index
= 0;
21997 rtx dwarf
= NULL_RTX
;
21998 rtx insn
= NULL_RTX
;
22001 /* TODO: A more efficient code can be emitted by changing the
22002 layout, e.g., first push all pairs that can use STRD to keep the
22003 stack aligned, and then push all other registers. */
22004 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
22005 if (saved_regs_mask
& (1 << i
))
22008 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
22009 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
22010 gcc_assert (num_regs
> 0);
22012 /* Create sequence for DWARF info. */
22013 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
22015 /* For dwarf info, we generate explicit stack update. */
22016 tmp
= gen_rtx_SET (stack_pointer_rtx
,
22017 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
22018 RTX_FRAME_RELATED_P (tmp
) = 1;
22019 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
22021 /* Save registers. */
22022 offset
= - 4 * num_regs
;
22024 while (j
<= LAST_ARM_REGNUM
)
22025 if (saved_regs_mask
& (1 << j
))
22028 && (saved_regs_mask
& (1 << (j
+ 1))))
22030 /* Current register and previous register form register pair for
22031 which STRD can be generated. */
22034 /* Allocate stack space for all saved registers. */
22035 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
22036 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
22037 mem
= gen_frame_mem (DImode
, tmp
);
22040 else if (offset
> 0)
22041 mem
= gen_frame_mem (DImode
,
22042 plus_constant (Pmode
,
22046 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
22048 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
22049 RTX_FRAME_RELATED_P (tmp
) = 1;
22050 tmp
= emit_insn (tmp
);
22052 /* Record the first store insn. */
22053 if (dwarf_index
== 1)
22056 /* Generate dwarf info. */
22057 mem
= gen_frame_mem (SImode
,
22058 plus_constant (Pmode
,
22061 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
22062 RTX_FRAME_RELATED_P (tmp
) = 1;
22063 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
22065 mem
= gen_frame_mem (SImode
,
22066 plus_constant (Pmode
,
22069 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
22070 RTX_FRAME_RELATED_P (tmp
) = 1;
22071 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
22078 /* Emit a single word store. */
22081 /* Allocate stack space for all saved registers. */
22082 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
22083 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
22084 mem
= gen_frame_mem (SImode
, tmp
);
22087 else if (offset
> 0)
22088 mem
= gen_frame_mem (SImode
,
22089 plus_constant (Pmode
,
22093 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
22095 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
22096 RTX_FRAME_RELATED_P (tmp
) = 1;
22097 tmp
= emit_insn (tmp
);
22099 /* Record the first store insn. */
22100 if (dwarf_index
== 1)
22103 /* Generate dwarf info. */
22104 mem
= gen_frame_mem (SImode
,
22105 plus_constant(Pmode
,
22108 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
22109 RTX_FRAME_RELATED_P (tmp
) = 1;
22110 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
22119 /* Attach dwarf info to the first insn we generate. */
22120 gcc_assert (insn
!= NULL_RTX
);
22121 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
22122 RTX_FRAME_RELATED_P (insn
) = 1;
22125 /* Generate and emit an insn that we will recognize as a push_multi.
22126 Unfortunately, since this insn does not reflect very well the actual
22127 semantics of the operation, we need to annotate the insn for the benefit
22128 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22129 MASK for registers that should be annotated for DWARF2 frame unwind
22132 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
22135 int num_dwarf_regs
= 0;
22139 int dwarf_par_index
;
22142 /* We don't record the PC in the dwarf frame information. */
22143 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
22145 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
22147 if (mask
& (1 << i
))
22149 if (dwarf_regs_mask
& (1 << i
))
22153 gcc_assert (num_regs
&& num_regs
<= 16);
22154 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
22156 /* For the body of the insn we are going to generate an UNSPEC in
22157 parallel with several USEs. This allows the insn to be recognized
22158 by the push_multi pattern in the arm.md file.
22160 The body of the insn looks something like this:
22163 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22164 (const_int:SI <num>)))
22165 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22171 For the frame note however, we try to be more explicit and actually
22172 show each register being stored into the stack frame, plus a (single)
22173 decrement of the stack pointer. We do it this way in order to be
22174 friendly to the stack unwinding code, which only wants to see a single
22175 stack decrement per instruction. The RTL we generate for the note looks
22176 something like this:
22179 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22180 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22181 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22182 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22186 FIXME:: In an ideal world the PRE_MODIFY would not exist and
22187 instead we'd have a parallel expression detailing all
22188 the stores to the various memory addresses so that debug
22189 information is more up-to-date. Remember however while writing
22190 this to take care of the constraints with the push instruction.
22192 Note also that this has to be taken care of for the VFP registers.
22194 For more see PR43399. */
22196 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
22197 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
22198 dwarf_par_index
= 1;
22200 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
22202 if (mask
& (1 << i
))
22204 reg
= gen_rtx_REG (SImode
, i
);
22206 XVECEXP (par
, 0, 0)
22207 = gen_rtx_SET (gen_frame_mem
22209 gen_rtx_PRE_MODIFY (Pmode
,
22212 (Pmode
, stack_pointer_rtx
,
22215 gen_rtx_UNSPEC (BLKmode
,
22216 gen_rtvec (1, reg
),
22217 UNSPEC_PUSH_MULT
));
22219 if (dwarf_regs_mask
& (1 << i
))
22221 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
22223 RTX_FRAME_RELATED_P (tmp
) = 1;
22224 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
22231 for (j
= 1, i
++; j
< num_regs
; i
++)
22233 if (mask
& (1 << i
))
22235 reg
= gen_rtx_REG (SImode
, i
);
22237 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
22239 if (dwarf_regs_mask
& (1 << i
))
22242 = gen_rtx_SET (gen_frame_mem
22244 plus_constant (Pmode
, stack_pointer_rtx
,
22247 RTX_FRAME_RELATED_P (tmp
) = 1;
22248 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
22255 par
= emit_insn (par
);
22257 tmp
= gen_rtx_SET (stack_pointer_rtx
,
22258 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
22259 RTX_FRAME_RELATED_P (tmp
) = 1;
22260 XVECEXP (dwarf
, 0, 0) = tmp
;
22262 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
22267 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22268 SIZE is the offset to be adjusted.
22269 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22271 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
22275 RTX_FRAME_RELATED_P (insn
) = 1;
22276 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
22277 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
22280 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22281 SAVED_REGS_MASK shows which registers need to be restored.
22283 Unfortunately, since this insn does not reflect very well the actual
22284 semantics of the operation, we need to annotate the insn for the benefit
22285 of DWARF2 frame unwind information. */
22287 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
22292 rtx dwarf
= NULL_RTX
;
22294 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
22298 offset_adj
= return_in_pc
? 1 : 0;
22299 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
22300 if (saved_regs_mask
& (1 << i
))
22303 gcc_assert (num_regs
&& num_regs
<= 16);
22305 /* If SP is in reglist, then we don't emit SP update insn. */
22306 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
22308 /* The parallel needs to hold num_regs SETs
22309 and one SET for the stack update. */
22310 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
22313 XVECEXP (par
, 0, 0) = ret_rtx
;
22317 /* Increment the stack pointer, based on there being
22318 num_regs 4-byte registers to restore. */
22319 tmp
= gen_rtx_SET (stack_pointer_rtx
,
22320 plus_constant (Pmode
,
22323 RTX_FRAME_RELATED_P (tmp
) = 1;
22324 XVECEXP (par
, 0, offset_adj
) = tmp
;
22327 /* Now restore every reg, which may include PC. */
22328 for (j
= 0, i
= 0; j
< num_regs
; i
++)
22329 if (saved_regs_mask
& (1 << i
))
22331 reg
= gen_rtx_REG (SImode
, i
);
22332 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
22334 /* Emit single load with writeback. */
22335 tmp
= gen_frame_mem (SImode
,
22336 gen_rtx_POST_INC (Pmode
,
22337 stack_pointer_rtx
));
22338 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
22339 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
22343 tmp
= gen_rtx_SET (reg
,
22346 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
22347 RTX_FRAME_RELATED_P (tmp
) = 1;
22348 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
22350 /* We need to maintain a sequence for DWARF info too. As dwarf info
22351 should not have PC, skip PC. */
22352 if (i
!= PC_REGNUM
)
22353 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
22359 par
= emit_jump_insn (par
);
22361 par
= emit_insn (par
);
22363 REG_NOTES (par
) = dwarf
;
22365 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
22366 stack_pointer_rtx
, stack_pointer_rtx
);
22369 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22370 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22372 Unfortunately, since this insn does not reflect very well the actual
22373 semantics of the operation, we need to annotate the insn for the benefit
22374 of DWARF2 frame unwind information. */
22376 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
22380 rtx dwarf
= NULL_RTX
;
22383 gcc_assert (num_regs
&& num_regs
<= 32);
22385 /* Workaround ARM10 VFPr1 bug. */
22386 if (num_regs
== 2 && !arm_arch6
)
22388 if (first_reg
== 15)
22394 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22395 there could be up to 32 D-registers to restore.
22396 If there are more than 16 D-registers, make two recursive calls,
22397 each of which emits one pop_multi instruction. */
22400 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
22401 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
22405 /* The parallel needs to hold num_regs SETs
22406 and one SET for the stack update. */
22407 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
22409 /* Increment the stack pointer, based on there being
22410 num_regs 8-byte registers to restore. */
22411 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
22412 RTX_FRAME_RELATED_P (tmp
) = 1;
22413 XVECEXP (par
, 0, 0) = tmp
;
22415 /* Now show every reg that will be restored, using a SET for each. */
22416 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
22418 reg
= gen_rtx_REG (DFmode
, i
);
22420 tmp
= gen_rtx_SET (reg
,
22423 plus_constant (Pmode
, base_reg
, 8 * j
)));
22424 RTX_FRAME_RELATED_P (tmp
) = 1;
22425 XVECEXP (par
, 0, j
+ 1) = tmp
;
22427 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
22432 par
= emit_insn (par
);
22433 REG_NOTES (par
) = dwarf
;
22435 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
22436 if (REGNO (base_reg
) == IP_REGNUM
)
22438 RTX_FRAME_RELATED_P (par
) = 1;
22439 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
22442 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
22443 base_reg
, base_reg
);
22446 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
22447 number of registers are being popped, multiple LDRD patterns are created for
22448 all register pairs. If odd number of registers are popped, last register is
22449 loaded by using LDR pattern. */
22451 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
22455 rtx par
= NULL_RTX
;
22456 rtx dwarf
= NULL_RTX
;
22457 rtx tmp
, reg
, tmp1
;
22458 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
22460 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
22461 if (saved_regs_mask
& (1 << i
))
22464 gcc_assert (num_regs
&& num_regs
<= 16);
22466 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
22467 to be popped. So, if num_regs is even, now it will become odd,
22468 and we can generate pop with PC. If num_regs is odd, it will be
22469 even now, and ldr with return can be generated for PC. */
22473 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
22475 /* Var j iterates over all the registers to gather all the registers in
22476 saved_regs_mask. Var i gives index of saved registers in stack frame.
22477 A PARALLEL RTX of register-pair is created here, so that pattern for
22478 LDRD can be matched. As PC is always last register to be popped, and
22479 we have already decremented num_regs if PC, we don't have to worry
22480 about PC in this loop. */
22481 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
22482 if (saved_regs_mask
& (1 << j
))
22484 /* Create RTX for memory load. */
22485 reg
= gen_rtx_REG (SImode
, j
);
22486 tmp
= gen_rtx_SET (reg
,
22487 gen_frame_mem (SImode
,
22488 plus_constant (Pmode
,
22489 stack_pointer_rtx
, 4 * i
)));
22490 RTX_FRAME_RELATED_P (tmp
) = 1;
22494 /* When saved-register index (i) is even, the RTX to be emitted is
22495 yet to be created. Hence create it first. The LDRD pattern we
22496 are generating is :
22497 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22498 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22499 where target registers need not be consecutive. */
22500 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
22504 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
22505 added as 0th element and if i is odd, reg_i is added as 1st element
22506 of LDRD pattern shown above. */
22507 XVECEXP (par
, 0, (i
% 2)) = tmp
;
22508 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
22512 /* When saved-register index (i) is odd, RTXs for both the registers
22513 to be loaded are generated in above given LDRD pattern, and the
22514 pattern can be emitted now. */
22515 par
= emit_insn (par
);
22516 REG_NOTES (par
) = dwarf
;
22517 RTX_FRAME_RELATED_P (par
) = 1;
22523 /* If the number of registers pushed is odd AND return_in_pc is false OR
22524 number of registers are even AND return_in_pc is true, last register is
22525 popped using LDR. It can be PC as well. Hence, adjust the stack first and
22526 then LDR with post increment. */
22528 /* Increment the stack pointer, based on there being
22529 num_regs 4-byte registers to restore. */
22530 tmp
= gen_rtx_SET (stack_pointer_rtx
,
22531 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
22532 RTX_FRAME_RELATED_P (tmp
) = 1;
22533 tmp
= emit_insn (tmp
);
22536 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
22537 stack_pointer_rtx
, stack_pointer_rtx
);
22542 if (((num_regs
% 2) == 1 && !return_in_pc
)
22543 || ((num_regs
% 2) == 0 && return_in_pc
))
22545 /* Scan for the single register to be popped. Skip until the saved
22546 register is found. */
22547 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
22549 /* Gen LDR with post increment here. */
22550 tmp1
= gen_rtx_MEM (SImode
,
22551 gen_rtx_POST_INC (SImode
,
22552 stack_pointer_rtx
));
22553 set_mem_alias_set (tmp1
, get_frame_alias_set ());
22555 reg
= gen_rtx_REG (SImode
, j
);
22556 tmp
= gen_rtx_SET (reg
, tmp1
);
22557 RTX_FRAME_RELATED_P (tmp
) = 1;
22558 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
22562 /* If return_in_pc, j must be PC_REGNUM. */
22563 gcc_assert (j
== PC_REGNUM
);
22564 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
22565 XVECEXP (par
, 0, 0) = ret_rtx
;
22566 XVECEXP (par
, 0, 1) = tmp
;
22567 par
= emit_jump_insn (par
);
22571 par
= emit_insn (tmp
);
22572 REG_NOTES (par
) = dwarf
;
22573 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
22574 stack_pointer_rtx
, stack_pointer_rtx
);
22578 else if ((num_regs
% 2) == 1 && return_in_pc
)
22580 /* There are 2 registers to be popped. So, generate the pattern
22581 pop_multiple_with_stack_update_and_return to pop in PC. */
22582 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
22588 /* LDRD in ARM mode needs consecutive registers as operands. This function
22589 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22590 offset addressing and then generates one separate stack udpate. This provides
22591 more scheduling freedom, compared to writeback on every load. However,
22592 if the function returns using load into PC directly
22593 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22594 before the last load. TODO: Add a peephole optimization to recognize
22595 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22596 peephole optimization to merge the load at stack-offset zero
22597 with the stack update instruction using load with writeback
22598 in post-index addressing mode. */
22600 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
22604 rtx par
= NULL_RTX
;
22605 rtx dwarf
= NULL_RTX
;
22608 /* Restore saved registers. */
22609 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
22611 while (j
<= LAST_ARM_REGNUM
)
22612 if (saved_regs_mask
& (1 << j
))
22615 && (saved_regs_mask
& (1 << (j
+ 1)))
22616 && (j
+ 1) != PC_REGNUM
)
22618 /* Current register and next register form register pair for which
22619 LDRD can be generated. PC is always the last register popped, and
22620 we handle it separately. */
22622 mem
= gen_frame_mem (DImode
,
22623 plus_constant (Pmode
,
22627 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
22629 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
22630 tmp
= emit_insn (tmp
);
22631 RTX_FRAME_RELATED_P (tmp
) = 1;
22633 /* Generate dwarf info. */
22635 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
22636 gen_rtx_REG (SImode
, j
),
22638 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
22639 gen_rtx_REG (SImode
, j
+ 1),
22642 REG_NOTES (tmp
) = dwarf
;
22647 else if (j
!= PC_REGNUM
)
22649 /* Emit a single word load. */
22651 mem
= gen_frame_mem (SImode
,
22652 plus_constant (Pmode
,
22656 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
22658 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
22659 tmp
= emit_insn (tmp
);
22660 RTX_FRAME_RELATED_P (tmp
) = 1;
22662 /* Generate dwarf info. */
22663 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
22664 gen_rtx_REG (SImode
, j
),
22670 else /* j == PC_REGNUM */
22676 /* Update the stack. */
22679 tmp
= gen_rtx_SET (stack_pointer_rtx
,
22680 plus_constant (Pmode
,
22683 tmp
= emit_insn (tmp
);
22684 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
22685 stack_pointer_rtx
, stack_pointer_rtx
);
22689 if (saved_regs_mask
& (1 << PC_REGNUM
))
22691 /* Only PC is to be popped. */
22692 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
22693 XVECEXP (par
, 0, 0) = ret_rtx
;
22694 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
22695 gen_frame_mem (SImode
,
22696 gen_rtx_POST_INC (SImode
,
22697 stack_pointer_rtx
)));
22698 RTX_FRAME_RELATED_P (tmp
) = 1;
22699 XVECEXP (par
, 0, 1) = tmp
;
22700 par
= emit_jump_insn (par
);
22702 /* Generate dwarf info. */
22703 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
22704 gen_rtx_REG (SImode
, PC_REGNUM
),
22706 REG_NOTES (par
) = dwarf
;
22707 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
22708 stack_pointer_rtx
, stack_pointer_rtx
);
22712 /* Calculate the size of the return value that is passed in registers. */
22714 arm_size_return_regs (void)
22718 if (crtl
->return_rtx
!= 0)
22719 mode
= GET_MODE (crtl
->return_rtx
);
22721 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
22723 return GET_MODE_SIZE (mode
);
22726 /* Return true if the current function needs to save/restore LR. */
22728 thumb_force_lr_save (void)
22730 return !cfun
->machine
->lr_save_eliminated
22732 || thumb_far_jump_used_p ()
22733 || df_regs_ever_live_p (LR_REGNUM
));
22736 /* We do not know if r3 will be available because
22737 we do have an indirect tailcall happening in this
22738 particular case. */
22740 is_indirect_tailcall_p (rtx call
)
22742 rtx pat
= PATTERN (call
);
22744 /* Indirect tail call. */
22745 pat
= XVECEXP (pat
, 0, 0);
22746 if (GET_CODE (pat
) == SET
)
22747 pat
= SET_SRC (pat
);
22749 pat
= XEXP (XEXP (pat
, 0), 0);
22750 return REG_P (pat
);
22753 /* Return true if r3 is used by any of the tail call insns in the
22754 current function. */
22756 any_sibcall_could_use_r3 (void)
22761 if (!crtl
->tail_call_emit
)
22763 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
22764 if (e
->flags
& EDGE_SIBCALL
)
22766 rtx_insn
*call
= BB_END (e
->src
);
22767 if (!CALL_P (call
))
22768 call
= prev_nonnote_nondebug_insn (call
);
22769 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
22770 if (find_regno_fusage (call
, USE
, 3)
22771 || is_indirect_tailcall_p (call
))
22778 /* Compute the distance from register FROM to register TO.
22779 These can be the arg pointer (26), the soft frame pointer (25),
22780 the stack pointer (13) or the hard frame pointer (11).
22781 In thumb mode r7 is used as the soft frame pointer, if needed.
22782 Typical stack layout looks like this:
22784 old stack pointer -> | |
22787 | | saved arguments for
22788 | | vararg functions
22791 hard FP & arg pointer -> | | \
22799 soft frame pointer -> | | /
22804 locals base pointer -> | | /
22809 current stack pointer -> | | /
22812 For a given function some or all of these stack components
22813 may not be needed, giving rise to the possibility of
22814 eliminating some of the registers.
22816 The values returned by this function must reflect the behavior
22817 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22819 The sign of the number returned reflects the direction of stack
22820 growth, so the values are positive for all eliminations except
22821 from the soft frame pointer to the hard frame pointer.
22823 SFP may point just inside the local variables block to ensure correct
22827 /* Return cached stack offsets. */
22829 static arm_stack_offsets
*
22830 arm_get_frame_offsets (void)
22832 struct arm_stack_offsets
*offsets
;
22834 offsets
= &cfun
->machine
->stack_offsets
;
22840 /* Calculate stack offsets. These are used to calculate register elimination
22841 offsets and in prologue/epilogue code. Also calculates which registers
22842 should be saved. */
22845 arm_compute_frame_layout (void)
22847 struct arm_stack_offsets
*offsets
;
22848 unsigned long func_type
;
22851 HOST_WIDE_INT frame_size
;
22854 offsets
= &cfun
->machine
->stack_offsets
;
22856 /* Initially this is the size of the local variables. It will translated
22857 into an offset once we have determined the size of preceding data. */
22858 frame_size
= ROUND_UP_WORD (get_frame_size ());
22860 /* Space for variadic functions. */
22861 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
22863 /* In Thumb mode this is incorrect, but never used. */
22865 = (offsets
->saved_args
22866 + arm_compute_static_chain_stack_bytes ()
22867 + (frame_pointer_needed
? 4 : 0));
22871 unsigned int regno
;
22873 offsets
->saved_regs_mask
= arm_compute_save_core_reg_mask ();
22874 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
22875 saved
= core_saved
;
22877 /* We know that SP will be doubleword aligned on entry, and we must
22878 preserve that condition at any subroutine call. We also require the
22879 soft frame pointer to be doubleword aligned. */
22881 if (TARGET_REALLY_IWMMXT
)
22883 /* Check for the call-saved iWMMXt registers. */
22884 for (regno
= FIRST_IWMMXT_REGNUM
;
22885 regno
<= LAST_IWMMXT_REGNUM
;
22887 if (reg_needs_saving_p (regno
))
22891 func_type
= arm_current_func_type ();
22892 /* Space for saved VFP registers. */
22893 if (! IS_VOLATILE (func_type
)
22894 && TARGET_VFP_BASE
)
22895 saved
+= arm_get_vfp_saved_size ();
22897 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
22898 nonecure entry functions with VSTR/VLDR. */
22899 if (TARGET_HAVE_FPCXT_CMSE
&& IS_CMSE_ENTRY (func_type
))
22902 else /* TARGET_THUMB1 */
22904 offsets
->saved_regs_mask
= thumb1_compute_save_core_reg_mask ();
22905 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
22906 saved
= core_saved
;
22907 if (TARGET_BACKTRACE
)
22911 /* Saved registers include the stack frame. */
22912 offsets
->saved_regs
22913 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
22914 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
22916 /* A leaf function does not need any stack alignment if it has nothing
22918 if (crtl
->is_leaf
&& frame_size
== 0
22919 /* However if it calls alloca(), we have a dynamically allocated
22920 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
22921 && ! cfun
->calls_alloca
)
22923 offsets
->outgoing_args
= offsets
->soft_frame
;
22924 offsets
->locals_base
= offsets
->soft_frame
;
22928 /* Ensure SFP has the correct alignment. */
22929 if (ARM_DOUBLEWORD_ALIGN
22930 && (offsets
->soft_frame
& 7))
22932 offsets
->soft_frame
+= 4;
22933 /* Try to align stack by pushing an extra reg. Don't bother doing this
22934 when there is a stack frame as the alignment will be rolled into
22935 the normal stack adjustment. */
22936 if (frame_size
+ crtl
->outgoing_args_size
== 0)
22940 /* Register r3 is caller-saved. Normally it does not need to be
22941 saved on entry by the prologue. However if we choose to save
22942 it for padding then we may confuse the compiler into thinking
22943 a prologue sequence is required when in fact it is not. This
22944 will occur when shrink-wrapping if r3 is used as a scratch
22945 register and there are no other callee-saved writes.
22947 This situation can be avoided when other callee-saved registers
22948 are available and r3 is not mandatory if we choose a callee-saved
22949 register for padding. */
22950 bool prefer_callee_reg_p
= false;
22952 /* If it is safe to use r3, then do so. This sometimes
22953 generates better code on Thumb-2 by avoiding the need to
22954 use 32-bit push/pop instructions. */
22955 if (! any_sibcall_could_use_r3 ()
22956 && arm_size_return_regs () <= 12
22957 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
22959 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
22962 if (!TARGET_THUMB2
)
22963 prefer_callee_reg_p
= true;
22966 || prefer_callee_reg_p
)
22968 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
22970 /* Avoid fixed registers; they may be changed at
22971 arbitrary times so it's unsafe to restore them
22972 during the epilogue. */
22974 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
22984 offsets
->saved_regs
+= 4;
22985 offsets
->saved_regs_mask
|= (1 << reg
);
22990 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
22991 offsets
->outgoing_args
= (offsets
->locals_base
22992 + crtl
->outgoing_args_size
);
22994 if (ARM_DOUBLEWORD_ALIGN
)
22996 /* Ensure SP remains doubleword aligned. */
22997 if (offsets
->outgoing_args
& 7)
22998 offsets
->outgoing_args
+= 4;
22999 gcc_assert (!(offsets
->outgoing_args
& 7));
23004 /* Calculate the relative offsets for the different stack pointers. Positive
23005 offsets are in the direction of stack growth. */
23008 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
23010 arm_stack_offsets
*offsets
;
23012 offsets
= arm_get_frame_offsets ();
23014 /* OK, now we have enough information to compute the distances.
23015 There must be an entry in these switch tables for each pair
23016 of registers in ELIMINABLE_REGS, even if some of the entries
23017 seem to be redundant or useless. */
23020 case ARG_POINTER_REGNUM
:
23023 case THUMB_HARD_FRAME_POINTER_REGNUM
:
23026 case FRAME_POINTER_REGNUM
:
23027 /* This is the reverse of the soft frame pointer
23028 to hard frame pointer elimination below. */
23029 return offsets
->soft_frame
- offsets
->saved_args
;
23031 case ARM_HARD_FRAME_POINTER_REGNUM
:
23032 /* This is only non-zero in the case where the static chain register
23033 is stored above the frame. */
23034 return offsets
->frame
- offsets
->saved_args
- 4;
23036 case STACK_POINTER_REGNUM
:
23037 /* If nothing has been pushed on the stack at all
23038 then this will return -4. This *is* correct! */
23039 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
23042 gcc_unreachable ();
23044 gcc_unreachable ();
23046 case FRAME_POINTER_REGNUM
:
23049 case THUMB_HARD_FRAME_POINTER_REGNUM
:
23052 case ARM_HARD_FRAME_POINTER_REGNUM
:
23053 /* The hard frame pointer points to the top entry in the
23054 stack frame. The soft frame pointer to the bottom entry
23055 in the stack frame. If there is no stack frame at all,
23056 then they are identical. */
23058 return offsets
->frame
- offsets
->soft_frame
;
23060 case STACK_POINTER_REGNUM
:
23061 return offsets
->outgoing_args
- offsets
->soft_frame
;
23064 gcc_unreachable ();
23066 gcc_unreachable ();
23069 /* You cannot eliminate from the stack pointer.
23070 In theory you could eliminate from the hard frame
23071 pointer to the stack pointer, but this will never
23072 happen, since if a stack frame is not needed the
23073 hard frame pointer will never be used. */
23074 gcc_unreachable ();
23078 /* Given FROM and TO register numbers, say whether this elimination is
23079 allowed. Frame pointer elimination is automatically handled.
23081 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23082 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23083 pointer, we must eliminate FRAME_POINTER_REGNUM into
23084 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23085 ARG_POINTER_REGNUM. */
23088 arm_can_eliminate (const int from
, const int to
)
23090 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
23091 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
23092 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
23093 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
23097 /* Emit RTL to save coprocessor registers on function entry. Returns the
23098 number of bytes pushed. */
23101 arm_save_coproc_regs(void)
23103 int saved_size
= 0;
23105 unsigned start_reg
;
23108 if (TARGET_REALLY_IWMMXT
)
23109 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
23110 if (reg_needs_saving_p (reg
))
23112 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
23113 insn
= gen_rtx_MEM (V2SImode
, insn
);
23114 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
23115 RTX_FRAME_RELATED_P (insn
) = 1;
23119 if (TARGET_VFP_BASE
)
23121 start_reg
= FIRST_VFP_REGNUM
;
23123 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
23125 if (!reg_needs_saving_p (reg
) && !reg_needs_saving_p (reg
+ 1))
23127 if (start_reg
!= reg
)
23128 saved_size
+= vfp_emit_fstmd (start_reg
,
23129 (reg
- start_reg
) / 2);
23130 start_reg
= reg
+ 2;
23133 if (start_reg
!= reg
)
23134 saved_size
+= vfp_emit_fstmd (start_reg
,
23135 (reg
- start_reg
) / 2);
23141 /* Set the Thumb frame pointer from the stack pointer. */
23144 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
23146 HOST_WIDE_INT amount
;
23149 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
23151 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
23152 stack_pointer_rtx
, GEN_INT (amount
)));
23155 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
23156 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23157 expects the first two operands to be the same. */
23160 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
23162 hard_frame_pointer_rtx
));
23166 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
23167 hard_frame_pointer_rtx
,
23168 stack_pointer_rtx
));
23170 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
23171 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
23172 RTX_FRAME_RELATED_P (dwarf
) = 1;
23173 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23176 RTX_FRAME_RELATED_P (insn
) = 1;
23179 struct scratch_reg
{
23184 /* Return a short-lived scratch register for use as a 2nd scratch register on
23185 function entry after the registers are saved in the prologue. This register
23186 must be released by means of release_scratch_register_on_entry. IP is not
23187 considered since it is always used as the 1st scratch register if available.
23189 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23190 mask of live registers. */
23193 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
23194 unsigned long live_regs
)
23200 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
23206 for (i
= 4; i
< 11; i
++)
23207 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
23215 /* If IP is used as the 1st scratch register for a nested function,
23216 then either r3 wasn't available or is used to preserve IP. */
23217 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
23219 regno
= (regno1
== 3 ? 2 : 3);
23221 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
23226 sr
->reg
= gen_rtx_REG (SImode
, regno
);
23229 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
23230 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
23231 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
23232 plus_constant (Pmode
, stack_pointer_rtx
, -4));
23233 RTX_FRAME_RELATED_P (insn
) = 1;
23234 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
23238 /* Release a scratch register obtained from the preceding function. */
23241 release_scratch_register_on_entry (struct scratch_reg
*sr
)
23245 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
23246 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
23247 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
23248 plus_constant (Pmode
, stack_pointer_rtx
, 4));
23249 RTX_FRAME_RELATED_P (insn
) = 1;
23250 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
23254 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23256 #if PROBE_INTERVAL > 4096
23257 #error Cannot use indexed addressing mode for stack probing
23260 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23261 inclusive. These are offsets from the current stack pointer. REGNO1
23262 is the index number of the 1st scratch register and LIVE_REGS is the
23263 mask of live registers. */
23266 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
23267 unsigned int regno1
, unsigned long live_regs
)
23269 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
23271 /* See if we have a constant small number of probes to generate. If so,
23272 that's the easy case. */
23273 if (size
<= PROBE_INTERVAL
)
23275 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
23276 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
23277 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
23280 /* The run-time loop is made up of 10 insns in the generic case while the
23281 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
23282 else if (size
<= 5 * PROBE_INTERVAL
)
23284 HOST_WIDE_INT i
, rem
;
23286 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
23287 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
23288 emit_stack_probe (reg1
);
23290 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23291 it exceeds SIZE. If only two probes are needed, this will not
23292 generate any code. Then probe at FIRST + SIZE. */
23293 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
23295 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
23296 emit_stack_probe (reg1
);
23299 rem
= size
- (i
- PROBE_INTERVAL
);
23300 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
23302 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
23303 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
23306 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
23309 /* Otherwise, do the same as above, but in a loop. Note that we must be
23310 extra careful with variables wrapping around because we might be at
23311 the very top (or the very bottom) of the address space and we have
23312 to be able to handle this case properly; in particular, we use an
23313 equality test for the loop condition. */
23316 HOST_WIDE_INT rounded_size
;
23317 struct scratch_reg sr
;
23319 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
23321 emit_move_insn (reg1
, GEN_INT (first
));
23324 /* Step 1: round SIZE to the previous multiple of the interval. */
23326 rounded_size
= size
& -PROBE_INTERVAL
;
23327 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
23330 /* Step 2: compute initial and final value of the loop counter. */
23332 /* TEST_ADDR = SP + FIRST. */
23333 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
23335 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23336 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
23339 /* Step 3: the loop
23343 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23346 while (TEST_ADDR != LAST_ADDR)
23348 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23349 until it is equal to ROUNDED_SIZE. */
23351 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
23354 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23355 that SIZE is equal to ROUNDED_SIZE. */
23357 if (size
!= rounded_size
)
23359 HOST_WIDE_INT rem
= size
- rounded_size
;
23361 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
23363 emit_set_insn (sr
.reg
,
23364 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
23365 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
23366 PROBE_INTERVAL
- rem
));
23369 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
23372 release_scratch_register_on_entry (&sr
);
23375 /* Make sure nothing is scheduled before we are done. */
23376 emit_insn (gen_blockage ());
23379 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23380 absolute addresses. */
23383 output_probe_stack_range (rtx reg1
, rtx reg2
)
23385 static int labelno
= 0;
23389 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
23392 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
23394 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23396 xops
[1] = GEN_INT (PROBE_INTERVAL
);
23397 output_asm_insn ("sub\t%0, %0, %1", xops
);
23399 /* Probe at TEST_ADDR. */
23400 output_asm_insn ("str\tr0, [%0, #0]", xops
);
23402 /* Test if TEST_ADDR == LAST_ADDR. */
23404 output_asm_insn ("cmp\t%0, %1", xops
);
23407 fputs ("\tbne\t", asm_out_file
);
23408 assemble_name_raw (asm_out_file
, loop_lab
);
23409 fputc ('\n', asm_out_file
);
23414 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23417 arm_expand_prologue (void)
23422 unsigned long live_regs_mask
;
23423 unsigned long func_type
;
23425 int saved_pretend_args
= 0;
23426 int saved_regs
= 0;
23427 unsigned HOST_WIDE_INT args_to_push
;
23428 HOST_WIDE_INT size
;
23429 arm_stack_offsets
*offsets
;
23432 func_type
= arm_current_func_type ();
23434 /* Naked functions don't have prologues. */
23435 if (IS_NAKED (func_type
))
23437 if (flag_stack_usage_info
)
23438 current_function_static_stack_size
= 0;
23442 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
23443 args_to_push
= crtl
->args
.pretend_args_size
;
23445 /* Compute which register we will have to save onto the stack. */
23446 offsets
= arm_get_frame_offsets ();
23447 live_regs_mask
= offsets
->saved_regs_mask
;
23449 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
23451 if (IS_STACKALIGN (func_type
))
23455 /* Handle a word-aligned stack pointer. We generate the following:
23460 <save and restore r0 in normal prologue/epilogue>
23464 The unwinder doesn't need to know about the stack realignment.
23465 Just tell it we saved SP in r0. */
23466 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
23468 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
23469 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
23471 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
23472 RTX_FRAME_RELATED_P (insn
) = 1;
23473 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
23475 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
23477 /* ??? The CFA changes here, which may cause GDB to conclude that it
23478 has entered a different function. That said, the unwind info is
23479 correct, individually, before and after this instruction because
23480 we've described the save of SP, which will override the default
23481 handling of SP as restoring from the CFA. */
23482 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
23485 /* Let's compute the static_chain_stack_bytes required and store it. Right
23486 now the value must be -1 as stored by arm_init_machine_status (). */
23487 cfun
->machine
->static_chain_stack_bytes
23488 = arm_compute_static_chain_stack_bytes ();
23490 /* The static chain register is the same as the IP register. If it is
23491 clobbered when creating the frame, we need to save and restore it. */
23492 clobber_ip
= IS_NESTED (func_type
)
23493 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
23494 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
23495 || flag_stack_clash_protection
)
23496 && !df_regs_ever_live_p (LR_REGNUM
)
23497 && arm_r3_live_at_start_p ()));
23499 /* Find somewhere to store IP whilst the frame is being created.
23500 We try the following places in order:
23502 1. The last argument register r3 if it is available.
23503 2. A slot on the stack above the frame if there are no
23504 arguments to push onto the stack.
23505 3. Register r3 again, after pushing the argument registers
23506 onto the stack, if this is a varargs function.
23507 4. The last slot on the stack created for the arguments to
23508 push, if this isn't a varargs function.
23510 Note - we only need to tell the dwarf2 backend about the SP
23511 adjustment in the second variant; the static chain register
23512 doesn't need to be unwound, as it doesn't contain a value
23513 inherited from the caller. */
23516 if (!arm_r3_live_at_start_p ())
23517 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
23518 else if (args_to_push
== 0)
23522 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
23525 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
23526 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
23529 /* Just tell the dwarf backend that we adjusted SP. */
23530 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
23531 plus_constant (Pmode
, stack_pointer_rtx
,
23533 RTX_FRAME_RELATED_P (insn
) = 1;
23534 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23538 /* Store the args on the stack. */
23539 if (cfun
->machine
->uses_anonymous_args
)
23541 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
23542 (0xf0 >> (args_to_push
/ 4)) & 0xf);
23543 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
23544 saved_pretend_args
= 1;
23550 if (args_to_push
== 4)
23551 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
23553 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
23554 plus_constant (Pmode
,
23558 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
23560 /* Just tell the dwarf backend that we adjusted SP. */
23561 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
23562 plus_constant (Pmode
, stack_pointer_rtx
,
23564 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23567 RTX_FRAME_RELATED_P (insn
) = 1;
23568 fp_offset
= args_to_push
;
23573 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
23575 if (IS_INTERRUPT (func_type
))
23577 /* Interrupt functions must not corrupt any registers.
23578 Creating a frame pointer however, corrupts the IP
23579 register, so we must push it first. */
23580 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
23582 /* Do not set RTX_FRAME_RELATED_P on this insn.
23583 The dwarf stack unwinding code only wants to see one
23584 stack decrement per function, and this is not it. If
23585 this instruction is labeled as being part of the frame
23586 creation sequence then dwarf2out_frame_debug_expr will
23587 die when it encounters the assignment of IP to FP
23588 later on, since the use of SP here establishes SP as
23589 the CFA register and not IP.
23591 Anyway this instruction is not really part of the stack
23592 frame creation although it is part of the prologue. */
23595 insn
= emit_set_insn (ip_rtx
,
23596 plus_constant (Pmode
, stack_pointer_rtx
,
23598 RTX_FRAME_RELATED_P (insn
) = 1;
23601 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23602 if (TARGET_HAVE_FPCXT_CMSE
&& IS_CMSE_ENTRY (func_type
))
23605 insn
= emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx
,
23606 GEN_INT (FPCXTNS_ENUM
)));
23607 rtx dwarf
= gen_rtx_SET (stack_pointer_rtx
,
23608 plus_constant (Pmode
, stack_pointer_rtx
, -4));
23609 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
23610 RTX_FRAME_RELATED_P (insn
) = 1;
23615 /* Push the argument registers, or reserve space for them. */
23616 if (cfun
->machine
->uses_anonymous_args
)
23617 insn
= emit_multi_reg_push
23618 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
23619 (0xf0 >> (args_to_push
/ 4)) & 0xf);
23622 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
23623 GEN_INT (- args_to_push
)));
23624 RTX_FRAME_RELATED_P (insn
) = 1;
23627 /* If this is an interrupt service routine, and the link register
23628 is going to be pushed, and we're not generating extra
23629 push of IP (needed when frame is needed and frame layout if apcs),
23630 subtracting four from LR now will mean that the function return
23631 can be done with a single instruction. */
23632 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
23633 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
23634 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
23637 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
23639 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
23642 if (live_regs_mask
)
23644 unsigned long dwarf_regs_mask
= live_regs_mask
;
23646 saved_regs
+= bit_count (live_regs_mask
) * 4;
23647 if (optimize_size
&& !frame_pointer_needed
23648 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
23650 /* If no coprocessor registers are being pushed and we don't have
23651 to worry about a frame pointer then push extra registers to
23652 create the stack frame. This is done in a way that does not
23653 alter the frame layout, so is independent of the epilogue. */
23657 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
23659 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
23660 if (frame
&& n
* 4 >= frame
)
23663 live_regs_mask
|= (1 << n
) - 1;
23664 saved_regs
+= frame
;
23669 && current_tune
->prefer_ldrd_strd
23670 && !optimize_function_for_size_p (cfun
))
23672 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
23674 thumb2_emit_strd_push (live_regs_mask
);
23675 else if (TARGET_ARM
23676 && !TARGET_APCS_FRAME
23677 && !IS_INTERRUPT (func_type
))
23678 arm_emit_strd_push (live_regs_mask
);
23681 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
23682 RTX_FRAME_RELATED_P (insn
) = 1;
23687 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
23688 RTX_FRAME_RELATED_P (insn
) = 1;
23692 if (! IS_VOLATILE (func_type
))
23693 saved_regs
+= arm_save_coproc_regs ();
23695 if (frame_pointer_needed
&& TARGET_ARM
)
23697 /* Create the new frame pointer. */
23698 if (TARGET_APCS_FRAME
)
23700 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
23701 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
23702 RTX_FRAME_RELATED_P (insn
) = 1;
23706 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
23707 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
23708 stack_pointer_rtx
, insn
));
23709 RTX_FRAME_RELATED_P (insn
) = 1;
23713 size
= offsets
->outgoing_args
- offsets
->saved_args
;
23714 if (flag_stack_usage_info
)
23715 current_function_static_stack_size
= size
;
23717 /* If this isn't an interrupt service routine and we have a frame, then do
23718 stack checking. We use IP as the first scratch register, except for the
23719 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23720 if (!IS_INTERRUPT (func_type
)
23721 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
23722 || flag_stack_clash_protection
))
23724 unsigned int regno
;
23726 if (!IS_NESTED (func_type
) || clobber_ip
)
23728 else if (df_regs_ever_live_p (LR_REGNUM
))
23733 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
23735 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
23736 arm_emit_probe_stack_range (get_stack_check_protect (),
23737 size
- get_stack_check_protect (),
23738 regno
, live_regs_mask
);
23741 arm_emit_probe_stack_range (get_stack_check_protect (), size
,
23742 regno
, live_regs_mask
);
23745 /* Recover the static chain register. */
23748 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
23749 insn
= gen_rtx_REG (SImode
, 3);
23752 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
23753 insn
= gen_frame_mem (SImode
, insn
);
23755 emit_set_insn (ip_rtx
, insn
);
23756 emit_insn (gen_force_register_use (ip_rtx
));
23759 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
23761 /* This add can produce multiple insns for a large constant, so we
23762 need to get tricky. */
23763 rtx_insn
*last
= get_last_insn ();
23765 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
23766 - offsets
->outgoing_args
);
23768 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
23772 last
= last
? NEXT_INSN (last
) : get_insns ();
23773 RTX_FRAME_RELATED_P (last
) = 1;
23775 while (last
!= insn
);
23777 /* If the frame pointer is needed, emit a special barrier that
23778 will prevent the scheduler from moving stores to the frame
23779 before the stack adjustment. */
23780 if (frame_pointer_needed
)
23781 emit_insn (gen_stack_tie (stack_pointer_rtx
,
23782 hard_frame_pointer_rtx
));
23786 if (frame_pointer_needed
&& TARGET_THUMB2
)
23787 thumb_set_frame_pointer (offsets
);
23789 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
23791 unsigned long mask
;
23793 mask
= live_regs_mask
;
23794 mask
&= THUMB2_WORK_REGS
;
23795 if (!IS_NESTED (func_type
))
23796 mask
|= (1 << IP_REGNUM
);
23797 arm_load_pic_register (mask
, NULL_RTX
);
23800 /* If we are profiling, make sure no instructions are scheduled before
23801 the call to mcount. Similarly if the user has requested no
23802 scheduling in the prolog. Similarly if we want non-call exceptions
23803 using the EABI unwinder, to prevent faulting instructions from being
23804 swapped with a stack adjustment. */
23805 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
23806 || (arm_except_unwind_info (&global_options
) == UI_TARGET
23807 && cfun
->can_throw_non_call_exceptions
))
23808 emit_insn (gen_blockage ());
23810 /* If the link register is being kept alive, with the return address in it,
23811 then make sure that it does not get reused by the ce2 pass. */
23812 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
23813 cfun
->machine
->lr_save_eliminated
= 1;
23816 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23818 arm_print_condition (FILE *stream
)
23820 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
23822 /* Branch conversion is not implemented for Thumb-2. */
23825 output_operand_lossage ("predicated Thumb instruction");
23828 if (current_insn_predicate
!= NULL
)
23830 output_operand_lossage
23831 ("predicated instruction in conditional sequence");
23835 fputs (arm_condition_codes
[arm_current_cc
], stream
);
23837 else if (current_insn_predicate
)
23839 enum arm_cond_code code
;
23843 output_operand_lossage ("predicated Thumb instruction");
23847 code
= get_arm_condition_code (current_insn_predicate
);
23848 fputs (arm_condition_codes
[code
], stream
);
23853 /* Globally reserved letters: acln
23854 Punctuation letters currently used: @_|?().!#
23855 Lower case letters currently used: bcdefhimpqtvwxyz
23856 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
23857 Letters previously used, but now deprecated/obsolete: sWXYZ.
23859 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23861 If CODE is 'd', then the X is a condition operand and the instruction
23862 should only be executed if the condition is true.
23863 if CODE is 'D', then the X is a condition operand and the instruction
23864 should only be executed if the condition is false: however, if the mode
23865 of the comparison is CCFPEmode, then always execute the instruction -- we
23866 do this because in these circumstances !GE does not necessarily imply LT;
23867 in these cases the instruction pattern will take care to make sure that
23868 an instruction containing %d will follow, thereby undoing the effects of
23869 doing this instruction unconditionally.
23870 If CODE is 'N' then X is a floating point operand that must be negated
23872 If CODE is 'B' then output a bitwise inverted value of X (a const int).
23873 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
23874 If CODE is 'V', then the operand must be a CONST_INT representing
23875 the bits to preserve in the modified register (Rd) of a BFI or BFC
23876 instruction: print out both the width and lsb (shift) fields. */
23878 arm_print_operand (FILE *stream
, rtx x
, int code
)
23883 fputs (ASM_COMMENT_START
, stream
);
23887 fputs (user_label_prefix
, stream
);
23891 fputs (REGISTER_PREFIX
, stream
);
23895 arm_print_condition (stream
);
23899 /* The current condition code for a condition code setting instruction.
23900 Preceded by 's' in unified syntax, otherwise followed by 's'. */
23901 fputc('s', stream
);
23902 arm_print_condition (stream
);
23906 /* If the instruction is conditionally executed then print
23907 the current condition code, otherwise print 's'. */
23908 gcc_assert (TARGET_THUMB2
);
23909 if (current_insn_predicate
)
23910 arm_print_condition (stream
);
23912 fputc('s', stream
);
23915 /* %# is a "break" sequence. It doesn't output anything, but is used to
23916 separate e.g. operand numbers from following text, if that text consists
23917 of further digits which we don't want to be part of the operand
23925 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
23926 fprintf (stream
, "%s", fp_const_from_val (&r
));
23930 /* An integer or symbol address without a preceding # sign. */
23932 switch (GET_CODE (x
))
23935 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
23939 output_addr_const (stream
, x
);
23943 if (GET_CODE (XEXP (x
, 0)) == PLUS
23944 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
23946 output_addr_const (stream
, x
);
23949 /* Fall through. */
23952 output_operand_lossage ("Unsupported operand for code '%c'", code
);
23956 /* An integer that we want to print in HEX. */
23958 switch (GET_CODE (x
))
23961 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
23965 output_operand_lossage ("Unsupported operand for code '%c'", code
);
23970 if (CONST_INT_P (x
))
23973 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
23974 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
23978 putc ('~', stream
);
23979 output_addr_const (stream
, x
);
23984 /* Print the log2 of a CONST_INT. */
23988 if (!CONST_INT_P (x
)
23989 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
23990 output_operand_lossage ("Unsupported operand for code '%c'", code
);
23992 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
23997 /* The low 16 bits of an immediate constant. */
23998 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
24002 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
24006 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
24014 shift
= shift_op (x
, &val
);
24018 fprintf (stream
, ", %s ", shift
);
24020 arm_print_operand (stream
, XEXP (x
, 1), 0);
24022 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
24027 /* An explanation of the 'Q', 'R' and 'H' register operands:
24029 In a pair of registers containing a DI or DF value the 'Q'
24030 operand returns the register number of the register containing
24031 the least significant part of the value. The 'R' operand returns
24032 the register number of the register containing the most
24033 significant part of the value.
24035 The 'H' operand returns the higher of the two register numbers.
24036 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24037 same as the 'Q' operand, since the most significant part of the
24038 value is held in the lower number register. The reverse is true
24039 on systems where WORDS_BIG_ENDIAN is false.
24041 The purpose of these operands is to distinguish between cases
24042 where the endian-ness of the values is important (for example
24043 when they are added together), and cases where the endian-ness
24044 is irrelevant, but the order of register operations is important.
24045 For example when loading a value from memory into a register
24046 pair, the endian-ness does not matter. Provided that the value
24047 from the lower memory address is put into the lower numbered
24048 register, and the value from the higher address is put into the
24049 higher numbered register, the load will work regardless of whether
24050 the value being loaded is big-wordian or little-wordian. The
24051 order of the two register loads can matter however, if the address
24052 of the memory location is actually held in one of the registers
24053 being overwritten by the load.
24055 The 'Q' and 'R' constraints are also available for 64-bit
24058 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
24060 rtx part
= gen_lowpart (SImode
, x
);
24061 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
24065 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24067 output_operand_lossage ("invalid operand for code '%c'", code
);
24071 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
24075 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
24077 machine_mode mode
= GET_MODE (x
);
24080 if (mode
== VOIDmode
)
24082 part
= gen_highpart_mode (SImode
, mode
, x
);
24083 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
24087 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24089 output_operand_lossage ("invalid operand for code '%c'", code
);
24093 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
24097 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24099 output_operand_lossage ("invalid operand for code '%c'", code
);
24103 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
24107 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24109 output_operand_lossage ("invalid operand for code '%c'", code
);
24113 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
24117 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
24119 output_operand_lossage ("invalid operand for code '%c'", code
);
24123 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
24127 asm_fprintf (stream
, "%r",
24128 REG_P (XEXP (x
, 0))
24129 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
24133 asm_fprintf (stream
, "{%r-%r}",
24135 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
24138 /* Like 'M', but writing doubleword vector registers, for use by Neon
24142 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
24143 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
24145 asm_fprintf (stream
, "{d%d}", regno
);
24147 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
24152 /* CONST_TRUE_RTX means always -- that's the default. */
24153 if (x
== const_true_rtx
)
24156 if (!COMPARISON_P (x
))
24158 output_operand_lossage ("invalid operand for code '%c'", code
);
24162 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
24167 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24168 want to do that. */
24169 if (x
== const_true_rtx
)
24171 output_operand_lossage ("instruction never executed");
24174 if (!COMPARISON_P (x
))
24176 output_operand_lossage ("invalid operand for code '%c'", code
);
24180 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
24181 (get_arm_condition_code (x
))],
24187 /* Output the LSB (shift) and width for a bitmask instruction
24188 based on a literal mask. The LSB is printed first,
24189 followed by the width.
24191 Eg. For 0b1...1110001, the result is #1, #3. */
24192 if (!CONST_INT_P (x
))
24194 output_operand_lossage ("invalid operand for code '%c'", code
);
24198 unsigned HOST_WIDE_INT val
24199 = ~UINTVAL (x
) & HOST_WIDE_INT_UC (0xffffffff);
24200 int lsb
= exact_log2 (val
& -val
);
24201 asm_fprintf (stream
, "#%d, #%d", lsb
,
24202 (exact_log2 (val
+ (val
& -val
)) - lsb
));
24211 /* Former Maverick support, removed after GCC-4.7. */
24212 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
24217 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
24218 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
24219 /* Bad value for wCG register number. */
24221 output_operand_lossage ("invalid operand for code '%c'", code
);
24226 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
24229 /* Print an iWMMXt control register name. */
24231 if (!CONST_INT_P (x
)
24233 || INTVAL (x
) >= 16)
24234 /* Bad value for wC register number. */
24236 output_operand_lossage ("invalid operand for code '%c'", code
);
24242 static const char * wc_reg_names
[16] =
24244 "wCID", "wCon", "wCSSF", "wCASF",
24245 "wC4", "wC5", "wC6", "wC7",
24246 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24247 "wC12", "wC13", "wC14", "wC15"
24250 fputs (wc_reg_names
[INTVAL (x
)], stream
);
24254 /* Print the high single-precision register of a VFP double-precision
24258 machine_mode mode
= GET_MODE (x
);
24261 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
24263 output_operand_lossage ("invalid operand for code '%c'", code
);
24268 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
24270 output_operand_lossage ("invalid operand for code '%c'", code
);
24274 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
24278 /* Print a VFP/Neon double precision or quad precision register name. */
24282 machine_mode mode
= GET_MODE (x
);
24283 int is_quad
= (code
== 'q');
24286 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
24288 output_operand_lossage ("invalid operand for code '%c'", code
);
24293 || !IS_VFP_REGNUM (REGNO (x
)))
24295 output_operand_lossage ("invalid operand for code '%c'", code
);
24300 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
24301 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
24303 output_operand_lossage ("invalid operand for code '%c'", code
);
24307 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
24308 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
24312 /* These two codes print the low/high doubleword register of a Neon quad
24313 register, respectively. For pair-structure types, can also print
24314 low/high quadword registers. */
24318 machine_mode mode
= GET_MODE (x
);
24321 if ((GET_MODE_SIZE (mode
) != 16
24322 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
24324 output_operand_lossage ("invalid operand for code '%c'", code
);
24329 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
24331 output_operand_lossage ("invalid operand for code '%c'", code
);
24335 if (GET_MODE_SIZE (mode
) == 16)
24336 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
24337 + (code
== 'f' ? 1 : 0));
24339 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
24340 + (code
== 'f' ? 1 : 0));
24344 /* Print a VFPv3 floating-point constant, represented as an integer
24348 int index
= vfp3_const_double_index (x
);
24349 gcc_assert (index
!= -1);
24350 fprintf (stream
, "%d", index
);
24354 /* Print bits representing opcode features for Neon.
24356 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24357 and polynomials as unsigned.
24359 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24361 Bit 2 is 1 for rounding functions, 0 otherwise. */
24363 /* Identify the type as 's', 'u', 'p' or 'f'. */
24366 HOST_WIDE_INT bits
= INTVAL (x
);
24367 fputc ("uspf"[bits
& 3], stream
);
24371 /* Likewise, but signed and unsigned integers are both 'i'. */
24374 HOST_WIDE_INT bits
= INTVAL (x
);
24375 fputc ("iipf"[bits
& 3], stream
);
24379 /* As for 'T', but emit 'u' instead of 'p'. */
24382 HOST_WIDE_INT bits
= INTVAL (x
);
24383 fputc ("usuf"[bits
& 3], stream
);
24387 /* Bit 2: rounding (vs none). */
24390 HOST_WIDE_INT bits
= INTVAL (x
);
24391 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
24395 /* Memory operand for vld1/vst1 instruction. */
24399 bool postinc
= FALSE
;
24400 rtx postinc_reg
= NULL
;
24401 unsigned align
, memsize
, align_bits
;
24403 gcc_assert (MEM_P (x
));
24404 addr
= XEXP (x
, 0);
24405 if (GET_CODE (addr
) == POST_INC
)
24408 addr
= XEXP (addr
, 0);
24410 if (GET_CODE (addr
) == POST_MODIFY
)
24412 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
24413 addr
= XEXP (addr
, 0);
24415 asm_fprintf (stream
, "[%r", REGNO (addr
));
24417 /* We know the alignment of this access, so we can emit a hint in the
24418 instruction (for some alignments) as an aid to the memory subsystem
24420 align
= MEM_ALIGN (x
) >> 3;
24421 memsize
= MEM_SIZE (x
);
24423 /* Only certain alignment specifiers are supported by the hardware. */
24424 if (memsize
== 32 && (align
% 32) == 0)
24426 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
24428 else if (memsize
>= 8 && (align
% 8) == 0)
24433 if (align_bits
!= 0)
24434 asm_fprintf (stream
, ":%d", align_bits
);
24436 asm_fprintf (stream
, "]");
24439 fputs("!", stream
);
24441 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
24445 /* To print the memory operand with "Ux" or "Uj" constraint. Based on the
24446 rtx_code the memory operands output looks like following.
24448 2. [Rn, #+/-<imm>]!
24454 rtx postinc_reg
= NULL
;
24455 unsigned inc_val
= 0;
24456 enum rtx_code code
;
24458 gcc_assert (MEM_P (x
));
24459 addr
= XEXP (x
, 0);
24460 code
= GET_CODE (addr
);
24461 if (code
== POST_INC
|| code
== POST_DEC
|| code
== PRE_INC
24462 || code
== PRE_DEC
)
24464 asm_fprintf (stream
, "[%r", REGNO (XEXP (addr
, 0)));
24465 inc_val
= GET_MODE_SIZE (GET_MODE (x
));
24466 if (code
== POST_INC
|| code
== POST_DEC
)
24467 asm_fprintf (stream
, "], #%s%d",(code
== POST_INC
)
24468 ? "": "-", inc_val
);
24470 asm_fprintf (stream
, ", #%s%d]!",(code
== PRE_INC
)
24471 ? "": "-", inc_val
);
24473 else if (code
== POST_MODIFY
|| code
== PRE_MODIFY
)
24475 asm_fprintf (stream
, "[%r", REGNO (XEXP (addr
, 0)));
24476 postinc_reg
= XEXP (XEXP (addr
, 1), 1);
24477 if (postinc_reg
&& CONST_INT_P (postinc_reg
))
24479 if (code
== POST_MODIFY
)
24480 asm_fprintf (stream
, "], #%wd",INTVAL (postinc_reg
));
24482 asm_fprintf (stream
, ", #%wd]!",INTVAL (postinc_reg
));
24485 else if (code
== PLUS
)
24487 rtx base
= XEXP (addr
, 0);
24488 rtx index
= XEXP (addr
, 1);
24490 gcc_assert (REG_P (base
) && CONST_INT_P (index
));
24492 HOST_WIDE_INT offset
= INTVAL (index
);
24493 asm_fprintf (stream
, "[%r, #%wd]", REGNO (base
), offset
);
24497 gcc_assert (REG_P (addr
));
24498 asm_fprintf (stream
, "[%r]",REGNO (addr
));
24507 gcc_assert (MEM_P (x
));
24508 addr
= XEXP (x
, 0);
24509 gcc_assert (REG_P (addr
));
24510 asm_fprintf (stream
, "[%r]", REGNO (addr
));
24514 /* Translate an S register number into a D register number and element index. */
24517 machine_mode mode
= GET_MODE (x
);
24520 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
24522 output_operand_lossage ("invalid operand for code '%c'", code
);
24527 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
24529 output_operand_lossage ("invalid operand for code '%c'", code
);
24533 regno
= regno
- FIRST_VFP_REGNUM
;
24534 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
24539 gcc_assert (CONST_DOUBLE_P (x
));
24541 result
= vfp3_const_double_for_fract_bits (x
);
24543 result
= vfp3_const_double_for_bits (x
);
24544 fprintf (stream
, "#%d", result
);
24547 /* Register specifier for vld1.16/vst1.16. Translate the S register
24548 number into a D register number and element index. */
24551 machine_mode mode
= GET_MODE (x
);
24554 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
24556 output_operand_lossage ("invalid operand for code '%c'", code
);
24561 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
24563 output_operand_lossage ("invalid operand for code '%c'", code
);
24567 regno
= regno
- FIRST_VFP_REGNUM
;
24568 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
24575 output_operand_lossage ("missing operand");
24579 switch (GET_CODE (x
))
24582 asm_fprintf (stream
, "%r", REGNO (x
));
24586 output_address (GET_MODE (x
), XEXP (x
, 0));
24592 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
24593 sizeof (fpstr
), 0, 1);
24594 fprintf (stream
, "#%s", fpstr
);
24599 gcc_assert (GET_CODE (x
) != NEG
);
24600 fputc ('#', stream
);
24601 if (GET_CODE (x
) == HIGH
)
24603 fputs (":lower16:", stream
);
24607 output_addr_const (stream
, x
);
24613 /* Target hook for printing a memory address. */
24615 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
24619 int is_minus
= GET_CODE (x
) == MINUS
;
24622 asm_fprintf (stream
, "[%r]", REGNO (x
));
24623 else if (GET_CODE (x
) == PLUS
|| is_minus
)
24625 rtx base
= XEXP (x
, 0);
24626 rtx index
= XEXP (x
, 1);
24627 HOST_WIDE_INT offset
= 0;
24629 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
24631 /* Ensure that BASE is a register. */
24632 /* (one of them must be). */
24633 /* Also ensure the SP is not used as in index register. */
24634 std::swap (base
, index
);
24636 switch (GET_CODE (index
))
24639 offset
= INTVAL (index
);
24642 asm_fprintf (stream
, "[%r, #%wd]",
24643 REGNO (base
), offset
);
24647 asm_fprintf (stream
, "[%r, %s%r]",
24648 REGNO (base
), is_minus
? "-" : "",
24658 asm_fprintf (stream
, "[%r, %s%r",
24659 REGNO (base
), is_minus
? "-" : "",
24660 REGNO (XEXP (index
, 0)));
24661 arm_print_operand (stream
, index
, 'S');
24662 fputs ("]", stream
);
24667 gcc_unreachable ();
24670 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
24671 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
24673 gcc_assert (REG_P (XEXP (x
, 0)));
24675 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
24676 asm_fprintf (stream
, "[%r, #%s%d]!",
24677 REGNO (XEXP (x
, 0)),
24678 GET_CODE (x
) == PRE_DEC
? "-" : "",
24679 GET_MODE_SIZE (mode
));
24680 else if (TARGET_HAVE_MVE
&& (mode
== OImode
|| mode
== XImode
))
24681 asm_fprintf (stream
, "[%r]!", REGNO (XEXP (x
,0)));
24683 asm_fprintf (stream
, "[%r], #%s%d", REGNO (XEXP (x
, 0)),
24684 GET_CODE (x
) == POST_DEC
? "-" : "",
24685 GET_MODE_SIZE (mode
));
24687 else if (GET_CODE (x
) == PRE_MODIFY
)
24689 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
24690 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
24691 asm_fprintf (stream
, "#%wd]!",
24692 INTVAL (XEXP (XEXP (x
, 1), 1)));
24694 asm_fprintf (stream
, "%r]!",
24695 REGNO (XEXP (XEXP (x
, 1), 1)));
24697 else if (GET_CODE (x
) == POST_MODIFY
)
24699 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
24700 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
24701 asm_fprintf (stream
, "#%wd",
24702 INTVAL (XEXP (XEXP (x
, 1), 1)));
24704 asm_fprintf (stream
, "%r",
24705 REGNO (XEXP (XEXP (x
, 1), 1)));
24707 else output_addr_const (stream
, x
);
24712 asm_fprintf (stream
, "[%r]", REGNO (x
));
24713 else if (GET_CODE (x
) == POST_INC
)
24714 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
24715 else if (GET_CODE (x
) == PLUS
)
24717 gcc_assert (REG_P (XEXP (x
, 0)));
24718 if (CONST_INT_P (XEXP (x
, 1)))
24719 asm_fprintf (stream
, "[%r, #%wd]",
24720 REGNO (XEXP (x
, 0)),
24721 INTVAL (XEXP (x
, 1)));
24723 asm_fprintf (stream
, "[%r, %r]",
24724 REGNO (XEXP (x
, 0)),
24725 REGNO (XEXP (x
, 1)));
24728 output_addr_const (stream
, x
);
24732 /* Target hook for indicating whether a punctuation character for
24733 TARGET_PRINT_OPERAND is valid. */
24735 arm_print_operand_punct_valid_p (unsigned char code
)
24737 return (code
== '@' || code
== '|' || code
== '.'
24738 || code
== '(' || code
== ')' || code
== '#'
24739 || (TARGET_32BIT
&& (code
== '?'))
24740 || (TARGET_THUMB2
&& (code
== '!'))
24741 || (TARGET_THUMB
&& (code
== '_')));
24744 /* Target hook for assembling integer objects. The ARM version needs to
24745 handle word-sized values specially. */
24747 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
24751 if (size
== UNITS_PER_WORD
&& aligned_p
)
24753 fputs ("\t.word\t", asm_out_file
);
24754 output_addr_const (asm_out_file
, x
);
24756 /* Mark symbols as position independent. We only do this in the
24757 .text segment, not in the .data segment. */
24758 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
24759 (SYMBOL_REF_P (x
) || LABEL_REF_P (x
)))
24761 /* See legitimize_pic_address for an explanation of the
24762 TARGET_VXWORKS_RTP check. */
24763 /* References to weak symbols cannot be resolved locally:
24764 they may be overridden by a non-weak definition at link
24766 if (!arm_pic_data_is_text_relative
24767 || (SYMBOL_REF_P (x
)
24768 && (!SYMBOL_REF_LOCAL_P (x
)
24769 || (SYMBOL_REF_DECL (x
)
24770 ? DECL_WEAK (SYMBOL_REF_DECL (x
)) : 0)
24771 || (SYMBOL_REF_FUNCTION_P (x
)
24772 && !arm_fdpic_local_funcdesc_p (x
)))))
24774 if (TARGET_FDPIC
&& SYMBOL_REF_FUNCTION_P (x
))
24775 fputs ("(GOTFUNCDESC)", asm_out_file
);
24777 fputs ("(GOT)", asm_out_file
);
24781 if (TARGET_FDPIC
&& SYMBOL_REF_FUNCTION_P (x
))
24782 fputs ("(GOTOFFFUNCDESC)", asm_out_file
);
24788 || arm_is_segment_info_known (x
, &is_readonly
))
24789 fputs ("(GOTOFF)", asm_out_file
);
24791 fputs ("(GOT)", asm_out_file
);
24796 /* For FDPIC we also have to mark symbol for .data section. */
24798 && !making_const_table
24799 && SYMBOL_REF_P (x
)
24800 && SYMBOL_REF_FUNCTION_P (x
))
24801 fputs ("(FUNCDESC)", asm_out_file
);
24803 fputc ('\n', asm_out_file
);
24807 mode
= GET_MODE (x
);
24809 if (arm_vector_mode_supported_p (mode
))
24813 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
24815 units
= CONST_VECTOR_NUNITS (x
);
24816 size
= GET_MODE_UNIT_SIZE (mode
);
24818 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
24819 for (i
= 0; i
< units
; i
++)
24821 rtx elt
= CONST_VECTOR_ELT (x
, i
);
24823 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
24826 for (i
= 0; i
< units
; i
++)
24828 rtx elt
= CONST_VECTOR_ELT (x
, i
);
24830 (*CONST_DOUBLE_REAL_VALUE (elt
),
24831 as_a
<scalar_float_mode
> (GET_MODE_INNER (mode
)),
24832 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
24838 return default_assemble_integer (x
, size
, aligned_p
);
/* Emit an entry for SYMBOL in a static constructor (IS_CTOR true) or
   destructor list, at initialization priority PRIORITY.  On AAPCS
   targets the entry goes into .init_array/.fini_array and is emitted
   with the (target1) relocation specifier (R_ARM_TARGET1) so the
   linker/loader can choose between absolute and relative addressing;
   otherwise we defer to the generic named-section handling.  */
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      /* Non-default priorities get their own numbered section,
	 e.g. ".init_array.00042"; %.5u keeps the sections sorting
	 lexicographically by priority.  */
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;	/* NOTE(review): reconstructed — elided in extraction;
			   confirm against upstream arm.cc.  */
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  /* "(target1)" requests the R_ARM_TARGET1 relocation for this word.  */
  fputs ("(target1)\n", asm_out_file);
}
/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  /* Thin wrapper: shared emission logic lives in arm_elf_asm_cdtor.  */
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}
/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  /* Thin wrapper: shared emission logic lives in arm_elf_asm_cdtor.  */
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
24891 /* A finite state machine takes care of noticing whether or not instructions
24892 can be conditionally executed, and thus decrease execution time and code
24893 size by deleting branch instructions. The fsm is controlled by
24894 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
24896 /* The state of the fsm controlling condition codes are:
24897 0: normal, do nothing special
24898 1: make ASM_OUTPUT_OPCODE not output this instruction
24899 2: make ASM_OUTPUT_OPCODE not output this instruction
24900 3: make instructions conditional
24901 4: make instructions conditional
24903 State transitions (state->state by whom under condition):
24904 0 -> 1 final_prescan_insn if the `target' is a label
24905 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
24906 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
24907 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
24908 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
24909 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
24910 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
24911 (the target insn is arm_target_insn).
24913 If the jump clobbers the conditions then we use states 2 and 4.
24915 A similar thing can be done with conditional return insns.
24917 XXX In case the `target' is an unconditional branch, this conditionalising
24918 of the instructions always reduces code size, but not always execution
24919 time. But then, I want to reduce the code size to somewhere near what
24920 /bin/cc produces. */
24922 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
24923 instructions. When a COND_EXEC instruction is seen the subsequent
24924 instructions are scanned so that multiple conditional instructions can be
24925 combined into a single IT block. arm_condexec_count and arm_condexec_mask
24926 specify the length and true/false mask for the IT block. These will be
24927 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
/* Returns the index of the ARM condition code string in
   `arm_condition_codes', or ARM_NV if the comparison is invalid.
   COMPARISON should be an rtx like `(eq (...) (...))'.

   The mapping depends on which CC_MODE the comparison was computed in:
   each CC mode records which flags hold meaningful values after the
   flag-setting instruction, so the same rtx code can map to different
   ARM condition codes.  */

static enum arm_cond_code
maybe_get_arm_condition_code (rtx comparison)
{
  machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  /* If the operand doesn't carry a CC mode, recompute the mode this
     comparison would have been done in.  */
  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    /* Dominance modes: the flags were set by one comparison dominating
       another; only EQ/NE tests of the combined result are meaningful.  */
    case E_CC_DNEmode: code = ARM_NE; goto dominance;
    case E_CC_DEQmode: code = ARM_EQ; goto dominance;
    case E_CC_DGEmode: code = ARM_GE; goto dominance;
    case E_CC_DGTmode: code = ARM_GT; goto dominance;
    case E_CC_DLEmode: code = ARM_LE; goto dominance;
    case E_CC_DLTmode: code = ARM_LT; goto dominance;
    case E_CC_DGEUmode: code = ARM_CS; goto dominance;
    case E_CC_DGTUmode: code = ARM_HI; goto dominance;
    case E_CC_DLEUmode: code = ARM_LS; goto dominance;
    case E_CC_DLTUmode: code = ARM_CC;

    dominance:
      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      if (comp_code == NE)
	return code;
      return ARM_NV;

    /* Only the N and Z flags are valid.  */
    case E_CC_NZmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: return ARM_NV;
	}

    /* Only the Z flag is valid.  */
    case E_CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: return ARM_NV;
	}

    /* Only the N flag is valid.  */
    case E_CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: return ARM_NV;
	}

    case E_CCFPEmode:
    case E_CCFPmode:
      /* We can handle all cases except UNEQ and LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: return ARM_NV;
	}

    /* Operands were swapped when the flags were set, so every
       condition must be reversed.  */
    case E_CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: return ARM_NV;
	}

    /* Only the C flag is valid (set as a borrow).  */
    case E_CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	default: return ARM_NV;
	}

    /* Only the N and V flags are valid.  */
    case E_CC_NVmode:
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case LT: return ARM_LT;
	default: return ARM_NV;
	}

    /* Only the C flag is valid, in the normal (carry) sense.  */
    case E_CC_Bmode:
      switch (comp_code)
	{
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    /* Only the V flag is valid (overflow tests).  */
    case E_CC_Vmode:
      switch (comp_code)
	{
	case NE: return ARM_VS;
	case EQ: return ARM_VC;
	default: return ARM_NV;
	}

    /* Carry out of an add-with-carry.  */
    case E_CC_ADCmode:
      switch (comp_code)
	{
	case GEU: return ARM_CS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    /* All flags valid: the full set of comparisons is available.  */
    case E_CCmode:
    case E_CC_RSBmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: return ARM_NV;
	}

    default: gcc_unreachable ();
    }
}
/* Like maybe_get_arm_condition_code, but never return ARM_NV: the
   caller guarantees COMPARISON is representable, so an ARM_NV result
   here is an internal error.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
  gcc_assert (code != ARM_NV);
  return code;
}
/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  We only have condition
   code registers when not targetting Thumb1.  The VFP condition register
   only exists when generating hard-float code.  Stores the (up to two)
   fixed CC register numbers through P1 and P2; returns true if the
   target has fixed condition code registers at all.  */
static bool
arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  if (!TARGET_32BIT)
    return false;

  *p1 = CC_REGNUM;
  /* INVALID_REGNUM marks "no second CC register" for soft-float.  */
  *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
  return true;
}
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  Scans forward from INSN collecting consecutive COND_EXEC
   insns with compatible (same or exactly inverse) conditions, and records
   the resulting IT-block length/mask in arm_condexec_count,
   arm_condexec_mask and arm_condexec_masklen for the output machinery.  */
static void
thumb2_final_prescan_insn (rtx_insn *insn)
{
  rtx_insn *first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;
  int max;

  /* max_insns_skipped in the tune was already taken into account in the
     cost model of ifcvt pass when generating COND_EXEC insns.  At this stage
     just emit the IT blocks as we can.  It does not make sense to split
     the IT blocks.  */
  max = MAX_INSN_PER_IT_BLOCK;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (JUMP_P (insn))
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Maximum number of conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > max)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      /* Bits in arm_condexec_mask are 1 for 't' (same condition) and 0
	 for 'e' (inverse condition) slots of the IT block.  */
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (JUMP_P (insn))
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.cc assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
/* The finite state machine described above: decide whether the insns
   skipped by a conditional branch (or before a return) can instead be
   conditionally executed, updating arm_ccfsm_state, arm_current_cc,
   arm_target_label and arm_target_insn accordingly.  Called by final
   for each insn via FINAL_PRESCAN_INSN.  */
static void
arm_final_prescan_insn (rtx_insn *insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;
  enum rtx_code return_code = UNKNOWN;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx_insn *start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (ANY_RETURN_P (body))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (BARRIER_P (start_insn))
	    start_insn = next_nonnote_insn (start_insn);
	  if (LABEL_P (start_insn)
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	      return_code = GET_CODE (body);
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (!JUMP_P (insn))
    return;

  /* This jump might be paralleled with a clobber of the condition codes
     the jump should always come first */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx_insn *this_insn = start_insn;
      rtx label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 1));
	}
      else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
	{
	  seeking_return = 1;
	  return_code = GET_CODE (XEXP (SET_SRC (body), 2));
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5t)	/* NOTE(review): guard reconstructed — elided
				   in extraction; confirm against upstream.  */
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && BARRIER_P (this_insn))
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == return_code
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == return_code)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}
      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (!BARRIER_P (this_insn)
				  && !LABEL_P (this_insn)));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.cc assumes that it remains intact
	 across this call.  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  Called from ASM_OUTPUT_OPCODE; if
   thumb2_final_prescan_insn recorded a pending IT block
   (arm_condexec_mask non-zero), emit the "it{t|e}*  <cond>" prefix
   before the first conditional instruction, then clear the mask so
   the remaining insns of the block are emitted bare.  */
static void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      /* Each mask bit selects 't' (same condition) or 'e' (else) for
	 the corresponding slot of the IT block.  */
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf(stream, "i%s\t%s\n\t", buff,
		  arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
/* Implement TARGET_HARD_REGNO_NREGS.  On the ARM core regs are
   UNITS_PER_WORD bytes wide.  Returns the number of consecutive hard
   registers starting at REGNO needed to hold a value of MODE.  */
static unsigned int
arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  /* The MVE predicate register VPR is 16 bits wide, so count in
     2-byte units.  */
  if (IS_VPR_REGNUM (regno))
    return CEIL (GET_MODE_SIZE (mode), 2);

  /* Special registers above PC (other than the fake frame/arg
     pointers and the VFP bank) always occupy a single register.  */
  if (TARGET_32BIT
      && regno > PC_REGNUM
      && regno != FRAME_POINTER_REGNUM
      && regno != ARG_POINTER_REGNUM
      && !IS_VFP_REGNUM (regno))
    return 1;

  return ARM_NUM_REGS (mode);
}
/* Implement TARGET_HARD_REGNO_MODE_OK.  Return true if a value of
   MODE may be stored starting at hard register REGNO.  */
static bool
arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* CC modes live only in the (real or VFP) flags registers.  */
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_VFP_BASE
		&& regno == VFPCC_REGNUM));

  if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
    return false;

  /* The MVE predicate register holds HImode or the boolean vector
     modes only.  */
  if (IS_VPR_REGNUM (regno))
    return mode == HImode
      || mode == V16BImode
      || mode == V8BImode
      || mode == V4BImode;

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably we ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
    {
      if (mode == DFmode || mode == DImode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      if (mode == HFmode || mode == BFmode || mode == HImode
	  || mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	  || (VALID_NEON_QREG_MODE (mode)
	      && NEON_REGNO_OK_FOR_QUAD (regno))
	  || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	  || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	  || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	  || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	  || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
      if (TARGET_HAVE_MVE)
	return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
		|| (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
		|| (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));

      return false;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs in ARM state
     so that we can use ldrd.  The same restriction applies for MVE
     in order to support Armv8.1-M Mainline instructions.
     Do not allow very large Neon structure opaque modes in general
     registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    {
      if (ARM_NUM_REGS (mode) > 4)
	return false;

      if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
	return true;

      return !((TARGET_LDRD || TARGET_CDE)
	       && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
    }

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  return false;
}
/* Implement TARGET_MODES_TIEABLE_P.  Return true if a value of MODE1
   is accessible in MODE2 without copying.  */
static bool
arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if ((TARGET_NEON
       && (VALID_NEON_DREG_MODE (mode1)
	   || VALID_NEON_QREG_MODE (mode1)
	   || VALID_NEON_STRUCT_MODE (mode1))
       && (VALID_NEON_DREG_MODE (mode2)
	   || VALID_NEON_QREG_MODE (mode2)
	   || VALID_NEON_STRUCT_MODE (mode2)))
      || (TARGET_HAVE_MVE
	  && (VALID_MVE_MODE (mode1)
	      || VALID_MVE_STRUCT_MODE (mode1))
	  && (VALID_MVE_MODE (mode2)
	      || VALID_MVE_STRUCT_MODE (mode2))))
    return true;

  return false;
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  Return the register class that hard register
   REGNO belongs to.  */
enum reg_class
arm_regno_class (int regno)
{
  if (regno == PC_REGNUM)
    return NO_REGS;

  if (IS_VPR_REGNUM (regno))
    return VPR_REG;

  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (   regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  /* NOTE(review): final fallback reconstructed — elided in extraction;
     confirm against upstream arm.cc.  */
  return NO_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  VALUE is the offset the
   debug machinery computed (0 means "unknown"); ADDR is the address
   rtx for the argument.  Returns the corrected offset, or 0 when no
   correction is needed.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx_insn *insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (!REG_P (addr))
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computing by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
	  && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
	  )
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

	  break;
	}
    }

  if (value == 0)
    {
      /* Could not trace the register back to fp + const: warn and
	 fall back to a fixed guess.  */
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
/* Implement TARGET_PROMOTED_TYPE.  Arithmetic on the alternative
   (non-standard) __fp16 type is done in float; promote it.  Standard
   _Float16 is not promoted.  Returns the promoted type, or NULL_TREE
   when T needs no promotion.  */
static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t)
      && TYPE_PRECISION (t) == 16
      && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */
static bool
arm_scalar_mode_supported_p (scalar_mode mode)
{
  if (mode == HFmode)
    /* Half-float is only usable when a 16-bit format has been selected.  */
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else if (ALL_FIXED_POINT_MODE_P (mode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Set the value of FLT_EVAL_METHOD.
   ISO/IEC TS 18661-3 defines two values that we'd like to make use of:

   0: evaluate all operations and constants, whose semantic type has at
      most the range and precision of type float, to the range and
      precision of float; evaluate all other operations and constants to
      the range and precision of the semantic type;

   N, where _FloatN is a supported interchange floating type
      evaluate all operations and constants, whose semantic type has at
      most the range and precision of _FloatN type, to the range and
      precision of the _FloatN type; evaluate all other operations and
      constants to the range and precision of the semantic type;

   If we have the ARMv8.2-A extensions then we support _Float16 in native
   precision, so we should set this to 16.  Otherwise, we support the type,
   but want to evaluate expressions in float precision, so set this to
   0.  */

static enum flt_eval_method
arm_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
      case EXCESS_PRECISION_TYPE_STANDARD:
	/* We can calculate either in 16-bit range and precision or
	   32-bit range and precision.  Make that decision based on whether
	   we have native support for the ARMv8.2-A 16-bit floating-point
	   instructions or not.  */
	return (TARGET_VFP_FP16INST
		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
		: FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
      case EXCESS_PRECISION_TYPE_IMPLICIT:
      case EXCESS_PRECISION_TYPE_FLOAT16:
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
	gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
/* Implement TARGET_FLOATN_MODE.  Make very sure that we don't provide
   _Float16 if we are using anything other than ieee format for 16-bit
   floating point.  Otherwise, punt to the default implementation.  */
static opt_scalar_float_mode
arm_floatn_mode (int n, bool extended)
{
  if (!extended && n == 16)
    {
      /* _Float16 exists only with the IEEE half-precision format,
	 never with the alternative ARM format.  */
      if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
	return HFmode;
      return opt_scalar_float_mode ();
    }

  return default_floatn_mode (n, extended);
}
25866 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25867 not to early-clobber SRC registers in the process.
25869 We assume that the operands described by SRC and DEST represent a
25870 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25871 number of components into which the copy has been decomposed. */
25873 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
25877 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
25878 || REGNO (operands
[0]) < REGNO (operands
[1]))
25880 for (i
= 0; i
< count
; i
++)
25882 operands
[2 * i
] = dest
[i
];
25883 operands
[2 * i
+ 1] = src
[i
];
25888 for (i
= 0; i
< count
; i
++)
25890 operands
[2 * i
] = dest
[count
- i
- 1];
25891 operands
[2 * i
+ 1] = src
[count
- i
- 1];
/* Split operands into moves from op[1] + op[2] into op[0].  The
   destination is a double-width register whose low and high halves
   receive operands[1] and operands[2] respectively; emit the minimal
   set of half-register moves, ordered so neither source is clobbered
   before it is read.  */
void
neon_split_vcombine (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = REG_NREGS (operands[1]);
  rtx destlo, desthi;

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use VSWP.  */
  if (src2 == dest && src1 == dest + halfregs)
    {
      /* A parallel of the two cross-moves is recognized as a swap.  */
      rtx x = gen_rtx_SET (destlo, operands[1]);
      rtx y = gen_rtx_SET (desthi, operands[2]);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
      return;
    }

  if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      /* The low move would clobber the second source: do the high
	 half first.  */
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  MASK must be non-zero.  */
static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
/* Like emit_multi_reg_push, but allowing for a different set of
   registers to be described as saved.  MASK is the set of registers
   to be saved; REAL_REGS is the set of registers to be described as
   saved.  If REAL_REGS is 0, only describe the stack adjustment.
   Returns the emitted push insn (with its unwind notes attached).  */

static rtx_insn *
thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
{
  unsigned long regno;
  rtx par[10], tmp, reg;
  rtx_insn *insn;
  int i, j;

  /* Build the parallel of the registers actually being stored.
     mask &= mask - 1 clears the lowest set bit each iteration.  */
  for (i = 0; mask; ++i, mask &= mask - 1)
    {
      regno = ctz_hwi (mask);
      reg = gen_rtx_REG (SImode, regno);

      if (i == 0)
	/* The first element carries the UNSPEC that identifies this
	   parallel as a multi-register push.  */
	tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
      else
	tmp = gen_rtx_USE (VOIDmode, reg);

      par[i] = tmp;
    }

  /* Wrap the first element in the pre-modify store through SP.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
  tmp = gen_frame_mem (BLKmode, tmp);
  tmp = gen_rtx_SET (tmp, par[0]);
  par[0] = tmp;

  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
  insn = emit_insn (tmp);

  /* Always build the stack adjustment note for unwind info.  */
  tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
  tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
  par[0] = tmp;

  /* Build the parallel of the registers recorded as saved for unwind.  */
  for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
    {
      regno = ctz_hwi (real_regs);
      reg = gen_rtx_REG (SImode, regno);

      tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
      tmp = gen_frame_mem (SImode, tmp);
      tmp = gen_rtx_SET (tmp, reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      par[j + 1] = tmp;
    }

  if (j == 0)
    tmp = par[0];
  else
    {
      RTX_FRAME_RELATED_P (par[0]) = 1;
      tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
    }

  add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);

  return insn;
}
26023 /* Emit code to push or pop registers to or from the stack. F is the
26024 assembly file. MASK is the registers to pop. */
/* Emit assembly (to stream F) to pop the registers in MASK.  Popping the
   PC with no low registers, or with interworking/backtrace/EH/CMSE
   constraints, is delegated to thumb_exit instead of a direct "pop {pc}".
   NOTE(review): lines are missing from this extraction (gaps in the
   embedded numbering), including braces and parts of the register loop.  */
26026 thumb_pop (FILE *f
, unsigned long mask
)
26029 int lo_mask
= mask
& 0xFF;
26033 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
26035 /* Special case. Do not generate a POP PC statement here, do it in
26037 thumb_exit (f
, -1);
26041 fprintf (f
, "\tpop\t{");
26043 /* Look at the low registers first. */
26044 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
26048 asm_fprintf (f
, "%r", regno
);
26050 if ((lo_mask
& ~1) != 0)
26055 if (mask
& (1 << PC_REGNUM
))
26057 /* Catch popping the PC. */
26058 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
|| crtl
->calls_eh_return
26059 || IS_CMSE_ENTRY (arm_current_func_type ()))
26061 /* The PC is never popped directly, instead
26062 it is popped into r3 and then BX is used. */
26063 fprintf (f
, "}\n");
26065 thumb_exit (f
, -1);
26074 asm_fprintf (f
, "%r", PC_REGNUM
);
26078 fprintf (f
, "}\n");
26081 /* Generate code to return from a thumb function.
26082 If 'reg_containing_return_addr' is -1, then the return address is
26083 actually on the stack, at the stack pointer.
26085 Note: do not forget to update length attribute of corresponding insn pattern
26086 when changing assembly output (eg. length attribute of epilogue_insns when
26087 updating Armv8-M Baseline Security Extensions register clearing
/* Emit assembly (to stream F) to return from a Thumb-1 function.
   REG_CONTAINING_RETURN_ADDR is the register holding the return address,
   or -1 if the return address is on the stack at SP.  Handles the
   interactions between __builtin_eh_return, TARGET_BACKTRACE,
   interworking and CMSE entry functions, shuffling FP/SP/LR through the
   argument registers that are free to be corrupted.
   NOTE(review): many source lines are missing from this extraction (gaps
   in the embedded numbering) — declarations of mode/size/pops_needed/
   required/frame_pointer/stack_pointer/popped_into/move_to, several
   braces and else arms, and some statements are not visible; the comments
   below describe only what the visible code shows.  */
26090 thumb_exit (FILE *f
, int reg_containing_return_addr
)
26092 unsigned regs_available_for_popping
;
26093 unsigned regs_to_pop
;
26095 unsigned available
;
26099 int restore_a4
= FALSE
;
26101 /* Compute the registers we need to pop. */
26105 if (reg_containing_return_addr
== -1)
26107 regs_to_pop
|= 1 << LR_REGNUM
;
26111 if (TARGET_BACKTRACE
)
26113 /* Restore the (ARM) frame pointer and stack pointer. */
26114 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
26118 /* If there is nothing to pop then just emit the BX instruction and
26120 if (pops_needed
== 0)
26122 if (crtl
->calls_eh_return
)
26123 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26125 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26127 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26128 emitted by cmse_nonsecure_entry_clear_before_return (). */
26129 if (!TARGET_HAVE_FPCXT_CMSE
)
26130 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n",
26131 reg_containing_return_addr
);
26132 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
26135 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26138 /* Otherwise if we are not supporting interworking and we have not created
26139 a backtrace structure and the function was not entered in ARM mode then
26140 just pop the return address straight into the PC. */
26141 else if (!TARGET_INTERWORK
26142 && !TARGET_BACKTRACE
26143 && !is_called_in_ARM_mode (current_function_decl
)
26144 && !crtl
->calls_eh_return
26145 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26147 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
26151 /* Find out how many of the (return) argument registers we can corrupt. */
26152 regs_available_for_popping
= 0;
26154 /* If returning via __builtin_eh_return, the bottom three registers
26155 all contain information needed for the return. */
26156 if (crtl
->calls_eh_return
)
26160 /* If we can deduce the registers used from the function's
26161 return value. This is more reliable than examining
26162 df_regs_ever_live_p () because that will be set if the register is
26163 ever used in the function, not just if the register is used
26164 to hold a return value. */
26166 if (crtl
->return_rtx
!= 0)
26167 mode
= GET_MODE (crtl
->return_rtx
);
26169 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
26171 size
= GET_MODE_SIZE (mode
);
26175 /* In a void function we can use any argument register.
26176 In a function that returns a structure on the stack
26177 we can use the second and third argument registers. */
26178 if (mode
== VOIDmode
)
26179 regs_available_for_popping
=
26180 (1 << ARG_REGISTER (1))
26181 | (1 << ARG_REGISTER (2))
26182 | (1 << ARG_REGISTER (3));
26184 regs_available_for_popping
=
26185 (1 << ARG_REGISTER (2))
26186 | (1 << ARG_REGISTER (3));
26188 else if (size
<= 4)
26189 regs_available_for_popping
=
26190 (1 << ARG_REGISTER (2))
26191 | (1 << ARG_REGISTER (3));
26192 else if (size
<= 8)
26193 regs_available_for_popping
=
26194 (1 << ARG_REGISTER (3));
26197 /* Match registers to be popped with registers into which we pop them.
   Each loop step strips the lowest set bit of both masks (x & -x isolates
   the lowest set bit).  */
26198 for (available
= regs_available_for_popping
,
26199 required
= regs_to_pop
;
26200 required
!= 0 && available
!= 0;
26201 available
&= ~(available
& - available
),
26202 required
&= ~(required
& - required
))
26205 /* If we have any popping registers left over, remove them. */
26207 regs_available_for_popping
&= ~available
;
26209 /* Otherwise if we need another popping register we can use
26210 the fourth argument register. */
26211 else if (pops_needed
)
26213 /* If we have not found any free argument registers and
26214 reg a4 contains the return address, we must move it. */
26215 if (regs_available_for_popping
== 0
26216 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
26218 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26219 reg_containing_return_addr
= LR_REGNUM
;
26221 else if (size
> 12)
26223 /* Register a4 is being used to hold part of the return value,
26224 but we have dire need of a free, low register. */
26227 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
26230 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
26232 /* The fourth argument register is available. */
26233 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
26239 /* Pop as many registers as we can. */
26240 thumb_pop (f
, regs_available_for_popping
);
26242 /* Process the registers we popped. */
26243 if (reg_containing_return_addr
== -1)
26245 /* The return address was popped into the lowest numbered register. */
26246 regs_to_pop
&= ~(1 << LR_REGNUM
);
26248 reg_containing_return_addr
=
26249 number_of_first_bit_set (regs_available_for_popping
);
26251 /* Remove this register for the mask of available registers, so that
26252 the return address will not be corrupted by further pops. */
26253 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
26256 /* If we popped other registers then handle them here. */
26257 if (regs_available_for_popping
)
26261 /* Work out which register currently contains the frame pointer. */
26262 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26264 /* Move it into the correct place. */
26265 asm_fprintf (f
, "\tmov\t%r, %r\n",
26266 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
26268 /* (Temporarily) remove it from the mask of popped registers. */
26269 regs_available_for_popping
&= ~(1 << frame_pointer
);
26270 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
26272 if (regs_available_for_popping
)
26276 /* We popped the stack pointer as well,
26277 find the register that contains it. */
26278 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
26280 /* Move it into the stack register. */
26281 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
26283 /* At this point we have popped all necessary registers, so
26284 do not worry about restoring regs_available_for_popping
26285 to its correct value:
26287 assert (pops_needed == 0)
26288 assert (regs_available_for_popping == (1 << frame_pointer))
26289 assert (regs_to_pop == (1 << STACK_POINTER)) */
26293 /* Since we have just move the popped value into the frame
26294 pointer, the popping register is available for reuse, and
26295 we know that we still have the stack pointer left to pop. */
26296 regs_available_for_popping
|= (1 << frame_pointer
);
26300 /* If we still have registers left on the stack, but we no longer have
26301 any registers into which we can pop them, then we must move the return
26302 address into the link register and make available the register that
26304 if (regs_available_for_popping
== 0 && pops_needed
> 0)
26306 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
26308 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
26309 reg_containing_return_addr
);
26311 reg_containing_return_addr
= LR_REGNUM
;
26314 /* If we have registers left on the stack then pop some more.
26315 We know that at most we will want to pop FP and SP. */
26316 if (pops_needed
> 0)
26321 thumb_pop (f
, regs_available_for_popping
);
26323 /* We have popped either FP or SP.
26324 Move whichever one it is into the correct register. */
26325 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26326 move_to
= number_of_first_bit_set (regs_to_pop
);
26328 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
26332 /* If we still have not popped everything then we must have only
26333 had one register available to us and we are now popping the SP. */
26334 if (pops_needed
> 0)
26338 thumb_pop (f
, regs_available_for_popping
);
26340 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
26342 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
26344 assert (regs_to_pop == (1 << STACK_POINTER))
26345 assert (pops_needed == 1)
26349 /* If necessary restore the a4 register. */
26352 if (reg_containing_return_addr
!= LR_REGNUM
)
26354 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
26355 reg_containing_return_addr
= LR_REGNUM
;
26358 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
26361 if (crtl
->calls_eh_return
)
26362 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
26364 /* Return to caller. */
26365 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26367 /* This is for the cases where LR is not being used to contain the return
26368 address. It may therefore contain information that we might not want
26369 to leak, hence it must be cleared. The value in R0 will never be a
26370 secret at this point, so it is safe to use it, see the clearing code
26371 in cmse_nonsecure_entry_clear_before_return (). */
26372 if (reg_containing_return_addr
!= LR_REGNUM
)
26373 asm_fprintf (f
, "\tmov\tlr, r0\n");
26375 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26376 by cmse_nonsecure_entry_clear_before_return (). */
26377 if (!TARGET_HAVE_FPCXT_CMSE
)
26378 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr
);
26379 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
26382 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
26385 /* Scan INSN just before assembler is output for it.
26386 For Thumb-1, we track the status of the condition codes; this
26387 information is used in the cbranchsi4_insn pattern. */
/* Final-prescan hook for Thumb-1: called for INSN just before its
   assembly is output.  Tracks which insn last set the condition codes
   (cfun->machine->thumb1_cc_*) for use by the cbranchsi4_insn pattern,
   and diagnoses a far jump appearing after LR save was eliminated.
   NOTE(review): the extraction has dropped lines here (gaps in the
   embedded numbering), including braces and at least one early exit;
   control flow below is incomplete as shown.  */
26389 thumb1_final_prescan_insn (rtx_insn
*insn
)
26391 if (flag_print_asm_name
)
26392 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
26393 INSN_ADDRESSES (INSN_UID (insn
)));
26394 /* Don't overwrite the previous setter when we get to a cbranch. */
26395 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
26397 enum attr_conds conds
;
26399 if (cfun
->machine
->thumb1_cc_insn
)
26401 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
26402 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
26405 conds
= get_attr_conds (insn
);
26406 if (conds
== CONDS_SET
)
26408 rtx set
= single_set (insn
);
26409 cfun
->machine
->thumb1_cc_insn
= insn
;
26410 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
26411 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
26412 cfun
->machine
->thumb1_cc_mode
= CC_NZmode
;
26413 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
26415 rtx src1
= XEXP (SET_SRC (set
), 1);
26416 if (src1
== const0_rtx
)
26417 cfun
->machine
->thumb1_cc_mode
= CCmode
;
26419 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
26421 /* Record the src register operand instead of dest because
26422 cprop_hardreg pass propagates src. */
26423 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
26426 else if (conds
!= CONDS_NOCOND
)
26427 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
26430 /* Check if unexpected far jump is used. */
26431 if (cfun
->machine
->lr_save_eliminated
26432 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26433 internal_error("Unexpected thumb1 far jump");
/* Test whether VAL (truncated to 32 bits) is an 8-bit constant shifted
   left by 0..24 bits, i.e. representable by a Thumb-1 move-and-shift
   sequence.  NOTE(review): the extraction dropped the return statements
   and braces of this function; only the loop/test skeleton is visible.  */
26437 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
26439 unsigned HOST_WIDE_INT mask
= 0xff;
/* Work in the low 32 bits only.  */
26442 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
26443 if (val
== 0) /* XXX */
26446 for (i
= 0; i
< 25; i
++)
26447 if ((val
& (mask
<< i
)) == val
)
26453 /* Returns nonzero if the current function contains,
26454 or might contain a far jump. */
/* Return nonzero if the current function contains, or might contain, a
   far jump.  The decision is sticky: once far_jump_used is set it is
   never revisited (the LR push it implies cannot be undone).  Uses a
   conservative size heuristic (func_size * 3 >= 2048) after scanning for
   JUMP insns with the far_jump attribute.
   NOTE(review): return statements and braces are missing from this
   extraction (gaps in the embedded numbering); only side effects and
   conditions are visible below.  */
26456 thumb_far_jump_used_p (void)
26459 bool far_jump
= false;
26460 unsigned int func_size
= 0;
26462 /* If we have already decided that far jumps may be used,
26463 do not bother checking again, and always return true even if
26464 it turns out that they are not being used. Once we have made
26465 the decision that far jumps are present (and that hence the link
26466 register will be pushed onto the stack) we cannot go back on it. */
26467 if (cfun
->machine
->far_jump_used
)
26470 /* If this function is not being called from the prologue/epilogue
26471 generation code then it must be being called from the
26472 INITIAL_ELIMINATION_OFFSET macro. */
26473 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
26475 /* In this case we know that we are being asked about the elimination
26476 of the arg pointer register. If that register is not being used,
26477 then there are no arguments on the stack, and we do not have to
26478 worry that a far jump might force the prologue to push the link
26479 register, changing the stack offsets. In this case we can just
26480 return false, since the presence of far jumps in the function will
26481 not affect stack offsets.
26483 If the arg pointer is live (or if it was live, but has now been
26484 eliminated and so set to dead) then we do have to test to see if
26485 the function might contain a far jump. This test can lead to some
26486 false negatives, since before reload is completed, then length of
26487 branch instructions is not known, so gcc defaults to returning their
26488 longest length, which in turn sets the far jump attribute to true.
26490 A false negative will not result in bad code being generated, but it
26491 will result in a needless push and pop of the link register. We
26492 hope that this does not occur too often.
26494 If we need doubleword stack alignment this could affect the other
26495 elimination offsets so we can't risk getting it wrong. */
26496 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
26497 cfun
->machine
->arg_pointer_live
= 1;
26498 else if (!cfun
->machine
->arg_pointer_live
)
26502 /* We should not change far_jump_used during or after reload, as there is
26503 no chance to change stack frame layout. */
26504 if (reload_in_progress
|| reload_completed
)
26507 /* Check to see if the function contains a branch
26508 insn with the far jump attribute set. */
26509 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
26511 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
26515 func_size
+= get_attr_length (insn
);
26518 /* Attribute far_jump will always be true for thumb1 before
26519 shorten_branch pass. So checking far_jump attribute before
26520 shorten_branch isn't much useful.
26522 Following heuristic tries to estimate more accurately if a far jump
26523 may finally be used. The heuristic is very conservative as there is
26524 no chance to roll-back the decision of not to use far jump.
26526 Thumb1 long branch offset is -2048 to 2046. The worst case is each
26527 2-byte insn is associated with a 4 byte constant pool. Using
26528 function size 2048/3 as the threshold is conservative enough. */
26531 if ((func_size
* 3) >= 2048)
26533 /* Record the fact that we have decided that
26534 the function does use far jumps. */
26535 cfun
->machine
->far_jump_used
= 1;
26543 /* Return nonzero if FUNC must be entered in ARM mode. */
/* Return nonzero if FUNC must be entered in ARM mode: either callee
   interworking applies to a public function, or FUNC carries the
   "interfacearm" attribute.  NOTE(review): the extraction dropped the
   return for the TARGET_CALLEE_INTERWORKING branch and the braces.  */
26545 is_called_in_ARM_mode (tree func
)
26547 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
26549 /* Ignore the problem about functions whose address is taken. */
26550 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
26554 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
26560 /* Given the stack offsets and register mask in OFFSETS, decide how
26561 many additional registers to push instead of subtracting a constant
26562 from SP. For epilogues the principle is the same except we use pop.
26563 FOR_PROLOGUE indicates which we're generating. */
/* Given the stack offsets and register mask in OFFSETS, decide how many
   additional (caller-corruptible) registers to push/pop instead of
   adjusting SP by a constant.  FOR_PROLOGUE selects push vs pop rules.
   Returns a register count (return statements for the bail-out cases are
   among the lines the extraction dropped).
   NOTE(review): several lines are missing here (gaps in the embedded
   numbering) — including the declaration/initialization of n_free, early
   returns, and some condition heads — so the logic below is incomplete
   as shown.  */
26565 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
26567 HOST_WIDE_INT amount
;
26568 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
26569 /* Extract a mask of the ones we can give to the Thumb's push/pop
26571 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
26572 /* Then count how many other high registers will need to be pushed. */
26573 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26574 int n_free
, reg_base
, size
;
26576 if (!for_prologue
&& frame_pointer_needed
)
26577 amount
= offsets
->locals_base
- offsets
->saved_regs
;
26579 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
26581 /* If the stack frame size is 512 exactly, we can save one load
26582 instruction, which should make this a win even when optimizing
26584 if (!optimize_size
&& amount
!= 512)
26587 /* Can't do this if there are high registers to push. */
26588 if (high_regs_pushed
!= 0)
26591 /* Shouldn't do it in the prologue if no registers would normally
26592 be pushed at all. In the epilogue, also allow it if we'll have
26593 a pop insn for the PC. */
26596 || TARGET_BACKTRACE
26597 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
26598 || TARGET_INTERWORK
26599 || crtl
->args
.pretend_args_size
!= 0))
26602 /* Don't do this if thumb_expand_prologue wants to emit instructions
26603 between the push and the stack frame allocation. */
26605 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
26606 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
26613 size
= arm_size_return_regs ();
26614 reg_base
= ARM_NUM_INTS (size
);
26615 live_regs_mask
>>= reg_base
;
/* Count free low registers above the return-value registers that are
   not already live and (for epilogues) are call-clobbered.  */
26618 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
26619 && (for_prologue
|| call_used_or_fixed_reg_p (reg_base
+ n_free
)))
26621 live_regs_mask
>>= 1;
26627 gcc_assert (amount
/ 4 * 4 == amount
);
26629 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
26630 return (amount
- 508) / 4;
26631 if (amount
<= n_free
* 4)
26636 /* The bits which aren't usefully expanded as rtl. */
/* Emit the parts of the Thumb-1 epilogue that are not usefully expanded
   as RTL: restoring high registers via low-register scratch pops,
   popping the saved registers (possibly the PC), removing pushed
   argument registers, and finishing via thumb_exit.
   NOTE(review): many lines are missing from this extraction (gaps in the
   embedded numbering) — declarations of size/extra_pop/regno/next_hi_reg,
   braces, early returns and some else arms are not visible; comments
   below describe only the visible statements.  */
26638 thumb1_unexpanded_epilogue (void)
26640 arm_stack_offsets
*offsets
;
26642 unsigned long live_regs_mask
= 0;
26643 int high_regs_pushed
= 0;
26645 int had_to_push_lr
;
26648 if (cfun
->machine
->return_used_this_function
!= 0)
26651 if (IS_NAKED (arm_current_func_type ()))
26654 offsets
= arm_get_frame_offsets ();
26655 live_regs_mask
= offsets
->saved_regs_mask
;
26656 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26658 /* If we can deduce the registers used from the function's return value.
26659 This is more reliable than examining df_regs_ever_live_p () because that
26660 will be set if the register is ever used in the function, not just if
26661 the register is used to hold a return value. */
26662 size
= arm_size_return_regs ();
26664 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
26667 unsigned long extra_mask
= (1 << extra_pop
) - 1;
26668 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
26671 /* The prolog may have pushed some high registers to use as
26672 work registers. e.g. the testsuite file:
26673 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26674 compiles to produce:
26675 push {r4, r5, r6, r7, lr}
26679 as part of the prolog. We have to undo that pushing here. */
26681 if (high_regs_pushed
)
26683 unsigned long mask
= live_regs_mask
& 0xff;
26686 mask
|= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26689 /* Oh dear! We have no low registers into which we can pop
26692 ("no low registers available for popping high registers");
26694 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
26695 if (live_regs_mask
& (1 << next_hi_reg
))
26698 while (high_regs_pushed
)
26700 /* Find lo register(s) into which the high register(s) can
26702 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
26704 if (mask
& (1 << regno
))
26705 high_regs_pushed
--;
26706 if (high_regs_pushed
== 0)
26710 if (high_regs_pushed
== 0 && regno
>= 0)
26711 mask
&= ~((1 << regno
) - 1);
26713 /* Pop the values into the low register(s). */
26714 thumb_pop (asm_out_file
, mask
);
26716 /* Move the value(s) into the high registers. */
26717 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
26719 if (mask
& (1 << regno
))
26721 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
26724 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
26726 if (live_regs_mask
& (1 << next_hi_reg
))
26731 live_regs_mask
&= ~0x0f00;
26734 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
26735 live_regs_mask
&= 0xff;
26737 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
26739 /* Pop the return address into the PC. */
26740 if (had_to_push_lr
)
26741 live_regs_mask
|= 1 << PC_REGNUM
;
26743 /* Either no argument registers were pushed or a backtrace
26744 structure was created which includes an adjusted stack
26745 pointer, so just pop everything. */
26746 if (live_regs_mask
)
26747 thumb_pop (asm_out_file
, live_regs_mask
);
26749 /* We have either just popped the return address into the
26750 PC or it was kept in LR for the entire function.
26751 Note that thumb_pop has already called thumb_exit if the
26752 PC was in the list. */
26753 if (!had_to_push_lr
)
26754 thumb_exit (asm_out_file
, LR_REGNUM
);
26758 /* Pop everything but the return address. */
26759 if (live_regs_mask
)
26760 thumb_pop (asm_out_file
, live_regs_mask
);
26762 if (had_to_push_lr
)
26766 /* We have no free low regs, so save one. */
26767 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
26771 /* Get the return address into a temporary register. */
26772 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
26776 /* Move the return address to lr. */
26777 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
26779 /* Restore the low register. */
26780 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
26785 regno
= LAST_ARG_REGNUM
;
26790 /* Remove the argument registers that were pushed onto the stack. */
26791 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
26792 SP_REGNUM
, SP_REGNUM
,
26793 crtl
->args
.pretend_args_size
);
26795 thumb_exit (asm_out_file
, regno
);
26801 /* Functions to save and restore machine-specific function data. */
/* Allocate and zero-initialize the per-function machine_function record,
   setting func_type to ARM_FT_UNKNOWN (when that is nonzero) and marking
   static_chain_stack_bytes as not-yet-computed (-1).
   NOTE(review): the extraction dropped the #endif/return lines here.  */
26802 static struct machine_function
*
26803 arm_init_machine_status (void)
26805 struct machine_function
*machine
;
26806 machine
= ggc_cleared_alloc
<machine_function
> ();
26808 #if ARM_FT_UNKNOWN != 0
26809 machine
->func_type
= ARM_FT_UNKNOWN
;
26811 machine
->static_chain_stack_bytes
= -1;
26815 /* Return an RTX indicating where the return address to the
26816 calling function can be found. */
/* Return an RTX for the return address of the current function (COUNT
   frames up; FRAME is unused).  The visible path returns the saved
   initial value of LR.  NOTE(review): the extraction dropped the branch
   handling COUNT != 0.  */
26818 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
26823 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
26826 /* Do anything needed before RTL is emitted for each function. */
/* Per-function setup before RTL is emitted: install the machine-status
   allocator and mark the arg pointer as a pointer so combine does not
   optimize away va_arg's alignment adjustment.  */
26828 arm_init_expanders (void)
26830 /* Arrange to initialize and mark the machine per-function status. */
26831 init_machine_status
= arm_init_machine_status
;
26833 /* This is to stop the combine pass optimizing away the alignment
26834 adjustment of va_arg. */
26835 /* ??? It is claimed that this should not be necessary. */
26837 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
26840 /* Check that FUNC is called with a different mode. */
/* Return whether FUNC is compiled in a different instruction-set mode
   (Thumb vs ARM) than the current global setting, by comparing FUNC's
   target-option flags against TARGET_THUMB.  Non-FUNCTION_DECLs take the
   early-out path (its return line was dropped by the extraction), and a
   function without per-function target options falls back to the
   target_option_default_node.  */
26843 arm_change_mode_p (tree func
)
26845 if (TREE_CODE (func
) != FUNCTION_DECL
)
26848 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
26851 callee_tree
= target_option_default_node
;
26853 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
26854 int flags
= callee_opts
->x_target_flags
;
26856 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
26859 /* Like arm_compute_initial_elimination offset. Simpler because there
26860 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26861 to point at the base of the local variables after static stack
26862 space for a function has been allocated. */
/* Thumb variant of arm_compute_initial_elimination_offset: return the
   offset between register FROM and register TO using the frame layout in
   arm_get_frame_offsets ().  Structured as nested switches on FROM/TO
   (the switch heads and case labels for FROM were dropped by the
   extraction; only the TO cases and their offset arithmetic are visible).
   Unhandled combinations hit gcc_unreachable ().  */
26865 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
26867 arm_stack_offsets
*offsets
;
26869 offsets
= arm_get_frame_offsets ();
26873 case ARG_POINTER_REGNUM
:
26876 case STACK_POINTER_REGNUM
:
26877 return offsets
->outgoing_args
- offsets
->saved_args
;
26879 case FRAME_POINTER_REGNUM
:
26880 return offsets
->soft_frame
- offsets
->saved_args
;
26882 case ARM_HARD_FRAME_POINTER_REGNUM
:
26883 return offsets
->saved_regs
- offsets
->saved_args
;
26885 case THUMB_HARD_FRAME_POINTER_REGNUM
:
26886 return offsets
->locals_base
- offsets
->saved_args
;
26889 gcc_unreachable ();
26893 case FRAME_POINTER_REGNUM
:
26896 case STACK_POINTER_REGNUM
:
26897 return offsets
->outgoing_args
- offsets
->soft_frame
;
26899 case ARM_HARD_FRAME_POINTER_REGNUM
:
26900 return offsets
->saved_regs
- offsets
->soft_frame
;
26902 case THUMB_HARD_FRAME_POINTER_REGNUM
:
26903 return offsets
->locals_base
- offsets
->soft_frame
;
26906 gcc_unreachable ();
26911 gcc_unreachable ();
26915 /* Generate the function's prologue. */
26918 thumb1_expand_prologue (void)
26922 HOST_WIDE_INT amount
;
26923 HOST_WIDE_INT size
;
26924 arm_stack_offsets
*offsets
;
26925 unsigned long func_type
;
26927 unsigned long live_regs_mask
;
26928 unsigned long l_mask
;
26929 unsigned high_regs_pushed
= 0;
26930 bool lr_needs_saving
;
26932 func_type
= arm_current_func_type ();
26934 /* Naked functions don't have prologues. */
26935 if (IS_NAKED (func_type
))
26937 if (flag_stack_usage_info
)
26938 current_function_static_stack_size
= 0;
26942 if (IS_INTERRUPT (func_type
))
26944 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
26948 if (is_called_in_ARM_mode (current_function_decl
))
26949 emit_insn (gen_prologue_thumb1_interwork ());
26951 offsets
= arm_get_frame_offsets ();
26952 live_regs_mask
= offsets
->saved_regs_mask
;
26953 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
26955 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26956 l_mask
= live_regs_mask
& 0x40ff;
26957 /* Then count how many other high registers will need to be pushed. */
26958 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
26960 if (crtl
->args
.pretend_args_size
)
26962 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
26964 if (cfun
->machine
->uses_anonymous_args
)
26966 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
26967 unsigned long mask
;
26969 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
26970 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
26972 insn
= thumb1_emit_multi_reg_push (mask
, 0);
26976 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
26977 stack_pointer_rtx
, x
));
26979 RTX_FRAME_RELATED_P (insn
) = 1;
26982 if (TARGET_BACKTRACE
)
26984 HOST_WIDE_INT offset
= 0;
26985 unsigned work_register
;
26986 rtx work_reg
, x
, arm_hfp_rtx
;
26988 /* We have been asked to create a stack backtrace structure.
26989 The code looks like this:
26993 0 sub SP, #16 Reserve space for 4 registers.
26994 2 push {R7} Push low registers.
26995 4 add R7, SP, #20 Get the stack pointer before the push.
26996 6 str R7, [SP, #8] Store the stack pointer
26997 (before reserving the space).
26998 8 mov R7, PC Get hold of the start of this code + 12.
26999 10 str R7, [SP, #16] Store it.
27000 12 mov R7, FP Get hold of the current frame pointer.
27001 14 str R7, [SP, #4] Store it.
27002 16 mov R7, LR Get hold of the current return address.
27003 18 str R7, [SP, #12] Store it.
27004 20 add R7, SP, #16 Point at the start of the
27005 backtrace structure.
27006 22 mov FP, R7 Put this value into the frame pointer. */
27008 work_register
= thumb_find_work_register (live_regs_mask
);
27009 work_reg
= gen_rtx_REG (SImode
, work_register
);
27010 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
27012 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27013 stack_pointer_rtx
, GEN_INT (-16)));
27014 RTX_FRAME_RELATED_P (insn
) = 1;
27018 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
27019 RTX_FRAME_RELATED_P (insn
) = 1;
27020 lr_needs_saving
= false;
27022 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
27025 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
27026 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27028 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
27029 x
= gen_frame_mem (SImode
, x
);
27030 emit_move_insn (x
, work_reg
);
27032 /* Make sure that the instruction fetching the PC is in the right place
27033 to calculate "start of backtrace creation code + 12". */
27034 /* ??? The stores using the common WORK_REG ought to be enough to
27035 prevent the scheduler from doing anything weird. Failing that
27036 we could always move all of the following into an UNSPEC_VOLATILE. */
27039 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27040 emit_move_insn (work_reg
, x
);
27042 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27043 x
= gen_frame_mem (SImode
, x
);
27044 emit_move_insn (x
, work_reg
);
27046 emit_move_insn (work_reg
, arm_hfp_rtx
);
27048 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27049 x
= gen_frame_mem (SImode
, x
);
27050 emit_move_insn (x
, work_reg
);
27054 emit_move_insn (work_reg
, arm_hfp_rtx
);
27056 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
27057 x
= gen_frame_mem (SImode
, x
);
27058 emit_move_insn (x
, work_reg
);
27060 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
27061 emit_move_insn (work_reg
, x
);
27063 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
27064 x
= gen_frame_mem (SImode
, x
);
27065 emit_move_insn (x
, work_reg
);
27068 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
27069 emit_move_insn (work_reg
, x
);
27071 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
27072 x
= gen_frame_mem (SImode
, x
);
27073 emit_move_insn (x
, work_reg
);
27075 x
= GEN_INT (offset
+ 12);
27076 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
27078 emit_move_insn (arm_hfp_rtx
, work_reg
);
27080 /* Optimization: If we are not pushing any low registers but we are going
27081 to push some high registers then delay our first push. This will just
27082 be a push of LR and we can combine it with the push of the first high
27084 else if ((l_mask
& 0xff) != 0
27085 || (high_regs_pushed
== 0 && lr_needs_saving
))
27087 unsigned long mask
= l_mask
;
27088 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
27089 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
27090 RTX_FRAME_RELATED_P (insn
) = 1;
27091 lr_needs_saving
= false;
27094 if (high_regs_pushed
)
27096 unsigned pushable_regs
;
27097 unsigned next_hi_reg
;
27098 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
27099 : crtl
->args
.info
.nregs
;
27100 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
27102 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
27103 if (live_regs_mask
& (1 << next_hi_reg
))
27106 /* Here we need to mask out registers used for passing arguments
27107 even if they can be pushed. This is to avoid using them to
27108 stash the high registers. Such kind of stash may clobber the
27109 use of arguments. */
27110 pushable_regs
= l_mask
& (~arg_regs_mask
);
27111 pushable_regs
|= thumb1_prologue_unused_call_clobbered_lo_regs ();
27113 /* Normally, LR can be used as a scratch register once it has been
27114 saved; but if the function examines its own return address then
27115 the value is still live and we need to avoid using it. */
27116 bool return_addr_live
27117 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
27120 if (lr_needs_saving
|| return_addr_live
)
27121 pushable_regs
&= ~(1 << LR_REGNUM
);
27123 if (pushable_regs
== 0)
27124 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
27126 while (high_regs_pushed
> 0)
27128 unsigned long real_regs_mask
= 0;
27129 unsigned long push_mask
= 0;
27131 for (regno
= LR_REGNUM
; regno
>= 0; regno
--)
27133 if (pushable_regs
& (1 << regno
))
27135 emit_move_insn (gen_rtx_REG (SImode
, regno
),
27136 gen_rtx_REG (SImode
, next_hi_reg
));
27138 high_regs_pushed
--;
27139 real_regs_mask
|= (1 << next_hi_reg
);
27140 push_mask
|= (1 << regno
);
27142 if (high_regs_pushed
)
27144 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
27146 if (live_regs_mask
& (1 << next_hi_reg
))
27154 /* If we had to find a work register and we have not yet
27155 saved the LR then add it to the list of regs to push. */
27156 if (lr_needs_saving
)
27158 push_mask
|= 1 << LR_REGNUM
;
27159 real_regs_mask
|= 1 << LR_REGNUM
;
27160 lr_needs_saving
= false;
27161 /* If the return address is not live at this point, we
27162 can add LR to the list of registers that we can use
27164 if (!return_addr_live
)
27165 pushable_regs
|= 1 << LR_REGNUM
;
27168 insn
= thumb1_emit_multi_reg_push (push_mask
, real_regs_mask
);
27169 RTX_FRAME_RELATED_P (insn
) = 1;
27173 /* Load the pic register before setting the frame pointer,
27174 so we can use r7 as a temporary work register. */
27175 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
27176 arm_load_pic_register (live_regs_mask
, NULL_RTX
);
27178 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
27179 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
27180 stack_pointer_rtx
);
27182 size
= offsets
->outgoing_args
- offsets
->saved_args
;
27183 if (flag_stack_usage_info
)
27184 current_function_static_stack_size
= size
;
27186 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27187 if ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27188 || flag_stack_clash_protection
)
27190 sorry ("%<-fstack-check=specific%> for Thumb-1");
27192 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27193 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
27198 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27199 GEN_INT (- amount
)));
27200 RTX_FRAME_RELATED_P (insn
) = 1;
27206 /* The stack decrement is too big for an immediate value in a single
27207 insn. In theory we could issue multiple subtracts, but after
27208 three of them it becomes more space efficient to place the full
27209 value in the constant pool and load into a register. (Also the
27210 ARM debugger really likes to see only one stack decrement per
27211 function). So instead we look for a scratch register into which
27212 we can load the decrement, and then we subtract this from the
27213 stack pointer. Unfortunately on the thumb the only available
27214 scratch registers are the argument registers, and we cannot use
27215 these as they may hold arguments to the function. Instead we
27216 attempt to locate a call preserved register which is used by this
27217 function. If we can find one, then we know that it will have
27218 been pushed at the start of the prologue and so we can corrupt
27220 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
27221 if (live_regs_mask
& (1 << regno
))
27224 gcc_assert(regno
<= LAST_LO_REGNUM
);
27226 reg
= gen_rtx_REG (SImode
, regno
);
27228 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
27230 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27231 stack_pointer_rtx
, reg
));
27233 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
27234 plus_constant (Pmode
, stack_pointer_rtx
,
27236 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
27237 RTX_FRAME_RELATED_P (insn
) = 1;
27241 if (frame_pointer_needed
)
27242 thumb_set_frame_pointer (offsets
);
27244 /* If we are profiling, make sure no instructions are scheduled before
27245 the call to mcount. Similarly if the user has requested no
27246 scheduling in the prolog. Similarly if we want non-call exceptions
27247 using the EABI unwinder, to prevent faulting instructions from being
27248 swapped with a stack adjustment. */
27249 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
27250 || (arm_except_unwind_info (&global_options
) == UI_TARGET
27251 && cfun
->can_throw_non_call_exceptions
))
27252 emit_insn (gen_blockage ());
27254 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
27255 if (live_regs_mask
& 0xff)
27256 cfun
->machine
->lr_save_eliminated
= 0;
27259 /* Clear caller saved registers not used to pass return values and leaked
27260 condition flags before exiting a cmse_nonsecure_entry function. */
27263 cmse_nonsecure_entry_clear_before_return (void)
27265 bool clear_vfpregs
= TARGET_HARD_FLOAT
|| TARGET_HAVE_FPCXT_CMSE
;
27266 int regno
, maxregno
= clear_vfpregs
? LAST_VFP_REGNUM
: IP_REGNUM
;
27267 uint32_t padding_bits_to_clear
= 0;
27268 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
27269 rtx r1_reg
, result_rtl
, clearing_reg
= NULL_RTX
;
27272 bitmap_clear (to_clear_bitmap
);
27273 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
, NUM_ARG_REGS
);
27274 bitmap_set_bit (to_clear_bitmap
, IP_REGNUM
);
27276 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27280 int float_bits
= D7_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1;
27282 bitmap_set_range (to_clear_bitmap
, FIRST_VFP_REGNUM
, float_bits
);
27284 if (!TARGET_HAVE_FPCXT_CMSE
)
27286 /* Make sure we don't clear the two scratch registers used to clear
27287 the relevant FPSCR bits in output_return_instruction. */
27288 emit_use (gen_rtx_REG (SImode
, IP_REGNUM
));
27289 bitmap_clear_bit (to_clear_bitmap
, IP_REGNUM
);
27290 emit_use (gen_rtx_REG (SImode
, 4));
27291 bitmap_clear_bit (to_clear_bitmap
, 4);
27295 /* If the user has defined registers to be caller saved, these are no longer
27296 restored by the function before returning and must thus be cleared for
27297 security purposes. */
27298 for (regno
= NUM_ARG_REGS
; regno
<= maxregno
; regno
++)
27300 /* We do not touch registers that can be used to pass arguments as per
27301 the AAPCS, since these should never be made callee-saved by user
27303 if (IN_RANGE (regno
, FIRST_VFP_REGNUM
, D7_VFP_REGNUM
))
27305 if (IN_RANGE (regno
, IP_REGNUM
, PC_REGNUM
))
27307 if (!callee_saved_reg_p (regno
)
27308 && (!IN_RANGE (regno
, FIRST_VFP_REGNUM
, LAST_VFP_REGNUM
)
27309 || TARGET_HARD_FLOAT
))
27310 bitmap_set_bit (to_clear_bitmap
, regno
);
27313 /* Make sure we do not clear the registers used to return the result in. */
27314 result_type
= TREE_TYPE (DECL_RESULT (current_function_decl
));
27315 if (!VOID_TYPE_P (result_type
))
27317 uint64_t to_clear_return_mask
;
27318 result_rtl
= arm_function_value (result_type
, current_function_decl
, 0);
27320 /* No need to check that we return in registers, because we don't
27321 support returning on stack yet. */
27322 gcc_assert (REG_P (result_rtl
));
27323 to_clear_return_mask
27324 = compute_not_to_clear_mask (result_type
, result_rtl
, 0,
27325 &padding_bits_to_clear
);
27326 if (to_clear_return_mask
)
27328 gcc_assert ((unsigned) maxregno
< sizeof (long long) * __CHAR_BIT__
);
27329 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
27331 if (to_clear_return_mask
& (1ULL << regno
))
27332 bitmap_clear_bit (to_clear_bitmap
, regno
);
27337 if (padding_bits_to_clear
!= 0)
27339 int to_clear_bitmap_size
= SBITMAP_SIZE ((sbitmap
) to_clear_bitmap
);
27340 auto_sbitmap
to_clear_arg_regs_bitmap (to_clear_bitmap_size
);
27342 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27343 returning a composite type, which only uses r0. Let's make sure that
27344 r1-r3 is cleared too. */
27345 bitmap_clear (to_clear_arg_regs_bitmap
);
27346 bitmap_set_range (to_clear_arg_regs_bitmap
, R1_REGNUM
, NUM_ARG_REGS
- 1);
27347 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap
, to_clear_bitmap
));
27350 /* Clear full registers that leak before returning. */
27351 clearing_reg
= gen_rtx_REG (SImode
, TARGET_THUMB1
? R0_REGNUM
: LR_REGNUM
);
27352 r1_reg
= gen_rtx_REG (SImode
, R0_REGNUM
+ 1);
27353 cmse_clear_registers (to_clear_bitmap
, &padding_bits_to_clear
, 1, r1_reg
,
27357 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
27358 POP instruction can be generated. LR should be replaced by PC. All
27359 the checks required are already done by USE_RETURN_INSN (). Hence,
27360 all we really need to check here is if single register is to be
27361 returned, or multiple register return. */
27363 thumb2_expand_return (bool simple_return
)
27366 unsigned long saved_regs_mask
;
27367 arm_stack_offsets
*offsets
;
27369 offsets
= arm_get_frame_offsets ();
27370 saved_regs_mask
= offsets
->saved_regs_mask
;
27372 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27373 if (saved_regs_mask
& (1 << i
))
27376 if (!simple_return
&& saved_regs_mask
)
27378 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27379 functions or adapt code to handle according to ACLE. This path should
27380 not be reachable for cmse_nonsecure_entry functions though we prefer
27381 to assert it for now to ensure that future code changes do not silently
27382 change this behavior. */
27383 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27386 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27387 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
27388 rtx addr
= gen_rtx_MEM (SImode
,
27389 gen_rtx_POST_INC (SImode
,
27390 stack_pointer_rtx
));
27391 set_mem_alias_set (addr
, get_frame_alias_set ());
27392 XVECEXP (par
, 0, 0) = ret_rtx
;
27393 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
27394 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
27395 emit_jump_insn (par
);
27399 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
27400 saved_regs_mask
|= (1 << PC_REGNUM
);
27401 arm_emit_multi_reg_pop (saved_regs_mask
);
27406 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27407 cmse_nonsecure_entry_clear_before_return ();
27408 emit_jump_insn (simple_return_rtx
);
27413 thumb1_expand_epilogue (void)
27415 HOST_WIDE_INT amount
;
27416 arm_stack_offsets
*offsets
;
27419 /* Naked functions don't have prologues. */
27420 if (IS_NAKED (arm_current_func_type ()))
27423 offsets
= arm_get_frame_offsets ();
27424 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27426 if (frame_pointer_needed
)
27428 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
27429 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27431 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
27433 gcc_assert (amount
>= 0);
27436 emit_insn (gen_blockage ());
27439 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
27440 GEN_INT (amount
)));
27443 /* r3 is always free in the epilogue. */
27444 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
27446 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
27447 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
27451 /* Emit a USE (stack_pointer_rtx), so that
27452 the stack adjustment will not be deleted. */
27453 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27455 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
27456 emit_insn (gen_blockage ());
27458 /* Emit a clobber for each insn that will be restored in the epilogue,
27459 so that flow2 will get register lifetimes correct. */
27460 for (regno
= 0; regno
< 13; regno
++)
27461 if (reg_needs_saving_p (regno
))
27462 emit_clobber (gen_rtx_REG (SImode
, regno
));
27464 if (! df_regs_ever_live_p (LR_REGNUM
))
27465 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
27467 /* Clear all caller-saved regs that are not used to return. */
27468 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27469 cmse_nonsecure_entry_clear_before_return ();
27472 /* Epilogue code for APCS frame. */
27474 arm_expand_epilogue_apcs_frame (bool really_return
)
27476 unsigned long func_type
;
27477 unsigned long saved_regs_mask
;
27480 int floats_from_frame
= 0;
27481 arm_stack_offsets
*offsets
;
27483 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
27484 func_type
= arm_current_func_type ();
27486 /* Get frame offsets for ARM. */
27487 offsets
= arm_get_frame_offsets ();
27488 saved_regs_mask
= offsets
->saved_regs_mask
;
27490 /* Find the offset of the floating-point save area in the frame. */
27492 = (offsets
->saved_args
27493 + arm_compute_static_chain_stack_bytes ()
27496 /* Compute how many core registers saved and how far away the floats are. */
27497 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27498 if (saved_regs_mask
& (1 << i
))
27501 floats_from_frame
+= 4;
27504 if (TARGET_VFP_BASE
)
27507 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
27509 /* The offset is from IP_REGNUM. */
27510 int saved_size
= arm_get_vfp_saved_size ();
27511 if (saved_size
> 0)
27514 floats_from_frame
+= saved_size
;
27515 insn
= emit_insn (gen_addsi3 (ip_rtx
,
27516 hard_frame_pointer_rtx
,
27517 GEN_INT (-floats_from_frame
)));
27518 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
27519 ip_rtx
, hard_frame_pointer_rtx
);
27522 /* Generate VFP register multi-pop. */
27523 start_reg
= FIRST_VFP_REGNUM
;
27525 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
27526 /* Look for a case where a reg does not need restoring. */
27527 if (!reg_needs_saving_p (i
) && !reg_needs_saving_p (i
+ 1))
27529 if (start_reg
!= i
)
27530 arm_emit_vfp_multi_reg_pop (start_reg
,
27531 (i
- start_reg
) / 2,
27532 gen_rtx_REG (SImode
,
27537 /* Restore the remaining regs that we have discovered (or possibly
27538 even all of them, if the conditional in the for loop never
27540 if (start_reg
!= i
)
27541 arm_emit_vfp_multi_reg_pop (start_reg
,
27542 (i
- start_reg
) / 2,
27543 gen_rtx_REG (SImode
, IP_REGNUM
));
27548 /* The frame pointer is guaranteed to be non-double-word aligned, as
27549 it is set to double-word-aligned old_stack_pointer - 4. */
27551 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
27553 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
27554 if (reg_needs_saving_p (i
))
27556 rtx addr
= gen_frame_mem (V2SImode
,
27557 plus_constant (Pmode
, hard_frame_pointer_rtx
,
27559 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27560 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27561 gen_rtx_REG (V2SImode
, i
),
27567 /* saved_regs_mask should contain IP which contains old stack pointer
27568 at the time of activation creation. Since SP and IP are adjacent registers,
27569 we can restore the value directly into SP. */
27570 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
27571 saved_regs_mask
&= ~(1 << IP_REGNUM
);
27572 saved_regs_mask
|= (1 << SP_REGNUM
);
27574 /* There are two registers left in saved_regs_mask - LR and PC. We
27575 only need to restore LR (the return address), but to
27576 save time we can load it directly into PC, unless we need a
27577 special function exit sequence, or we are not really returning. */
27579 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
27580 && !crtl
->calls_eh_return
)
27581 /* Delete LR from the register mask, so that LR on
27582 the stack is loaded into the PC in the register mask. */
27583 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27585 saved_regs_mask
&= ~(1 << PC_REGNUM
);
27587 num_regs
= bit_count (saved_regs_mask
);
27588 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
27591 emit_insn (gen_blockage ());
27592 /* Unwind the stack to just below the saved registers. */
27593 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27594 hard_frame_pointer_rtx
,
27595 GEN_INT (- 4 * num_regs
)));
27597 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
27598 stack_pointer_rtx
, hard_frame_pointer_rtx
);
27601 arm_emit_multi_reg_pop (saved_regs_mask
);
27603 if (IS_INTERRUPT (func_type
))
27605 /* Interrupt handlers will have pushed the
27606 IP onto the stack, so restore it now. */
27608 rtx addr
= gen_rtx_MEM (SImode
,
27609 gen_rtx_POST_INC (SImode
,
27610 stack_pointer_rtx
));
27611 set_mem_alias_set (addr
, get_frame_alias_set ());
27612 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
27613 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27614 gen_rtx_REG (SImode
, IP_REGNUM
),
27618 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
27621 if (crtl
->calls_eh_return
)
27622 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27624 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27626 if (IS_STACKALIGN (func_type
))
27627 /* Restore the original stack pointer. Before prologue, the stack was
27628 realigned and the original stack pointer saved in r0. For details,
27629 see comment in arm_expand_prologue. */
27630 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
27632 emit_jump_insn (simple_return_rtx
);
27635 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27636 function is not a sibcall. */
27638 arm_expand_epilogue (bool really_return
)
27640 unsigned long func_type
;
27641 unsigned long saved_regs_mask
;
27645 arm_stack_offsets
*offsets
;
27647 func_type
= arm_current_func_type ();
27649 /* Naked functions don't have epilogue. Hence, generate return pattern, and
27650 let output_return_instruction take care of instruction emission if any. */
27651 if (IS_NAKED (func_type
)
27652 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
27655 emit_jump_insn (simple_return_rtx
);
27659 /* If we are throwing an exception, then we really must be doing a
27660 return, so we can't tail-call. */
27661 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
27663 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
27665 arm_expand_epilogue_apcs_frame (really_return
);
27669 /* Get frame offsets for ARM. */
27670 offsets
= arm_get_frame_offsets ();
27671 saved_regs_mask
= offsets
->saved_regs_mask
;
27672 num_regs
= bit_count (saved_regs_mask
);
27674 if (frame_pointer_needed
)
27677 /* Restore stack pointer if necessary. */
27680 /* In ARM mode, frame pointer points to first saved register.
27681 Restore stack pointer to last saved register. */
27682 amount
= offsets
->frame
- offsets
->saved_regs
;
27684 /* Force out any pending memory operations that reference stacked data
27685 before stack de-allocation occurs. */
27686 emit_insn (gen_blockage ());
27687 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27688 hard_frame_pointer_rtx
,
27689 GEN_INT (amount
)));
27690 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27692 hard_frame_pointer_rtx
);
27694 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27696 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27700 /* In Thumb-2 mode, the frame pointer points to the last saved
27702 amount
= offsets
->locals_base
- offsets
->saved_regs
;
27705 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
27706 hard_frame_pointer_rtx
,
27707 GEN_INT (amount
)));
27708 arm_add_cfa_adjust_cfa_note (insn
, amount
,
27709 hard_frame_pointer_rtx
,
27710 hard_frame_pointer_rtx
);
27713 /* Force out any pending memory operations that reference stacked data
27714 before stack de-allocation occurs. */
27715 emit_insn (gen_blockage ());
27716 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
27717 hard_frame_pointer_rtx
));
27718 arm_add_cfa_adjust_cfa_note (insn
, 0,
27720 hard_frame_pointer_rtx
);
27721 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27723 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27728 /* Pop off outgoing args and local frame to adjust stack pointer to
27729 last saved register. */
27730 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
27734 /* Force out any pending memory operations that reference stacked data
27735 before stack de-allocation occurs. */
27736 emit_insn (gen_blockage ());
27737 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
27739 GEN_INT (amount
)));
27740 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
27741 stack_pointer_rtx
, stack_pointer_rtx
);
27742 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27744 emit_insn (gen_force_register_use (stack_pointer_rtx
));
27748 if (TARGET_VFP_BASE
)
27750 /* Generate VFP register multi-pop. */
27751 int end_reg
= LAST_VFP_REGNUM
+ 1;
27753 /* Scan the registers in reverse order. We need to match
27754 any groupings made in the prologue and generate matching
27755 vldm operations. The need to match groups is because,
27756 unlike pop, vldm can only do consecutive regs. */
27757 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
27758 /* Look for a case where a reg does not need restoring. */
27759 if (!reg_needs_saving_p (i
) && !reg_needs_saving_p (i
+ 1))
27761 /* Restore the regs discovered so far (from reg+2 to
27763 if (end_reg
> i
+ 2)
27764 arm_emit_vfp_multi_reg_pop (i
+ 2,
27765 (end_reg
- (i
+ 2)) / 2,
27766 stack_pointer_rtx
);
27770 /* Restore the remaining regs that we have discovered (or possibly
27771 even all of them, if the conditional in the for loop never
27773 if (end_reg
> i
+ 2)
27774 arm_emit_vfp_multi_reg_pop (i
+ 2,
27775 (end_reg
- (i
+ 2)) / 2,
27776 stack_pointer_rtx
);
27780 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
27781 if (reg_needs_saving_p (i
))
27784 rtx addr
= gen_rtx_MEM (V2SImode
,
27785 gen_rtx_POST_INC (SImode
,
27786 stack_pointer_rtx
));
27787 set_mem_alias_set (addr
, get_frame_alias_set ());
27788 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
27789 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27790 gen_rtx_REG (V2SImode
, i
),
27792 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27793 stack_pointer_rtx
, stack_pointer_rtx
);
27796 if (saved_regs_mask
)
27799 bool return_in_pc
= false;
27801 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
27802 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
27803 && !IS_CMSE_ENTRY (func_type
)
27804 && !IS_STACKALIGN (func_type
)
27806 && crtl
->args
.pretend_args_size
== 0
27807 && saved_regs_mask
& (1 << LR_REGNUM
)
27808 && !crtl
->calls_eh_return
)
27810 saved_regs_mask
&= ~(1 << LR_REGNUM
);
27811 saved_regs_mask
|= (1 << PC_REGNUM
);
27812 return_in_pc
= true;
27815 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
27817 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
27818 if (saved_regs_mask
& (1 << i
))
27820 rtx addr
= gen_rtx_MEM (SImode
,
27821 gen_rtx_POST_INC (SImode
,
27822 stack_pointer_rtx
));
27823 set_mem_alias_set (addr
, get_frame_alias_set ());
27825 if (i
== PC_REGNUM
)
27827 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
27828 XVECEXP (insn
, 0, 0) = ret_rtx
;
27829 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
27831 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
27832 insn
= emit_jump_insn (insn
);
27836 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
27838 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
27839 gen_rtx_REG (SImode
, i
),
27841 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
27843 stack_pointer_rtx
);
27850 && current_tune
->prefer_ldrd_strd
27851 && !optimize_function_for_size_p (cfun
))
27854 thumb2_emit_ldrd_pop (saved_regs_mask
);
27855 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
27856 arm_emit_ldrd_pop (saved_regs_mask
);
27858 arm_emit_multi_reg_pop (saved_regs_mask
);
27861 arm_emit_multi_reg_pop (saved_regs_mask
);
27869 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
27873 rtx dwarf
= NULL_RTX
;
27875 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27877 GEN_INT (amount
)));
27879 RTX_FRAME_RELATED_P (tmp
) = 1;
27881 if (cfun
->machine
->uses_anonymous_args
)
27883 /* Restore pretend args. Refer arm_expand_prologue on how to save
27884 pretend_args in stack. */
27885 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
27886 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
27887 for (j
= 0, i
= 0; j
< num_regs
; i
++)
27888 if (saved_regs_mask
& (1 << i
))
27890 rtx reg
= gen_rtx_REG (SImode
, i
);
27891 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
27894 REG_NOTES (tmp
) = dwarf
;
27896 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
27897 stack_pointer_rtx
, stack_pointer_rtx
);
27900 if (IS_CMSE_ENTRY (func_type
))
27902 /* CMSE_ENTRY always returns. */
27903 gcc_assert (really_return
);
27904 /* Clear all caller-saved regs that are not used to return. */
27905 cmse_nonsecure_entry_clear_before_return ();
27907 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
27909 if (TARGET_HAVE_FPCXT_CMSE
)
27913 insn
= emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx
,
27914 GEN_INT (FPCXTNS_ENUM
)));
27915 rtx dwarf
= gen_rtx_SET (stack_pointer_rtx
,
27916 plus_constant (Pmode
, stack_pointer_rtx
, 4));
27917 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
27918 RTX_FRAME_RELATED_P (insn
) = 1;
27922 if (!really_return
)
27925 if (crtl
->calls_eh_return
)
27926 emit_insn (gen_addsi3 (stack_pointer_rtx
,
27928 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
27930 if (IS_STACKALIGN (func_type
))
27931 /* Restore the original stack pointer. Before prologue, the stack was
27932 realigned and the original stack pointer saved in r0. For details,
27933 see comment in arm_expand_prologue. */
27934 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
27936 emit_jump_insn (simple_return_rtx
);
27939 /* Implementation of insn prologue_thumb1_interwork. This is the first
27940 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27943 thumb1_output_interwork (void)
27946 FILE *f
= asm_out_file
;
27948 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
27949 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
27951 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
27953 /* Generate code sequence to switch us into Thumb mode. */
27954 /* The .code 32 directive has already been emitted by
27955 ASM_DECLARE_FUNCTION_NAME. */
27956 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
27957 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
27959 /* Generate a label, so that the debugger will notice the
27960 change in instruction sets. This label is also used by
27961 the assembler to bypass the ARM code when this function
27962 is called from a Thumb encoded function elsewhere in the
27963 same file. Hence the definition of STUB_NAME here must
27964 agree with the definition in gas/config/tc-arm.c. */
27966 #define STUB_NAME ".real_start_of"
27968 fprintf (f
, "\t.code\t16\n");
27970 if (arm_dllexport_name_p (name
))
27971 name
= arm_strip_name_encoding (name
);
27973 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
27974 fprintf (f
, "\t.thumb_func\n");
27975 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
27980 /* Handle the case of a double word load into a low register from
27981 a computed memory address. The computed address may involve a
27982 register which is overwritten by the load. */
27984 thumb_load_double_from_address (rtx
*operands
)
27992 gcc_assert (REG_P (operands
[0]));
27993 gcc_assert (MEM_P (operands
[1]));
27995 /* Get the memory address. */
27996 addr
= XEXP (operands
[1], 0);
27998 /* Work out how the memory address is computed. */
27999 switch (GET_CODE (addr
))
28002 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28004 if (REGNO (operands
[0]) == REGNO (addr
))
28006 output_asm_insn ("ldr\t%H0, %2", operands
);
28007 output_asm_insn ("ldr\t%0, %1", operands
);
28011 output_asm_insn ("ldr\t%0, %1", operands
);
28012 output_asm_insn ("ldr\t%H0, %2", operands
);
28017 /* Compute <address> + 4 for the high order load. */
28018 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28020 output_asm_insn ("ldr\t%0, %1", operands
);
28021 output_asm_insn ("ldr\t%H0, %2", operands
);
28025 arg1
= XEXP (addr
, 0);
28026 arg2
= XEXP (addr
, 1);
28028 if (CONSTANT_P (arg1
))
28029 base
= arg2
, offset
= arg1
;
28031 base
= arg1
, offset
= arg2
;
28033 gcc_assert (REG_P (base
));
28035 /* Catch the case of <address> = <reg> + <reg> */
28036 if (REG_P (offset
))
28038 int reg_offset
= REGNO (offset
);
28039 int reg_base
= REGNO (base
);
28040 int reg_dest
= REGNO (operands
[0]);
28042 /* Add the base and offset registers together into the
28043 higher destination register. */
28044 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
28045 reg_dest
+ 1, reg_base
, reg_offset
);
28047 /* Load the lower destination register from the address in
28048 the higher destination register. */
28049 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
28050 reg_dest
, reg_dest
+ 1);
28052 /* Load the higher destination register from its own address
28054 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
28055 reg_dest
+ 1, reg_dest
+ 1);
28059 /* Compute <address> + 4 for the high order load. */
28060 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28062 /* If the computed address is held in the low order register
28063 then load the high order register first, otherwise always
28064 load the low order register first. */
28065 if (REGNO (operands
[0]) == REGNO (base
))
28067 output_asm_insn ("ldr\t%H0, %2", operands
);
28068 output_asm_insn ("ldr\t%0, %1", operands
);
28072 output_asm_insn ("ldr\t%0, %1", operands
);
28073 output_asm_insn ("ldr\t%H0, %2", operands
);
28079 /* With no registers to worry about we can just load the value
28081 operands
[2] = adjust_address (operands
[1], SImode
, 4);
28083 output_asm_insn ("ldr\t%H0, %2", operands
);
28084 output_asm_insn ("ldr\t%0, %1", operands
);
28088 gcc_unreachable ();
28095 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
28100 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28101 std::swap (operands
[4], operands
[5]);
28103 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
28104 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
28108 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28109 std::swap (operands
[4], operands
[5]);
28110 if (REGNO (operands
[5]) > REGNO (operands
[6]))
28111 std::swap (operands
[5], operands
[6]);
28112 if (REGNO (operands
[4]) > REGNO (operands
[5]))
28113 std::swap (operands
[4], operands
[5]);
28115 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
28116 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
28120 gcc_unreachable ();
28126 /* Output a call-via instruction for thumb state. */
28128 thumb_call_via_reg (rtx reg
)
28130 int regno
= REGNO (reg
);
28133 gcc_assert (regno
< LR_REGNUM
);
28135 /* If we are in the normal text section we can use a single instance
28136 per compilation unit. If we are doing function sections, then we need
28137 an entry per section, since we can't rely on reachability. */
28138 if (in_section
== text_section
)
28140 thumb_call_reg_needed
= 1;
28142 if (thumb_call_via_label
[regno
] == NULL
)
28143 thumb_call_via_label
[regno
] = gen_label_rtx ();
28144 labelp
= thumb_call_via_label
+ regno
;
28148 if (cfun
->machine
->call_via
[regno
] == NULL
)
28149 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
28150 labelp
= cfun
->machine
->call_via
+ regno
;
28153 output_asm_insn ("bl\t%a0", labelp
);
28157 /* Routines for generating rtl. */
28159 thumb_expand_cpymemqi (rtx
*operands
)
28161 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
28162 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
28163 HOST_WIDE_INT len
= INTVAL (operands
[2]);
28164 HOST_WIDE_INT offset
= 0;
28168 emit_insn (gen_cpymem12b (out
, in
, out
, in
));
28174 emit_insn (gen_cpymem8b (out
, in
, out
, in
));
28180 rtx reg
= gen_reg_rtx (SImode
);
28181 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
28182 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
28189 rtx reg
= gen_reg_rtx (HImode
);
28190 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
28191 plus_constant (Pmode
, in
,
28193 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
28202 rtx reg
= gen_reg_rtx (QImode
);
28203 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
28204 plus_constant (Pmode
, in
,
28206 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
28213 thumb_reload_out_hi (rtx
*operands
)
28215 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
28218 /* Return the length of a function name prefix
28219 that starts with the character 'c'. */
28221 arm_get_strip_length (int c
)
28225 ARM_NAME_ENCODING_LENGTHS
28230 /* Return a pointer to a function's name with any
28231 and all prefix encodings stripped from it. */
28233 arm_strip_name_encoding (const char *name
)
28237 while ((skip
= arm_get_strip_length (* name
)))
28243 /* If there is a '*' anywhere in the name's prefix, then
28244 emit the stripped name verbatim, otherwise prepend an
28245 underscore if leading underscores are being used. */
28247 arm_asm_output_labelref (FILE *stream
, const char *name
)
28252 while ((skip
= arm_get_strip_length (* name
)))
28254 verbatim
|= (*name
== '*');
28259 fputs (name
, stream
);
28261 asm_fprintf (stream
, "%U%s", name
);
28264 /* This function is used to emit an EABI tag and its associated value.
28265 We emit the numerical value of the tag in case the assembler does not
28266 support textual tags. (Eg gas prior to 2.20). If requested we include
28267 the tag name in a comment so that anyone reading the assembler output
28268 will know which tag is being set.
28270 This function is not static because arm-c.cc needs it too. */
28273 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
28275 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
28276 if (flag_verbose_asm
|| flag_debug_asm
)
28277 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
28278 asm_fprintf (asm_out_file
, "\n");
28281 /* This function is used to print CPU tuning information as comment
28282 in assembler file. Pointers are not printed for now. */
28285 arm_print_tune_info (void)
28287 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune parameters\n");
28288 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"constant_limit:\t%d\n",
28289 current_tune
->constant_limit
);
28290 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28291 "max_insns_skipped:\t%d\n", current_tune
->max_insns_skipped
);
28292 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28293 "prefetch.num_slots:\t%d\n", current_tune
->prefetch
.num_slots
);
28294 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28295 "prefetch.l1_cache_size:\t%d\n",
28296 current_tune
->prefetch
.l1_cache_size
);
28297 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28298 "prefetch.l1_cache_line_size:\t%d\n",
28299 current_tune
->prefetch
.l1_cache_line_size
);
28300 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28301 "prefer_constant_pool:\t%d\n",
28302 (int) current_tune
->prefer_constant_pool
);
28303 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28304 "branch_cost:\t(s:speed, p:predictable)\n");
28305 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\ts&p\tcost\n");
28306 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t00\t%d\n",
28307 current_tune
->branch_cost (false, false));
28308 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t01\t%d\n",
28309 current_tune
->branch_cost (false, true));
28310 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t10\t%d\n",
28311 current_tune
->branch_cost (true, false));
28312 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t11\t%d\n",
28313 current_tune
->branch_cost (true, true));
28314 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28315 "prefer_ldrd_strd:\t%d\n",
28316 (int) current_tune
->prefer_ldrd_strd
);
28317 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28318 "logical_op_non_short_circuit:\t[%d,%d]\n",
28319 (int) current_tune
->logical_op_non_short_circuit_thumb
,
28320 (int) current_tune
->logical_op_non_short_circuit_arm
);
28321 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28322 "disparage_flag_setting_t16_encodings:\t%d\n",
28323 (int) current_tune
->disparage_flag_setting_t16_encodings
);
28324 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28325 "string_ops_prefer_neon:\t%d\n",
28326 (int) current_tune
->string_ops_prefer_neon
);
28327 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
28328 "max_insns_inline_memset:\t%d\n",
28329 current_tune
->max_insns_inline_memset
);
28330 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"fusible_ops:\t%u\n",
28331 current_tune
->fusible_ops
);
28332 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"sched_autopref:\t%d\n",
28333 (int) current_tune
->sched_autopref
);
28336 /* The last set of target options used to emit .arch directives, etc. This
28337 could be a function-local static if it were not required to expose it as a
28338 root to the garbage collector. */
28339 static GTY(()) cl_target_option
*last_asm_targ_options
= NULL
;
28341 /* Print .arch and .arch_extension directives corresponding to the
28342 current architecture configuration. */
28344 arm_print_asm_arch_directives (FILE *stream
, cl_target_option
*targ_options
)
28346 arm_build_target build_target
;
28347 /* If the target options haven't changed since the last time we were called
28348 there is nothing to do. This should be sufficient to suppress the
28349 majority of redundant work. */
28350 if (last_asm_targ_options
== targ_options
)
28353 last_asm_targ_options
= targ_options
;
28355 build_target
.isa
= sbitmap_alloc (isa_num_bits
);
28356 arm_configure_build_target (&build_target
, targ_options
, false);
28358 if (build_target
.core_name
28359 && !bitmap_bit_p (build_target
.isa
, isa_bit_quirk_no_asmcpu
))
28361 const char* truncated_name
28362 = arm_rewrite_selected_cpu (build_target
.core_name
);
28363 asm_fprintf (stream
, "\t.cpu %s\n", truncated_name
);
28366 const arch_option
*arch
28367 = arm_parse_arch_option_name (all_architectures
, "-march",
28368 build_target
.arch_name
);
28369 auto_sbitmap
opt_bits (isa_num_bits
);
28373 if (strcmp (build_target
.arch_name
, "armv7ve") == 0)
28375 /* Keep backward compatability for assemblers which don't support
28376 armv7ve. Fortunately, none of the following extensions are reset
28377 by a .fpu directive. */
28378 asm_fprintf (stream
, "\t.arch armv7-a\n");
28379 asm_fprintf (stream
, "\t.arch_extension virt\n");
28380 asm_fprintf (stream
, "\t.arch_extension idiv\n");
28381 asm_fprintf (stream
, "\t.arch_extension sec\n");
28382 asm_fprintf (stream
, "\t.arch_extension mp\n");
28385 asm_fprintf (stream
, "\t.arch %s\n", build_target
.arch_name
);
28387 /* The .fpu directive will reset any architecture extensions from the
28388 assembler that relate to the fp/vector extensions. So put this out before
28389 any .arch_extension directives. */
28390 const char *fpu_name
= (TARGET_SOFT_FLOAT
28392 : arm_identify_fpu_from_isa (build_target
.isa
));
28393 asm_fprintf (stream
, "\t.fpu %s\n", fpu_name
);
28395 if (!arch
->common
.extensions
)
28398 for (const struct cpu_arch_extension
*opt
= arch
->common
.extensions
;
28404 arm_initialize_isa (opt_bits
, opt
->isa_bits
);
28406 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28407 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft" MVE and MVE with
28408 floating point instructions is disabled. So the following check
28409 restricts the printing of ".arch_extension mve" and
28410 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28411 this special behaviour because the feature bit "mve" and
28412 "mve_float" are not part of "fpu bits", so they are not cleared
28413 when -mfloat-abi=soft (i.e nofp) but the marco TARGET_HAVE_MVE and
28414 TARGET_HAVE_MVE_FLOAT are disabled. */
28415 if ((bitmap_bit_p (opt_bits
, isa_bit_mve
) && !TARGET_HAVE_MVE
)
28416 || (bitmap_bit_p (opt_bits
, isa_bit_mve_float
)
28417 && !TARGET_HAVE_MVE_FLOAT
))
28420 /* If every feature bit of this option is set in the target ISA
28421 specification, print out the option name. However, don't print
28422 anything if all the bits are part of the FPU specification. */
28423 if (bitmap_subset_p (opt_bits
, build_target
.isa
)
28424 && !bitmap_subset_p (opt_bits
, isa_all_fpubits_internal
))
28425 asm_fprintf (stream
, "\t.arch_extension %s\n", opt
->name
);
28431 arm_file_start (void)
28435 arm_print_asm_arch_directives
28436 (asm_out_file
, TREE_TARGET_OPTION (target_option_default_node
));
28440 /* If we have a named cpu, but we the assembler does not support that
28441 name via .cpu, put out a cpu name attribute; but don't do this if the
28442 name starts with the fictitious prefix, 'generic'. */
28443 if (arm_active_target
.core_name
28444 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_asmcpu
)
28445 && !startswith (arm_active_target
.core_name
, "generic"))
28447 const char* truncated_name
28448 = arm_rewrite_selected_cpu (arm_active_target
.core_name
);
28449 if (bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_asmcpu
))
28450 asm_fprintf (asm_out_file
, "\t.eabi_attribute 5, \"%s\"\n",
28454 if (print_tune_info
)
28455 arm_print_tune_info ();
28457 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
28458 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28460 if (TARGET_HARD_FLOAT_ABI
)
28461 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28463 /* Some of these attributes only apply when the corresponding features
28464 are used. However we don't have any easy way of figuring this out.
28465 Conservatively record the setting that would have been used. */
28467 if (flag_rounding_math
)
28468 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28470 if (!flag_unsafe_math_optimizations
)
28472 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28473 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28475 if (flag_signaling_nans
)
28476 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28478 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28479 flag_finite_math_only
? 1 : 3);
28481 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28482 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28483 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28484 flag_short_enums
? 1 : 2);
28486 /* Tag_ABI_optimization_goals. */
28489 else if (optimize
>= 2)
28495 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
28497 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28500 if (arm_fp16_format
)
28501 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28502 (int) arm_fp16_format
);
28504 if (arm_lang_output_object_attributes_hook
)
28505 arm_lang_output_object_attributes_hook();
28508 default_file_start ();
28512 arm_file_end (void)
28516 /* Just in case the last function output in the assembler had non-default
28517 architecture directives, we force the assembler state back to the default
28518 set, so that any 'calculated' build attributes are based on the default
28519 options rather than the special options for that function. */
28520 arm_print_asm_arch_directives
28521 (asm_out_file
, TREE_TARGET_OPTION (target_option_default_node
));
28523 if (NEED_INDICATE_EXEC_STACK
)
28524 /* Add .note.GNU-stack. */
28525 file_end_indicate_exec_stack ();
28527 if (! thumb_call_reg_needed
)
28530 switch_to_section (text_section
);
28531 asm_fprintf (asm_out_file
, "\t.code 16\n");
28532 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
28534 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
28536 rtx label
= thumb_call_via_label
[regno
];
28540 targetm
.asm_out
.internal_label (asm_out_file
, "L",
28541 CODE_LABEL_NUMBER (label
));
28542 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
28548 /* Symbols in the text segment can be accessed without indirecting via the
28549 constant pool; it may take an extra binary operation, but this is still
28550 faster than indirecting via memory. Don't do this when not optimizing,
28551 since we won't be calculating al of the offsets necessary to do this
28555 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
28557 if (optimize
> 0 && TREE_CONSTANT (decl
))
28558 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
28560 default_encode_section_info (decl
, rtl
, first
);
28562 #endif /* !ARM_PE */
28565 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
28567 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
28568 && !strcmp (prefix
, "L"))
28570 arm_ccfsm_state
= 0;
28571 arm_target_insn
= NULL
;
28573 default_internal_label (stream
, prefix
, labelno
);
28576 /* Define classes to generate code as RTL or output asm to a file.
28577 Using templates then allows to use the same code to output code
28578 sequences in the two formats. */
28579 class thumb1_const_rtl
28582 thumb1_const_rtl (rtx dst
) : dst (dst
) {}
28584 void mov (HOST_WIDE_INT val
)
28586 emit_set_insn (dst
, GEN_INT (val
));
28589 void add (HOST_WIDE_INT val
)
28591 emit_set_insn (dst
, gen_rtx_PLUS (SImode
, dst
, GEN_INT (val
)));
28594 void ashift (HOST_WIDE_INT shift
)
28596 emit_set_insn (dst
, gen_rtx_ASHIFT (SImode
, dst
, GEN_INT (shift
)));
28601 emit_set_insn (dst
, gen_rtx_NEG (SImode
, dst
));
28608 class thumb1_const_print
28611 thumb1_const_print (FILE *f
, int regno
)
28614 dst_regname
= reg_names
[regno
];
28617 void mov (HOST_WIDE_INT val
)
28619 asm_fprintf (t_file
, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC
"\n",
28623 void add (HOST_WIDE_INT val
)
28625 asm_fprintf (t_file
, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC
"\n",
28629 void ashift (HOST_WIDE_INT shift
)
28631 asm_fprintf (t_file
, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC
"\n",
28632 dst_regname
, shift
);
28637 asm_fprintf (t_file
, "\trsbs\t%s, #0\n", dst_regname
);
28642 const char *dst_regname
;
28645 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28646 Avoid generating useless code when one of the bytes is zero. */
28649 thumb1_gen_const_int_1 (T dst
, HOST_WIDE_INT op1
)
28651 bool mov_done_p
= false;
28652 unsigned HOST_WIDE_INT val
= op1
;
28656 gcc_assert (op1
== trunc_int_for_mode (op1
, SImode
));
28664 /* For negative numbers with the first nine bits set, build the
28665 opposite of OP1, then negate it, it's generally shorter and not
28667 if ((val
& 0xFF800000) == 0xFF800000)
28669 thumb1_gen_const_int_1 (dst
, -op1
);
28674 /* In the general case, we need 7 instructions to build
28675 a 32 bits constant (1 movs, 3 lsls, 3 adds). We can
28676 do better if VAL is small enough, or
28677 right-shiftable by a suitable amount. If the
28678 right-shift enables to encode at least one less byte,
28679 it's worth it: we save a adds and a lsls at the
28680 expense of a final lsls. */
28681 int final_shift
= number_of_first_bit_set (val
);
28683 int leading_zeroes
= clz_hwi (val
);
28684 int number_of_bytes_needed
28685 = ((HOST_BITS_PER_WIDE_INT
- 1 - leading_zeroes
)
28686 / BITS_PER_UNIT
) + 1;
28687 int number_of_bytes_needed2
28688 = ((HOST_BITS_PER_WIDE_INT
- 1 - leading_zeroes
- final_shift
)
28689 / BITS_PER_UNIT
) + 1;
28691 if (number_of_bytes_needed2
< number_of_bytes_needed
)
28692 val
>>= final_shift
;
28696 /* If we are in a very small range, we can use either a single movs
28702 unsigned HOST_WIDE_INT high
= val
- 255;
28710 if (final_shift
> 0)
28711 dst
.ashift (final_shift
);
28715 /* General case, emit upper 3 bytes as needed. */
28716 for (i
= 0; i
< 3; i
++)
28718 unsigned HOST_WIDE_INT byte
= (val
>> (8 * (3 - i
))) & 0xff;
28722 /* We are about to emit new bits, stop accumulating a
28723 shift amount, and left-shift only if we have already
28724 emitted some upper bits. */
28727 dst
.ashift (shift
);
28733 /* Stop accumulating shift amount since we've just
28734 emitted some bits. */
28744 /* Emit lower byte. */
28746 dst
.mov (val
& 0xff);
28749 dst
.ashift (shift
);
28751 dst
.add (val
& 0xff);
28754 if (final_shift
> 0)
28755 dst
.ashift (final_shift
);
28759 /* Proxies for thumb1.md, since the thumb1_const_print and
28760 thumb1_const_rtl classes are not exported. */
28762 thumb1_gen_const_int_rtl (rtx dst
, HOST_WIDE_INT op1
)
28764 thumb1_const_rtl
t (dst
);
28765 thumb1_gen_const_int_1 (t
, op1
);
28769 thumb1_gen_const_int_print (rtx dst
, HOST_WIDE_INT op1
)
28771 thumb1_const_print
t (asm_out_file
, REGNO (dst
));
28772 thumb1_gen_const_int_1 (t
, op1
);
28775 /* Output code to add DELTA to the first argument, and then jump
28776 to FUNCTION. Used for C++ multiple inheritance. */
28779 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
28780 HOST_WIDE_INT
, tree function
)
28782 static int thunk_label
= 0;
28785 int mi_delta
= delta
;
28786 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
28788 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
28791 mi_delta
= - mi_delta
;
28793 final_start_function (emit_barrier (), file
, 1);
28797 int labelno
= thunk_label
++;
28798 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
28799 /* Thunks are entered in arm mode when available. */
28800 if (TARGET_THUMB1_ONLY
)
28802 /* push r3 so we can use it as a temporary. */
28803 /* TODO: Omit this save if r3 is not used. */
28804 fputs ("\tpush {r3}\n", file
);
28806 /* With -mpure-code, we cannot load the address from the
28807 constant pool: we build it explicitly. */
28808 if (target_pure_code
)
28810 fputs ("\tmovs\tr3, #:upper8_15:#", file
);
28811 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28812 fputc ('\n', file
);
28813 fputs ("\tlsls r3, #8\n", file
);
28814 fputs ("\tadds\tr3, #:upper0_7:#", file
);
28815 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28816 fputc ('\n', file
);
28817 fputs ("\tlsls r3, #8\n", file
);
28818 fputs ("\tadds\tr3, #:lower8_15:#", file
);
28819 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28820 fputc ('\n', file
);
28821 fputs ("\tlsls r3, #8\n", file
);
28822 fputs ("\tadds\tr3, #:lower0_7:#", file
);
28823 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28824 fputc ('\n', file
);
28827 fputs ("\tldr\tr3, ", file
);
28831 fputs ("\tldr\tr12, ", file
);
28834 if (!target_pure_code
)
28836 assemble_name (file
, label
);
28837 fputc ('\n', file
);
28842 /* If we are generating PIC, the ldr instruction below loads
28843 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28844 the address of the add + 8, so we have:
28846 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28849 Note that we have "+ 1" because some versions of GNU ld
28850 don't set the low bit of the result for R_ARM_REL32
28851 relocations against thumb function symbols.
28852 On ARMv6M this is +4, not +8. */
28853 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
28854 assemble_name (file
, labelpc
);
28855 fputs (":\n", file
);
28856 if (TARGET_THUMB1_ONLY
)
28858 /* This is 2 insns after the start of the thunk, so we know it
28859 is 4-byte aligned. */
28860 fputs ("\tadd\tr3, pc, r3\n", file
);
28861 fputs ("\tmov r12, r3\n", file
);
28864 fputs ("\tadd\tr12, pc, r12\n", file
);
28866 else if (TARGET_THUMB1_ONLY
)
28867 fputs ("\tmov r12, r3\n", file
);
28869 if (TARGET_THUMB1_ONLY
)
28871 if (mi_delta
> 255)
28873 /* With -mpure-code, we cannot load MI_DELTA from the
28874 constant pool: we build it explicitly. */
28875 if (target_pure_code
)
28877 thumb1_const_print
r3 (file
, 3);
28878 thumb1_gen_const_int_1 (r3
, mi_delta
);
28882 fputs ("\tldr\tr3, ", file
);
28883 assemble_name (file
, label
);
28884 fputs ("+4\n", file
);
28886 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
28887 mi_op
, this_regno
, this_regno
);
28889 else if (mi_delta
!= 0)
28891 /* Thumb1 unified syntax requires s suffix in instruction name when
28892 one of the operands is immediate. */
28893 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
28894 mi_op
, this_regno
, this_regno
,
28900 /* TODO: Use movw/movt for large constants when available. */
28901 while (mi_delta
!= 0)
28903 if ((mi_delta
& (3 << shift
)) == 0)
28907 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
28908 mi_op
, this_regno
, this_regno
,
28909 mi_delta
& (0xff << shift
));
28910 mi_delta
&= ~(0xff << shift
);
28917 if (TARGET_THUMB1_ONLY
)
28918 fputs ("\tpop\t{r3}\n", file
);
28920 fprintf (file
, "\tbx\tr12\n");
28922 /* With -mpure-code, we don't need to emit literals for the
28923 function address and delta since we emitted code to build
28925 if (!target_pure_code
)
28927 ASM_OUTPUT_ALIGN (file
, 2);
28928 assemble_name (file
, label
);
28929 fputs (":\n", file
);
28932 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28933 rtx tem
= XEXP (DECL_RTL (function
), 0);
28934 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28935 pipeline offset is four rather than eight. Adjust the offset
28937 tem
= plus_constant (GET_MODE (tem
), tem
,
28938 TARGET_THUMB1_ONLY
? -3 : -7);
28939 tem
= gen_rtx_MINUS (GET_MODE (tem
),
28941 gen_rtx_SYMBOL_REF (Pmode
,
28942 ggc_strdup (labelpc
)));
28943 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
28946 /* Output ".word .LTHUNKn". */
28947 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
28949 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
28950 assemble_integer (GEN_INT (mi_delta
), 4, BITS_PER_WORD
, 1);
28955 fputs ("\tb\t", file
);
28956 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
28957 if (NEED_PLT_RELOC
)
28958 fputs ("(PLT)", file
);
28959 fputc ('\n', file
);
28962 final_end_function ();
28965 /* MI thunk handling for TARGET_32BIT. */
28968 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
28969 HOST_WIDE_INT vcall_offset
, tree function
)
28971 const bool long_call_p
= arm_is_long_call_p (function
);
28973 /* On ARM, this_regno is R0 or R1 depending on
28974 whether the function returns an aggregate or not.
28976 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
28978 ? R1_REGNUM
: R0_REGNUM
);
28980 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
28981 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
28982 reload_completed
= 1;
28983 emit_note (NOTE_INSN_PROLOGUE_END
);
28985 /* Add DELTA to THIS_RTX. */
28987 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
28988 delta
, this_rtx
, this_rtx
, false);
28990 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
28991 if (vcall_offset
!= 0)
28993 /* Load *THIS_RTX. */
28994 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
28995 /* Compute *THIS_RTX + VCALL_OFFSET. */
28996 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
28998 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
28999 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
29000 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
29003 /* Generate a tail call to the target function. */
29004 if (!TREE_USED (function
))
29006 assemble_external (function
);
29007 TREE_USED (function
) = 1;
29009 rtx funexp
= XEXP (DECL_RTL (function
), 0);
29012 emit_move_insn (temp
, funexp
);
29015 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
29016 rtx_insn
*insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
29017 SIBLING_CALL_P (insn
) = 1;
29020 /* Indirect calls require a bit of fixup in PIC mode. */
29023 split_all_insns_noflow ();
29027 insn
= get_insns ();
29028 shorten_branches (insn
);
29029 final_start_function (insn
, file
, 1);
29030 final (insn
, file
, 1);
29031 final_end_function ();
29033 /* Stop pretending this is a post-reload pass. */
29034 reload_completed
= 0;
29037 /* Output code to add DELTA to the first argument, and then jump
29038 to FUNCTION. Used for C++ multiple inheritance. */
29041 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
29042 HOST_WIDE_INT vcall_offset
, tree function
)
29044 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk
));
29046 assemble_start_function (thunk
, fnname
);
29048 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
29050 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
29051 assemble_end_function (thunk
, fnname
);
29055 arm_emit_vector_const (FILE *file
, rtx x
)
29058 const char * pattern
;
29060 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
29062 switch (GET_MODE (x
))
29064 case E_V2SImode
: pattern
= "%08x"; break;
29065 case E_V4HImode
: pattern
= "%04x"; break;
29066 case E_V8QImode
: pattern
= "%02x"; break;
29067 default: gcc_unreachable ();
29070 fprintf (file
, "0x");
29071 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
29075 element
= CONST_VECTOR_ELT (x
, i
);
29076 fprintf (file
, pattern
, INTVAL (element
));
29082 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29083 HFmode constant pool entries are actually loaded with ldr. */
29085 arm_emit_fp16_const (rtx c
)
29089 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
29090 if (WORDS_BIG_ENDIAN
)
29091 assemble_zeros (2);
29092 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
29093 if (!WORDS_BIG_ENDIAN
)
29094 assemble_zeros (2);
29098 arm_output_load_gr (rtx
*operands
)
29105 if (!MEM_P (operands
[1])
29106 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
29107 || !REG_P (reg
= XEXP (sum
, 0))
29108 || !CONST_INT_P (offset
= XEXP (sum
, 1))
29109 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
29110 return "wldrw%?\t%0, %1";
29112 /* Fix up an out-of-range load of a GR register. */
29113 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
29114 wcgr
= operands
[0];
29116 output_asm_insn ("ldr%?\t%0, %1", operands
);
29118 operands
[0] = wcgr
;
29120 output_asm_insn ("tmcr%?\t%0, %1", operands
);
29121 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
29126 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29128 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29129 named arg and all anonymous args onto the stack.
29130 XXX I know the prologue shouldn't be pushing registers, but it is faster
29134 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
29135 const function_arg_info
&arg
,
29137 int second_time ATTRIBUTE_UNUSED
)
29139 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
29142 cfun
->machine
->uses_anonymous_args
= 1;
29143 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
29145 nregs
= pcum
->aapcs_ncrn
;
29148 int res
= arm_needs_doubleword_align (arg
.mode
, arg
.type
);
29149 if (res
< 0 && warn_psabi
)
29150 inform (input_location
, "parameter passing for argument of "
29151 "type %qT changed in GCC 7.1", arg
.type
);
29155 if (res
> 1 && warn_psabi
)
29156 inform (input_location
,
29157 "parameter passing for argument of type "
29158 "%qT changed in GCC 9.1", arg
.type
);
29163 nregs
= pcum
->nregs
;
29165 if (nregs
< NUM_ARG_REGS
)
29166 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
29169 /* We can't rely on the caller doing the proper promotion when
29170 using APCS or ATPCS. */
29173 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
29175 return !TARGET_AAPCS_BASED
;
29178 static machine_mode
29179 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
29181 int *punsignedp ATTRIBUTE_UNUSED
,
29182 const_tree fntype ATTRIBUTE_UNUSED
,
29183 int for_return ATTRIBUTE_UNUSED
)
29185 if (GET_MODE_CLASS (mode
) == MODE_INT
29186 && GET_MODE_SIZE (mode
) < 4)
29194 arm_default_short_enums (void)
29196 return ARM_DEFAULT_SHORT_ENUMS
;
29200 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29203 arm_align_anon_bitfield (void)
29205 return TARGET_AAPCS_BASED
;
29209 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29212 arm_cxx_guard_type (void)
29214 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
29218 /* The EABI says test the least significant bit of a guard variable. */
29221 arm_cxx_guard_mask_bit (void)
29223 return TARGET_AAPCS_BASED
;
29227 /* The EABI specifies that all array cookies are 8 bytes long. */
29230 arm_get_cookie_size (tree type
)
29234 if (!TARGET_AAPCS_BASED
)
29235 return default_cxx_get_cookie_size (type
);
29237 size
= build_int_cst (sizetype
, 8);
29242 /* The EABI says that array cookies should also contain the element size. */
29245 arm_cookie_has_size (void)
29247 return TARGET_AAPCS_BASED
;
29251 /* The EABI says constructors and destructors should return a pointer to
29252 the object constructed/destroyed. */
29255 arm_cxx_cdtor_returns_this (void)
29257 return TARGET_AAPCS_BASED
;
29260 /* The EABI says that an inline function may never be the key
29264 arm_cxx_key_method_may_be_inline (void)
29266 return !TARGET_AAPCS_BASED
;
29270 arm_cxx_determine_class_data_visibility (tree decl
)
29272 if (!TARGET_AAPCS_BASED
29273 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
29276 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29277 is exported. However, on systems without dynamic vague linkage,
29278 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29279 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
29280 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
29282 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
29283 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
29287 arm_cxx_class_data_always_comdat (void)
29289 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29290 vague linkage if the class has no key function. */
29291 return !TARGET_AAPCS_BASED
;
29295 /* The EABI says __aeabi_atexit should be used to register static
29299 arm_cxx_use_aeabi_atexit (void)
29301 return TARGET_AAPCS_BASED
;
29306 arm_set_return_address (rtx source
, rtx scratch
)
29308 arm_stack_offsets
*offsets
;
29309 HOST_WIDE_INT delta
;
29311 unsigned long saved_regs
;
29313 offsets
= arm_get_frame_offsets ();
29314 saved_regs
= offsets
->saved_regs_mask
;
29316 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
29317 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
29320 if (frame_pointer_needed
)
29321 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
29324 /* LR will be the first saved register. */
29325 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
29330 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
29331 GEN_INT (delta
& ~4095)));
29336 addr
= stack_pointer_rtx
;
29338 addr
= plus_constant (Pmode
, addr
, delta
);
29341 /* The store needs to be marked to prevent DSE from deleting
29342 it as dead if it is based on fp. */
29343 mem
= gen_frame_mem (Pmode
, addr
);
29344 MEM_VOLATILE_P (mem
) = true;
29345 emit_move_insn (mem
, source
);
29351 thumb_set_return_address (rtx source
, rtx scratch
)
29353 arm_stack_offsets
*offsets
;
29354 HOST_WIDE_INT delta
;
29355 HOST_WIDE_INT limit
;
29358 unsigned long mask
;
29362 offsets
= arm_get_frame_offsets ();
29363 mask
= offsets
->saved_regs_mask
;
29364 if (mask
& (1 << LR_REGNUM
))
29367 /* Find the saved regs. */
29368 if (frame_pointer_needed
)
29370 delta
= offsets
->soft_frame
- offsets
->saved_args
;
29371 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
29377 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
29380 /* Allow for the stack frame. */
29381 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
29383 /* The link register is always the first saved register. */
29386 /* Construct the address. */
29387 addr
= gen_rtx_REG (SImode
, reg
);
29390 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
29391 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
29395 addr
= plus_constant (Pmode
, addr
, delta
);
29397 /* The store needs to be marked to prevent DSE from deleting
29398 it as dead if it is based on fp. */
29399 mem
= gen_frame_mem (Pmode
, addr
);
29400 MEM_VOLATILE_P (mem
) = true;
29401 emit_move_insn (mem
, source
);
29404 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
29407 /* Implements target hook vector_mode_supported_p. */
29409 arm_vector_mode_supported_p (machine_mode mode
)
29411 /* Neon also supports V2SImode, etc. listed in the clause below. */
29412 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
29413 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
29414 || mode
== V2DImode
|| mode
== V8HFmode
|| mode
== V4BFmode
29415 || mode
== V8BFmode
))
29418 if ((TARGET_NEON
|| TARGET_IWMMXT
)
29419 && ((mode
== V2SImode
)
29420 || (mode
== V4HImode
)
29421 || (mode
== V8QImode
)))
29424 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
29425 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
29426 || mode
== V2HAmode
))
29429 if (TARGET_HAVE_MVE
29430 && (mode
== V2DImode
|| mode
== V4SImode
|| mode
== V8HImode
29431 || mode
== V16QImode
29432 || mode
== V16BImode
|| mode
== V8BImode
|| mode
== V4BImode
))
29435 if (TARGET_HAVE_MVE_FLOAT
29436 && (mode
== V2DFmode
|| mode
== V4SFmode
|| mode
== V8HFmode
))
29442 /* Implements target hook array_mode_supported_p. */
29445 arm_array_mode_supported_p (machine_mode mode
,
29446 unsigned HOST_WIDE_INT nelems
)
29448 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29449 for now, as the lane-swapping logic needs to be extended in the expanders.
29450 See PR target/82518. */
29451 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
29452 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
29453 && (nelems
>= 2 && nelems
<= 4))
29456 if (TARGET_HAVE_MVE
&& !BYTES_BIG_ENDIAN
29457 && VALID_MVE_MODE (mode
) && (nelems
== 2 || nelems
== 4))
29463 /* Use the option -mvectorize-with-neon-double to override the use of quardword
29464 registers when autovectorizing for Neon, at least until multiple vector
29465 widths are supported properly by the middle-end. */
29467 static machine_mode
29468 arm_preferred_simd_mode (scalar_mode mode
)
29474 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HFmode
: V8HFmode
;
29476 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
29478 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
29480 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
29482 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
29484 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
29491 if (TARGET_REALLY_IWMMXT
)
29504 if (TARGET_HAVE_MVE
)
29517 if (TARGET_HAVE_MVE_FLOAT
)
29531 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29533 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29534 using r0-r4 for function arguments, r7 for the stack frame and don't have
29535 enough left over to do doubleword arithmetic. For Thumb-2 all the
29536 potentially problematic instructions accept high registers so this is not
29537 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29538 that require many low registers. */
29540 arm_class_likely_spilled_p (reg_class_t rclass
)
29542 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
29543 || rclass
== CC_REG
)
29546 return default_class_likely_spilled_p (rclass
);
29549 /* Implements target hook small_register_classes_for_mode_p. */
29551 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
29553 return TARGET_THUMB1
;
29556 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29557 ARM insns and therefore guarantee that the shift count is modulo 256.
29558 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29559 guarantee no particular behavior for out-of-range counts. */
29561 static unsigned HOST_WIDE_INT
29562 arm_shift_truncation_mask (machine_mode mode
)
29564 return mode
== SImode
? 255 : 0;
29568 /* Map internal gcc register numbers to DWARF2 register numbers. */
29571 arm_debugger_regno (unsigned int regno
)
29576 if (IS_VFP_REGNUM (regno
))
29578 /* See comment in arm_dwarf_register_span. */
29579 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
29580 return 64 + regno
- FIRST_VFP_REGNUM
;
29582 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
29585 if (IS_IWMMXT_GR_REGNUM (regno
))
29586 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
29588 if (IS_IWMMXT_REGNUM (regno
))
29589 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
29591 return DWARF_FRAME_REGISTERS
;
29594 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29595 GCC models tham as 64 32-bit registers, so we need to describe this to
29596 the DWARF generation code. Other registers can use the default. */
29598 arm_dwarf_register_span (rtx rtl
)
29606 regno
= REGNO (rtl
);
29607 if (!IS_VFP_REGNUM (regno
))
29610 /* XXX FIXME: The EABI defines two VFP register ranges:
29611 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29613 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29614 corresponding D register. Until GDB supports this, we shall use the
29615 legacy encodings. We also use these encodings for D0-D15 for
29616 compatibility with older debuggers. */
29617 mode
= GET_MODE (rtl
);
29618 if (GET_MODE_SIZE (mode
) < 8)
29621 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
29623 nregs
= GET_MODE_SIZE (mode
) / 4;
29624 for (i
= 0; i
< nregs
; i
+= 2)
29625 if (TARGET_BIG_END
)
29627 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
29628 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
29632 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
29633 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
29638 nregs
= GET_MODE_SIZE (mode
) / 8;
29639 for (i
= 0; i
< nregs
; i
++)
29640 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
29643 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
29646 #if ARM_UNWIND_INFO
29647 /* Emit unwind directives for a store-multiple instruction or stack pointer
29648 push during alignment.
29649 These should only ever be generated by the function prologue code, so
29650 expect them to have a particular form.
29651 The store-multiple instruction sometimes pushes pc as the last register,
29652 although it should not be tracked into unwind information, or for -Os
29653 sometimes pushes some dummy registers before first register that needs
29654 to be tracked in unwind information; such dummy registers are there just
29655 to avoid separate stack adjustment, and will not be restored in the
29659 arm_unwind_emit_sequence (FILE * out_file
, rtx p
)
29662 HOST_WIDE_INT offset
;
29663 HOST_WIDE_INT nregs
;
29667 unsigned padfirst
= 0, padlast
= 0;
29670 e
= XVECEXP (p
, 0, 0);
29671 gcc_assert (GET_CODE (e
) == SET
);
29673 /* First insn will adjust the stack pointer. */
29674 gcc_assert (GET_CODE (e
) == SET
29675 && REG_P (SET_DEST (e
))
29676 && REGNO (SET_DEST (e
)) == SP_REGNUM
29677 && GET_CODE (SET_SRC (e
)) == PLUS
);
29679 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
29680 nregs
= XVECLEN (p
, 0) - 1;
29681 gcc_assert (nregs
);
29683 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
29686 /* For -Os dummy registers can be pushed at the beginning to
29687 avoid separate stack pointer adjustment. */
29688 e
= XVECEXP (p
, 0, 1);
29689 e
= XEXP (SET_DEST (e
), 0);
29690 if (GET_CODE (e
) == PLUS
)
29691 padfirst
= INTVAL (XEXP (e
, 1));
29692 gcc_assert (padfirst
== 0 || optimize_size
);
29693 /* The function prologue may also push pc, but not annotate it as it is
29694 never restored. We turn this into a stack pointer adjustment. */
29695 e
= XVECEXP (p
, 0, nregs
);
29696 e
= XEXP (SET_DEST (e
), 0);
29697 if (GET_CODE (e
) == PLUS
)
29698 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
29700 padlast
= offset
- 4;
29701 gcc_assert (padlast
== 0 || padlast
== 4);
29703 fprintf (out_file
, "\t.pad #4\n");
29705 fprintf (out_file
, "\t.save {");
29707 else if (IS_VFP_REGNUM (reg
))
29710 fprintf (out_file
, "\t.vsave {");
29713 /* Unknown register type. */
29714 gcc_unreachable ();
29716 /* If the stack increment doesn't match the size of the saved registers,
29717 something has gone horribly wrong. */
29718 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
29722 /* The remaining insns will describe the stores. */
29723 for (i
= 1; i
<= nregs
; i
++)
29725 /* Expect (set (mem <addr>) (reg)).
29726 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29727 e
= XVECEXP (p
, 0, i
);
29728 gcc_assert (GET_CODE (e
) == SET
29729 && MEM_P (SET_DEST (e
))
29730 && REG_P (SET_SRC (e
)));
29732 reg
= REGNO (SET_SRC (e
));
29733 gcc_assert (reg
>= lastreg
);
29736 fprintf (out_file
, ", ");
29737 /* We can't use %r for vfp because we need to use the
29738 double precision register names. */
29739 if (IS_VFP_REGNUM (reg
))
29740 asm_fprintf (out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
29742 asm_fprintf (out_file
, "%r", reg
);
29746 /* Check that the addresses are consecutive. */
29747 e
= XEXP (SET_DEST (e
), 0);
29748 if (GET_CODE (e
) == PLUS
)
29749 gcc_assert (REG_P (XEXP (e
, 0))
29750 && REGNO (XEXP (e
, 0)) == SP_REGNUM
29751 && CONST_INT_P (XEXP (e
, 1))
29752 && offset
== INTVAL (XEXP (e
, 1)));
29756 && REGNO (e
) == SP_REGNUM
);
29757 offset
+= reg_size
;
29760 fprintf (out_file
, "}\n");
29762 fprintf (out_file
, "\t.pad #%d\n", padfirst
);
29765 /* Emit unwind directives for a SET. */
29768 arm_unwind_emit_set (FILE * out_file
, rtx p
)
29776 switch (GET_CODE (e0
))
29779 /* Pushing a single register. */
29780 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
29781 || !REG_P (XEXP (XEXP (e0
, 0), 0))
29782 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
29785 asm_fprintf (out_file
, "\t.save ");
29786 if (IS_VFP_REGNUM (REGNO (e1
)))
29787 asm_fprintf(out_file
, "{d%d}\n",
29788 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
29790 asm_fprintf(out_file
, "{%r}\n", REGNO (e1
));
29794 if (REGNO (e0
) == SP_REGNUM
)
29796 /* A stack increment. */
29797 if (GET_CODE (e1
) != PLUS
29798 || !REG_P (XEXP (e1
, 0))
29799 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
29800 || !CONST_INT_P (XEXP (e1
, 1)))
29803 asm_fprintf (out_file
, "\t.pad #%wd\n",
29804 -INTVAL (XEXP (e1
, 1)));
29806 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
29808 HOST_WIDE_INT offset
;
29810 if (GET_CODE (e1
) == PLUS
)
29812 if (!REG_P (XEXP (e1
, 0))
29813 || !CONST_INT_P (XEXP (e1
, 1)))
29815 reg
= REGNO (XEXP (e1
, 0));
29816 offset
= INTVAL (XEXP (e1
, 1));
29817 asm_fprintf (out_file
, "\t.setfp %r, %r, #%wd\n",
29818 HARD_FRAME_POINTER_REGNUM
, reg
,
29821 else if (REG_P (e1
))
29824 asm_fprintf (out_file
, "\t.setfp %r, %r\n",
29825 HARD_FRAME_POINTER_REGNUM
, reg
);
29830 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
29832 /* Move from sp to reg. */
29833 asm_fprintf (out_file
, "\t.movsp %r\n", REGNO (e0
));
29835 else if (GET_CODE (e1
) == PLUS
29836 && REG_P (XEXP (e1
, 0))
29837 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
29838 && CONST_INT_P (XEXP (e1
, 1)))
29840 /* Set reg to offset from sp. */
29841 asm_fprintf (out_file
, "\t.movsp %r, #%d\n",
29842 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
29854 /* Emit unwind directives for the given insn. */
29857 arm_unwind_emit (FILE * out_file
, rtx_insn
*insn
)
29860 bool handled_one
= false;
29862 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
29865 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
29866 && (TREE_NOTHROW (current_function_decl
)
29867 || crtl
->all_throwers_are_sibcalls
))
29870 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
29873 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
29875 switch (REG_NOTE_KIND (note
))
29877 case REG_FRAME_RELATED_EXPR
:
29878 pat
= XEXP (note
, 0);
29881 case REG_CFA_REGISTER
:
29882 pat
= XEXP (note
, 0);
29885 pat
= PATTERN (insn
);
29886 if (GET_CODE (pat
) == PARALLEL
)
29887 pat
= XVECEXP (pat
, 0, 0);
29890 /* Only emitted for IS_STACKALIGN re-alignment. */
29895 src
= SET_SRC (pat
);
29896 dest
= SET_DEST (pat
);
29898 gcc_assert (src
== stack_pointer_rtx
);
29899 reg
= REGNO (dest
);
29900 asm_fprintf (out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29903 handled_one
= true;
29906 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
29907 to get correct dwarf information for shrink-wrap. We should not
29908 emit unwind information for it because these are used either for
29909 pretend arguments or notes to adjust sp and restore registers from
29911 case REG_CFA_DEF_CFA
:
29912 case REG_CFA_ADJUST_CFA
:
29913 case REG_CFA_RESTORE
:
29916 case REG_CFA_EXPRESSION
:
29917 case REG_CFA_OFFSET
:
29918 /* ??? Only handling here what we actually emit. */
29919 gcc_unreachable ();
29927 pat
= PATTERN (insn
);
29930 switch (GET_CODE (pat
))
29933 arm_unwind_emit_set (out_file
, pat
);
29937 /* Store multiple. */
29938 arm_unwind_emit_sequence (out_file
, pat
);
29947 /* Output a reference from a function exception table to the type_info
29948 object X. The EABI specifies that the symbol should be relocated by
29949 an R_ARM_TARGET2 relocation. */
29952 arm_output_ttype (rtx x
)
29954 fputs ("\t.word\t", asm_out_file
);
29955 output_addr_const (asm_out_file
, x
);
29956 /* Use special relocations for symbol references. */
29957 if (!CONST_INT_P (x
))
29958 fputs ("(TARGET2)", asm_out_file
);
29959 fputc ('\n', asm_out_file
);
29964 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29967 arm_asm_emit_except_personality (rtx personality
)
29969 fputs ("\t.personality\t", asm_out_file
);
29970 output_addr_const (asm_out_file
, personality
);
29971 fputc ('\n', asm_out_file
);
29973 #endif /* ARM_UNWIND_INFO */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  exception_section = get_unnamed_section (0, output_section_asm_op,
                                           "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}
29991 /* Output unwind directives for the start/end of a function. */
29994 arm_output_fn_unwind (FILE * f
, bool prologue
)
29996 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
30000 fputs ("\t.fnstart\n", f
);
30003 /* If this function will never be unwound, then mark it as such.
30004 The came condition is used in arm_unwind_emit to suppress
30005 the frame annotations. */
30006 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
30007 && (TREE_NOTHROW (current_function_decl
)
30008 || crtl
->all_throwers_are_sibcalls
))
30009 fputs("\t.cantunwind\n", f
);
30011 fputs ("\t.fnend\n", f
);
30016 arm_emit_tls_decoration (FILE *fp
, rtx x
)
30018 enum tls_reloc reloc
;
30021 val
= XVECEXP (x
, 0, 0);
30022 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
30024 output_addr_const (fp
, val
);
30029 fputs ("(tlsgd)", fp
);
30031 case TLS_GD32_FDPIC
:
30032 fputs ("(tlsgd_fdpic)", fp
);
30035 fputs ("(tlsldm)", fp
);
30037 case TLS_LDM32_FDPIC
:
30038 fputs ("(tlsldm_fdpic)", fp
);
30041 fputs ("(tlsldo)", fp
);
30044 fputs ("(gottpoff)", fp
);
30046 case TLS_IE32_FDPIC
:
30047 fputs ("(gottpoff_fdpic)", fp
);
30050 fputs ("(tpoff)", fp
);
30053 fputs ("(tlsdesc)", fp
);
30056 gcc_unreachable ();
30065 fputs (" + (. - ", fp
);
30066 output_addr_const (fp
, XVECEXP (x
, 0, 2));
30067 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
30068 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
30069 output_addr_const (fp
, XVECEXP (x
, 0, 3));
30079 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30082 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
30084 gcc_assert (size
== 4);
30085 fputs ("\t.word\t", file
);
30086 output_addr_const (file
, x
);
30087 fputs ("(tlsldo)", file
);
30090 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30093 arm_output_addr_const_extra (FILE *fp
, rtx x
)
30095 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
30096 return arm_emit_tls_decoration (fp
, x
);
30097 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
30100 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
30102 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
30103 assemble_name_raw (fp
, label
);
30107 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
30109 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
30113 output_addr_const (fp
, XVECEXP (x
, 0, 0));
30117 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
30119 output_addr_const (fp
, XVECEXP (x
, 0, 0));
30123 output_addr_const (fp
, XVECEXP (x
, 0, 1));
30127 else if (GET_CODE (x
) == CONST_VECTOR
)
30128 return arm_emit_vector_const (fp
, x
);
30133 /* Output assembly for a shift instruction.
30134 SET_FLAGS determines how the instruction modifies the condition codes.
30135 0 - Do not set condition codes.
30136 1 - Set condition codes.
30137 2 - Use smallest instruction. */
30139 arm_output_shift(rtx
* operands
, int set_flags
)
30142 static const char flag_chars
[3] = {'?', '.', '!'};
30147 c
= flag_chars
[set_flags
];
30148 shift
= shift_op(operands
[3], &val
);
30152 operands
[2] = GEN_INT(val
);
30153 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
30156 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
30158 output_asm_insn (pattern
, operands
);
30162 /* Output assembly for a WMMX immediate shift instruction. */
30164 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
30166 int shift
= INTVAL (operands
[2]);
30168 machine_mode opmode
= GET_MODE (operands
[0]);
30170 gcc_assert (shift
>= 0);
30172 /* If the shift value in the register versions is > 63 (for D qualifier),
30173 31 (for W qualifier) or 15 (for H qualifier). */
30174 if (((opmode
== V4HImode
) && (shift
> 15))
30175 || ((opmode
== V2SImode
) && (shift
> 31))
30176 || ((opmode
== DImode
) && (shift
> 63)))
30180 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
30181 output_asm_insn (templ
, operands
);
30182 if (opmode
== DImode
)
30184 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
30185 output_asm_insn (templ
, operands
);
30190 /* The destination register will contain all zeros. */
30191 sprintf (templ
, "wzero\t%%0");
30192 output_asm_insn (templ
, operands
);
30197 if ((opmode
== DImode
) && (shift
> 32))
30199 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
30200 output_asm_insn (templ
, operands
);
30201 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
30202 output_asm_insn (templ
, operands
);
30206 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
30207 output_asm_insn (templ
, operands
);
30212 /* Output assembly for a WMMX tinsr instruction. */
30214 arm_output_iwmmxt_tinsr (rtx
*operands
)
30216 int mask
= INTVAL (operands
[3]);
30219 int units
= mode_nunits
[GET_MODE (operands
[0])];
30220 gcc_assert ((mask
& (mask
- 1)) == 0);
30221 for (i
= 0; i
< units
; ++i
)
30223 if ((mask
& 0x01) == 1)
30229 gcc_assert (i
< units
);
30231 switch (GET_MODE (operands
[0]))
30234 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
30237 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
30240 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
30243 gcc_unreachable ();
30246 output_asm_insn (templ
, operands
);
30251 /* Output a Thumb-1 casesi dispatch sequence. */
30253 thumb1_output_casesi (rtx
*operands
)
30255 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
30257 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
30259 switch (GET_MODE(diff_vec
))
30262 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
30263 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30265 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
30266 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30268 return "bl\t%___gnu_thumb1_case_si";
30270 gcc_unreachable ();
30274 /* Output a Thumb-2 casesi instruction. */
30276 thumb2_output_casesi (rtx
*operands
)
30278 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
30280 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
30282 output_asm_insn ("cmp\t%0, %1", operands
);
30283 output_asm_insn ("bhi\t%l3", operands
);
30284 switch (GET_MODE(diff_vec
))
30287 return "tbb\t[%|pc, %0]";
30289 return "tbh\t[%|pc, %0, lsl #1]";
30293 output_asm_insn ("adr\t%4, %l2", operands
);
30294 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
30295 output_asm_insn ("add\t%4, %4, %5", operands
);
30300 output_asm_insn ("adr\t%4, %l2", operands
);
30301 return "ldr\t%|pc, [%4, %0, lsl #2]";
30304 gcc_unreachable ();
30308 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30309 per-core tuning structs. */
30311 arm_issue_rate (void)
30313 return current_tune
->issue_rate
;
30316 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30318 arm_sched_variable_issue (FILE *, int, rtx_insn
*insn
, int more
)
30320 if (DEBUG_INSN_P (insn
))
30323 rtx_code code
= GET_CODE (PATTERN (insn
));
30324 if (code
== USE
|| code
== CLOBBER
)
30327 if (get_attr_type (insn
) == TYPE_NO_INSN
)
30333 /* Return how many instructions should scheduler lookahead to choose the
30336 arm_first_cycle_multipass_dfa_lookahead (void)
30338 int issue_rate
= arm_issue_rate ();
30340 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
30343 /* Enable modeling of L2 auto-prefetcher. */
30345 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
30347 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
30351 arm_mangle_type (const_tree type
)
30353 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30354 has to be managled as if it is in the "std" namespace. */
30355 if (TARGET_AAPCS_BASED
30356 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
30357 return "St9__va_list";
30359 /* Half-precision floating point types. */
30360 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
30362 if (TYPE_MODE (type
) == BFmode
)
30368 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
30370 if (TYPE_NAME (type
) != NULL
)
30371 return arm_mangle_builtin_type (type
);
30373 /* Use the default mangling. */
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  12, 14,  8,  9, 10, 11
};
30389 /* Adjust register allocation order when compiling for Thumb. */
30392 arm_order_regs_for_local_alloc (void)
30394 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
30395 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
30397 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
30398 sizeof (thumb_core_reg_alloc_order
));
30401 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30404 arm_frame_pointer_required (void)
30406 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
30409 /* If the function receives nonlocal gotos, it needs to save the frame
30410 pointer in the nonlocal_goto_save_area object. */
30411 if (cfun
->has_nonlocal_label
)
30414 /* The frame pointer is required for non-leaf APCS frames. */
30415 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !crtl
->is_leaf
)
30418 /* If we are probing the stack in the prologue, we will have a faulting
30419 instruction prior to the stack adjustment and this requires a frame
30420 pointer if we want to catch the exception using the EABI unwinder. */
30421 if (!IS_INTERRUPT (arm_current_func_type ())
30422 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
30423 || flag_stack_clash_protection
)
30424 && arm_except_unwind_info (&global_options
) == UI_TARGET
30425 && cfun
->can_throw_non_call_exceptions
)
30427 HOST_WIDE_INT size
= get_frame_size ();
30429 /* That's irrelevant if there is no stack adjustment. */
30433 /* That's relevant only if there is a stack probe. */
30434 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
30436 /* We don't have the final size of the frame so adjust. */
30437 size
+= 32 * UNITS_PER_WORD
;
30438 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
30448 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30449 All modes except THUMB1 have conditional execution.
30450 If we have conditional arithmetic, return false before reload to
30451 enable some ifcvt transformations. */
30453 arm_have_conditional_execution (void)
30455 bool has_cond_exec
, enable_ifcvt_trans
;
30457 /* Only THUMB1 cannot support conditional execution. */
30458 has_cond_exec
= !TARGET_THUMB1
;
30460 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30462 enable_ifcvt_trans
= TARGET_COND_ARITH
&& !reload_completed
;
30464 return has_cond_exec
&& !enable_ifcvt_trans
;
30467 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30468 static HOST_WIDE_INT
30469 arm_vector_alignment (const_tree type
)
30471 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
30473 if (TARGET_AAPCS_BASED
)
30474 align
= MIN (align
, 64);
30479 static unsigned int
30480 arm_autovectorize_vector_modes (vector_modes
*modes
, bool)
30482 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
30484 modes
->safe_push (V16QImode
);
30485 modes
->safe_push (V8QImode
);
30491 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
30493 /* Vectors which aren't in packed structures will not be less aligned than
30494 the natural alignment of their element type, so this is safe. */
30495 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
30498 return default_builtin_vector_alignment_reachable (type
, is_packed
);
30502 arm_builtin_support_vector_misalignment (machine_mode mode
,
30503 const_tree type
, int misalignment
,
30506 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
30508 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
30513 /* If the misalignment is unknown, we should be able to handle the access
30514 so long as it is not to a member of a packed data structure. */
30515 if (misalignment
== -1)
30518 /* Return true if the misalignment is a multiple of the natural alignment
30519 of the vector's element type. This is probably always going to be
30520 true in practice, since we've already established that this isn't a
30522 return ((misalignment
% align
) == 0);
30525 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
30530 arm_conditional_register_usage (void)
30534 if (TARGET_THUMB1
&& optimize_size
)
30536 /* When optimizing for size on Thumb-1, it's better not
30537 to use the HI regs, because of the overhead of
30539 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
30540 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
30543 /* The link register can be clobbered by any branch insn,
30544 but we have no way to track that at present, so mark
30545 it as unavailable. */
30547 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
30549 if (TARGET_32BIT
&& TARGET_VFP_BASE
)
30551 /* VFPv3 registers are disabled when earlier VFP
30552 versions are selected due to the definition of
30553 LAST_VFP_REGNUM. */
30554 for (regno
= FIRST_VFP_REGNUM
;
30555 regno
<= LAST_VFP_REGNUM
; ++ regno
)
30557 fixed_regs
[regno
] = 0;
30558 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
30559 || regno
>= FIRST_VFP_REGNUM
+ 32;
30561 if (TARGET_HAVE_MVE
)
30562 fixed_regs
[VPR_REGNUM
] = 0;
30565 if (TARGET_REALLY_IWMMXT
&& !TARGET_GENERAL_REGS_ONLY
)
30567 regno
= FIRST_IWMMXT_GR_REGNUM
;
30568 /* The 2002/10/09 revision of the XScale ABI has wCG0
30569 and wCG1 as call-preserved registers. The 2002/11/21
30570 revision changed this so that all wCG registers are
30571 scratch registers. */
30572 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
30573 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
30574 fixed_regs
[regno
] = 0;
30575 /* The XScale ABI has wR0 - wR9 as scratch registers,
30576 the rest as call-preserved registers. */
30577 for (regno
= FIRST_IWMMXT_REGNUM
;
30578 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
30580 fixed_regs
[regno
] = 0;
30581 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
30585 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
30587 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
30588 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
30590 else if (TARGET_APCS_STACK
)
30592 fixed_regs
[10] = 1;
30593 call_used_regs
[10] = 1;
30595 /* -mcaller-super-interworking reserves r11 for calls to
30596 _interwork_r11_call_via_rN(). Making the register global
30597 is an easy way of ensuring that it remains valid for all
30599 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
30600 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
30602 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30603 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30604 if (TARGET_CALLER_INTERWORKING
)
30605 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
30608 /* The Q and GE bits are only accessed via special ACLE patterns. */
30609 CLEAR_HARD_REG_BIT (operand_reg_set
, APSRQ_REGNUM
);
30610 CLEAR_HARD_REG_BIT (operand_reg_set
, APSRGE_REGNUM
);
30612 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30616 arm_preferred_rename_class (reg_class_t rclass
)
30618 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30619 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
30620 and code size can be reduced. */
30621 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
30627 /* Compute the attribute "length" of insn "*push_multi".
30628 So this function MUST be kept in sync with that insn pattern. */
30630 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
30632 int i
, regno
, hi_reg
;
30633 int num_saves
= XVECLEN (parallel_op
, 0);
30643 regno
= REGNO (first_op
);
30644 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
30645 list is 8-bit. Normally this means all registers in the list must be
30646 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
30647 encodings. There is one exception for PUSH that LR in HI_REGS can be used
30648 with 16-bit encoding. */
30649 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30650 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
30652 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
30653 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
30661 /* Compute the attribute "length" of insn. Currently, this function is used
30662 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30663 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30664 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
30665 true if OPERANDS contains insn which explicit updates base register. */
30668 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
30677 rtx parallel_op
= operands
[0];
30678 /* Initialize to elements number of PARALLEL. */
30679 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
30680 /* Initialize the value to base register. */
30681 unsigned regno
= REGNO (operands
[1]);
30682 /* Skip return and write back pattern.
30683 We only need register pop pattern for later analysis. */
30684 unsigned first_indx
= 0;
30685 first_indx
+= return_pc
? 1 : 0;
30686 first_indx
+= write_back_p
? 1 : 0;
30688 /* A pop operation can be done through LDM or POP. If the base register is SP
30689 and if it's with write back, then a LDM will be alias of POP. */
30690 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
30691 bool ldm_p
= !pop_p
;
30693 /* Check base register for LDM. */
30694 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
30697 /* Check each register in the list. */
30698 for (; indx
>= first_indx
; indx
--)
30700 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
30701 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30702 comment in arm_attr_length_push_multi. */
30703 if (REGNO_REG_CLASS (regno
) == HI_REGS
30704 && (regno
!= PC_REGNUM
|| ldm_p
))
30711 /* Compute the number of instructions emitted by output_move_double. */
30713 arm_count_output_move_double_insns (rtx
*operands
)
30717 /* output_move_double may modify the operands array, so call it
30718 here on a copy of the array. */
30719 ops
[0] = operands
[0];
30720 ops
[1] = operands
[1];
30721 output_move_double (ops
, false, &count
);
30725 /* Same as above, but operands are a register/memory pair in SImode.
30726 Assumes operands has the base register in position 0 and memory in position
30727 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
30729 arm_count_ldrdstrd_insns (rtx
*operands
, bool load
)
30733 int regnum
, memnum
;
30735 regnum
= 0, memnum
= 1;
30737 regnum
= 1, memnum
= 0;
30738 ops
[regnum
] = gen_rtx_REG (DImode
, REGNO (operands
[0]));
30739 ops
[memnum
] = adjust_address (operands
[2], DImode
, 0);
30740 output_move_double (ops
, false, &count
);
30746 vfp3_const_double_for_fract_bits (rtx operand
)
30748 REAL_VALUE_TYPE r0
;
30750 if (!CONST_DOUBLE_P (operand
))
30753 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
30754 if (exact_real_inverse (DFmode
, &r0
)
30755 && !REAL_VALUE_NEGATIVE (r0
))
30757 if (exact_real_truncate (DFmode
, &r0
))
30759 HOST_WIDE_INT value
= real_to_integer (&r0
);
30760 value
= value
& 0xffffffff;
30761 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
30763 int ret
= exact_log2 (value
);
30764 gcc_assert (IN_RANGE (ret
, 0, 31));
30772 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30773 log2 is in [1, 32], return that log2. Otherwise return -1.
30774 This is used in the patterns for vcvt.s32.f32 floating-point to
30775 fixed-point conversions. */
30778 vfp3_const_double_for_bits (rtx x
)
30780 const REAL_VALUE_TYPE
*r
;
30782 if (!CONST_DOUBLE_P (x
))
30785 r
= CONST_DOUBLE_REAL_VALUE (x
);
30787 if (REAL_VALUE_NEGATIVE (*r
)
30788 || REAL_VALUE_ISNAN (*r
)
30789 || REAL_VALUE_ISINF (*r
)
30790 || !real_isinteger (r
, SFmode
))
30793 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
30795 /* The exact_log2 above will have returned -1 if this is
30796 not an exact log2. */
30797 if (!IN_RANGE (hwint
, 1, 32))
30804 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30807 arm_pre_atomic_barrier (enum memmodel model
)
30809 if (need_atomic_barrier_p (model
, true))
30810 emit_insn (gen_memory_barrier ());
30814 arm_post_atomic_barrier (enum memmodel model
)
30816 if (need_atomic_barrier_p (model
, false))
30817 emit_insn (gen_memory_barrier ());
30820 /* Emit the load-exclusive and store-exclusive instructions.
30821 Use acquire and release versions if necessary. */
30824 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
30826 rtx (*gen
) (rtx
, rtx
);
30832 case E_QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
30833 case E_HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
30834 case E_SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
30835 case E_DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
30837 gcc_unreachable ();
30844 case E_QImode
: gen
= gen_arm_load_exclusiveqi
; break;
30845 case E_HImode
: gen
= gen_arm_load_exclusivehi
; break;
30846 case E_SImode
: gen
= gen_arm_load_exclusivesi
; break;
30847 case E_DImode
: gen
= gen_arm_load_exclusivedi
; break;
30849 gcc_unreachable ();
30853 emit_insn (gen (rval
, mem
));
30857 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
30860 rtx (*gen
) (rtx
, rtx
, rtx
);
30866 case E_QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
30867 case E_HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
30868 case E_SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
30869 case E_DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
30871 gcc_unreachable ();
30878 case E_QImode
: gen
= gen_arm_store_exclusiveqi
; break;
30879 case E_HImode
: gen
= gen_arm_store_exclusivehi
; break;
30880 case E_SImode
: gen
= gen_arm_store_exclusivesi
; break;
30881 case E_DImode
: gen
= gen_arm_store_exclusivedi
; break;
30883 gcc_unreachable ();
30887 emit_insn (gen (bval
, rval
, mem
));
30890 /* Mark the previous jump instruction as unlikely. */
30893 emit_unlikely_jump (rtx insn
)
30895 rtx_insn
*jump
= emit_jump_insn (insn
);
30896 add_reg_br_prob_note (jump
, profile_probability::very_unlikely ());
30899 /* Expand a compare and swap pattern. */
30902 arm_expand_compare_and_swap (rtx operands
[])
30904 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
30905 machine_mode mode
, cmp_mode
;
30907 bval
= operands
[0];
30908 rval
= operands
[1];
30910 oldval
= operands
[3];
30911 newval
= operands
[4];
30912 is_weak
= operands
[5];
30913 mod_s
= operands
[6];
30914 mod_f
= operands
[7];
30915 mode
= GET_MODE (mem
);
30917 /* Normally the succ memory model must be stronger than fail, but in the
30918 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30919 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30921 if (TARGET_HAVE_LDACQ
30922 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
30923 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
30924 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
30930 /* For narrow modes, we're going to perform the comparison in SImode,
30931 so do the zero-extension now. */
30932 rval
= gen_reg_rtx (SImode
);
30933 oldval
= convert_modes (SImode
, mode
, oldval
, true);
30937 /* Force the value into a register if needed. We waited until after
30938 the zero-extension above to do this properly. */
30939 if (!arm_add_operand (oldval
, SImode
))
30940 oldval
= force_reg (SImode
, oldval
);
30944 if (!cmpdi_operand (oldval
, mode
))
30945 oldval
= force_reg (mode
, oldval
);
30949 gcc_unreachable ();
30953 cmp_mode
= E_SImode
;
30955 cmp_mode
= CC_Zmode
;
30957 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CC_Zmode
, CC_REGNUM
);
30958 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode
, mode
, bdst
, rval
, mem
,
30959 oldval
, newval
, is_weak
, mod_s
, mod_f
));
30961 if (mode
== QImode
|| mode
== HImode
)
30962 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
30964 /* In all cases, we arrange for success to be signaled by Z set.
30965 This arrangement allows for the boolean result to be used directly
30966 in a subsequent branch, post optimization. For Thumb-1 targets, the
30967 boolean negation of the result is also stored in bval because Thumb-1
30968 backend lacks dependency tracking for CC flag due to flag-setting not
30969 being represented at RTL level. */
30971 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
30974 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
30975 emit_insn (gen_rtx_SET (bval
, x
));
30979 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30980 another memory store between the load-exclusive and store-exclusive can
30981 reset the monitor from Exclusive to Open state. This means we must wait
30982 until after reload to split the pattern, lest we get a register spill in
30983 the middle of the atomic sequence. Success of the compare and swap is
30984 indicated by the Z flag set for 32bit targets and by neg_bval being zero
30985 for Thumb-1 targets (ie. negation of the boolean value returned by
30986 atomic_compare_and_swapmode standard pattern in operand 0). */
30989 arm_split_compare_and_swap (rtx operands
[])
30991 rtx rval
, mem
, oldval
, newval
, neg_bval
, mod_s_rtx
;
30993 enum memmodel mod_s
, mod_f
;
30995 rtx_code_label
*label1
, *label2
;
30998 rval
= operands
[1];
31000 oldval
= operands
[3];
31001 newval
= operands
[4];
31002 is_weak
= (operands
[5] != const0_rtx
);
31003 mod_s_rtx
= operands
[6];
31004 mod_s
= memmodel_from_int (INTVAL (mod_s_rtx
));
31005 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
31006 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
31007 mode
= GET_MODE (mem
);
31009 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
31011 bool use_acquire
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_acquire (mod_s_rtx
);
31012 bool use_release
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_release (mod_s_rtx
);
31014 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31015 a full barrier is emitted after the store-release. */
31017 use_acquire
= false;
31019 /* Checks whether a barrier is needed and emits one accordingly. */
31020 if (!(use_acquire
|| use_release
))
31021 arm_pre_atomic_barrier (mod_s
);
31026 label1
= gen_label_rtx ();
31027 emit_label (label1
);
31029 label2
= gen_label_rtx ();
31031 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
31033 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
31034 as required to communicate with arm_expand_compare_and_swap. */
31037 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
31038 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
31039 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
31040 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
31041 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
31045 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
31046 if (thumb1_cmpneg_operand (oldval
, SImode
))
31049 if (!satisfies_constraint_L (oldval
))
31051 gcc_assert (satisfies_constraint_J (oldval
));
31053 /* For such immediates, ADDS needs the source and destination regs
31056 Normally this would be handled by RA, but this is all happening
31058 emit_move_insn (neg_bval
, rval
);
31062 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval
, src
, oldval
,
31067 emit_move_insn (neg_bval
, const1_rtx
);
31068 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
31072 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
31074 /* Weak or strong, we want EQ to be true for success, so that we
31075 match the flags that we got from the compare above. */
31078 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
31079 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
31080 emit_insn (gen_rtx_SET (cond
, x
));
31085 /* Z is set to boolean value of !neg_bval, as required to communicate
31086 with arm_expand_compare_and_swap. */
31087 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
31088 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
31091 if (!is_mm_relaxed (mod_f
))
31092 emit_label (label2
);
31094 /* Checks whether a barrier is needed and emits one accordingly. */
31096 || !(use_acquire
|| use_release
))
31097 arm_post_atomic_barrier (mod_s
);
31099 if (is_mm_relaxed (mod_f
))
31100 emit_label (label2
);
31103 /* Split an atomic operation pattern. Operation is given by CODE and is one
31104 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31105 operation). Operation is performed on the content at MEM and on VALUE
31106 following the memory model MODEL_RTX. The content at MEM before and after
31107 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31108 success of the operation is returned in COND. Using a scratch register or
31109 an operand register for these determines what result is returned for that
31113 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
31114 rtx value
, rtx model_rtx
, rtx cond
)
31116 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
31117 machine_mode mode
= GET_MODE (mem
);
31118 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
31119 rtx_code_label
*label
;
31120 bool all_low_regs
, bind_old_new
;
31123 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
31125 bool use_acquire
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_acquire (model_rtx
);
31126 bool use_release
= TARGET_HAVE_LDACQ
&& aarch_mm_needs_release (model_rtx
);
31128 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31129 a full barrier is emitted after the store-release. */
31131 use_acquire
= false;
31133 /* Checks whether a barrier is needed and emits one accordingly. */
31134 if (!(use_acquire
|| use_release
))
31135 arm_pre_atomic_barrier (model
);
31137 label
= gen_label_rtx ();
31138 emit_label (label
);
31141 new_out
= gen_lowpart (wmode
, new_out
);
31143 old_out
= gen_lowpart (wmode
, old_out
);
31146 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
31148 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
31150 /* Does the operation require destination and first operand to use the same
31151 register? This is decided by register constraints of relevant insn
31152 patterns in thumb1.md. */
31153 gcc_assert (!new_out
|| REG_P (new_out
));
31154 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
31155 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
31156 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
31161 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
31163 /* We want to return the old value while putting the result of the operation
31164 in the same register as the old value so copy the old value over to the
31165 destination register and use that register for the operation. */
31166 if (old_out
&& bind_old_new
)
31168 emit_move_insn (new_out
, old_out
);
31179 x
= gen_rtx_AND (wmode
, old_out
, value
);
31180 emit_insn (gen_rtx_SET (new_out
, x
));
31181 x
= gen_rtx_NOT (wmode
, new_out
);
31182 emit_insn (gen_rtx_SET (new_out
, x
));
31186 if (CONST_INT_P (value
))
31188 value
= gen_int_mode (-INTVAL (value
), wmode
);
31194 if (mode
== DImode
)
31196 /* DImode plus/minus need to clobber flags. */
31197 /* The adddi3 and subdi3 patterns are incorrectly written so that
31198 they require matching operands, even when we could easily support
31199 three operands. Thankfully, this can be fixed up post-splitting,
31200 as the individual add+adc patterns do accept three operands and
31201 post-reload cprop can make these moves go away. */
31202 emit_move_insn (new_out
, old_out
);
31204 x
= gen_adddi3 (new_out
, new_out
, value
);
31206 x
= gen_subdi3 (new_out
, new_out
, value
);
31213 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
31214 emit_insn (gen_rtx_SET (new_out
, x
));
31218 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
31221 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
31222 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
31224 /* Checks whether a barrier is needed and emits one accordingly. */
31226 || !(use_acquire
|| use_release
))
31227 arm_post_atomic_barrier (model
);
31230 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31232 arm_mode_to_pred_mode (machine_mode mode
)
31234 switch (GET_MODE_NUNITS (mode
))
31236 case 16: return V16BImode
;
31237 case 8: return V8BImode
;
31238 case 4: return V4BImode
;
31240 return opt_machine_mode ();
31243 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31244 If CAN_INVERT, store either the result or its inverse in TARGET
31245 and return true if TARGET contains the inverse. If !CAN_INVERT,
31246 always store the result in TARGET, never its inverse.
31248 Note that the handling of floating-point comparisons is not
31252 arm_expand_vector_compare (rtx target
, rtx_code code
, rtx op0
, rtx op1
,
31255 machine_mode cmp_result_mode
= GET_MODE (target
);
31256 machine_mode cmp_mode
= GET_MODE (op0
);
31260 /* MVE supports more comparisons than Neon. */
31261 if (TARGET_HAVE_MVE
)
31266 /* For these we need to compute the inverse of the requested
31275 code
= reverse_condition_maybe_unordered (code
);
31278 /* Recursively emit the inverted comparison into a temporary
31279 and then store its inverse in TARGET. This avoids reusing
31280 TARGET (which for integer NE could be one of the inputs). */
31281 rtx tmp
= gen_reg_rtx (cmp_result_mode
);
31282 if (arm_expand_vector_compare (tmp
, code
, op0
, op1
, true))
31283 gcc_unreachable ();
31284 emit_insn (gen_rtx_SET (target
, gen_rtx_NOT (cmp_result_mode
, tmp
)));
31297 /* These are natively supported by Neon for zero comparisons, but otherwise
31298 require the operands to be swapped. For MVE, we can only compare
31302 if (!TARGET_HAVE_MVE
)
31303 if (op1
!= CONST0_RTX (cmp_mode
))
31305 code
= swap_condition (code
);
31306 std::swap (op0
, op1
);
31308 /* Fall through. */
31310 /* These are natively supported by Neon for both register and zero
31311 operands. MVE supports registers only. */
31316 if (TARGET_HAVE_MVE
)
31318 switch (GET_MODE_CLASS (cmp_mode
))
31320 case MODE_VECTOR_INT
:
31321 emit_insn (gen_mve_vcmpq (code
, cmp_mode
, target
,
31322 op0
, force_reg (cmp_mode
, op1
)));
31324 case MODE_VECTOR_FLOAT
:
31325 if (TARGET_HAVE_MVE_FLOAT
)
31326 emit_insn (gen_mve_vcmpq_f (code
, cmp_mode
, target
,
31327 op0
, force_reg (cmp_mode
, op1
)));
31329 gcc_unreachable ();
31332 gcc_unreachable ();
31336 emit_insn (gen_neon_vc (code
, cmp_mode
, target
, op0
, op1
));
31339 /* These are natively supported for register operands only.
31340 Comparisons with zero aren't useful and should be folded
31341 or canonicalized by target-independent code. */
31344 if (TARGET_HAVE_MVE
)
31345 emit_insn (gen_mve_vcmpq (code
, cmp_mode
, target
,
31346 op0
, force_reg (cmp_mode
, op1
)));
31348 emit_insn (gen_neon_vc (code
, cmp_mode
, target
,
31349 op0
, force_reg (cmp_mode
, op1
)));
31352 /* These require the operands to be swapped and likewise do not
31353 support comparisons with zero. */
31356 if (TARGET_HAVE_MVE
)
31357 emit_insn (gen_mve_vcmpq (swap_condition (code
), cmp_mode
, target
,
31358 force_reg (cmp_mode
, op1
), op0
));
31360 emit_insn (gen_neon_vc (swap_condition (code
), cmp_mode
,
31361 target
, force_reg (cmp_mode
, op1
), op0
));
31364 /* These need a combination of two comparisons. */
31368 /* Operands are LTGT iff (a > b || a > b).
31369 Operands are ORDERED iff (a > b || a <= b). */
31370 rtx gt_res
= gen_reg_rtx (cmp_result_mode
);
31371 rtx alt_res
= gen_reg_rtx (cmp_result_mode
);
31372 rtx_code alt_code
= (code
== LTGT
? LT
: LE
);
31373 if (arm_expand_vector_compare (gt_res
, GT
, op0
, op1
, true)
31374 || arm_expand_vector_compare (alt_res
, alt_code
, op0
, op1
, true))
31375 gcc_unreachable ();
31376 emit_insn (gen_rtx_SET (target
, gen_rtx_IOR (cmp_result_mode
,
31377 gt_res
, alt_res
)));
31382 gcc_unreachable ();
31386 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31387 CMP_RESULT_MODE is the mode of the comparison result. */
31390 arm_expand_vcond (rtx
*operands
, machine_mode cmp_result_mode
)
31392 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31393 arm_expand_vector_compare, and another one here. */
31396 if (TARGET_HAVE_MVE
)
31397 mask
= gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode
).require ());
31399 mask
= gen_reg_rtx (cmp_result_mode
);
31401 bool inverted
= arm_expand_vector_compare (mask
, GET_CODE (operands
[3]),
31402 operands
[4], operands
[5], true);
31404 std::swap (operands
[1], operands
[2]);
31406 emit_insn (gen_neon_vbsl (GET_MODE (operands
[0]), operands
[0],
31407 mask
, operands
[1], operands
[2]));
31410 machine_mode cmp_mode
= GET_MODE (operands
[0]);
31412 switch (GET_MODE_CLASS (cmp_mode
))
31414 case MODE_VECTOR_INT
:
31415 emit_insn (gen_mve_vpselq (VPSELQ_S
, cmp_mode
, operands
[0],
31416 operands
[1], operands
[2], mask
));
31418 case MODE_VECTOR_FLOAT
:
31419 if (TARGET_HAVE_MVE_FLOAT
)
31420 emit_insn (gen_mve_vpselq_f (cmp_mode
, operands
[0],
31421 operands
[1], operands
[2], mask
));
31423 gcc_unreachable ();
31426 gcc_unreachable ();
31431 #define MAX_VECT_LEN 16
31433 struct expand_vec_perm_d
31435 rtx target
, op0
, op1
;
31436 vec_perm_indices perm
;
31437 machine_mode vmode
;
31442 /* Generate a variable permutation. */
31445 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
31447 machine_mode vmode
= GET_MODE (target
);
31448 bool one_vector_p
= rtx_equal_p (op0
, op1
);
31450 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
31451 gcc_checking_assert (GET_MODE (op0
) == vmode
);
31452 gcc_checking_assert (GET_MODE (op1
) == vmode
);
31453 gcc_checking_assert (GET_MODE (sel
) == vmode
);
31454 gcc_checking_assert (TARGET_NEON
);
31458 if (vmode
== V8QImode
)
31459 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
31461 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
31467 if (vmode
== V8QImode
)
31469 pair
= gen_reg_rtx (V16QImode
);
31470 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
31471 pair
= gen_lowpart (TImode
, pair
);
31472 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
31476 pair
= gen_reg_rtx (OImode
);
31477 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
31478 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
31484 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
31486 machine_mode vmode
= GET_MODE (target
);
31487 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
31488 bool one_vector_p
= rtx_equal_p (op0
, op1
);
31491 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31492 numbering of elements for big-endian, we must reverse the order. */
31493 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
31495 /* The VTBL instruction does not use a modulo index, so we must take care
31496 of that ourselves. */
31497 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
31498 mask
= gen_const_vec_duplicate (vmode
, mask
);
31499 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
31501 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
31504 /* Map lane ordering between architectural lane order, and GCC lane order,
31505 taking into account ABI. See comment above output_move_neon for details. */
31508 neon_endian_lane_map (machine_mode mode
, int lane
)
31510 if (BYTES_BIG_ENDIAN
)
31512 int nelems
= GET_MODE_NUNITS (mode
);
31513 /* Reverse lane order. */
31514 lane
= (nelems
- 1 - lane
);
31515 /* Reverse D register order, to match ABI. */
31516 if (GET_MODE_SIZE (mode
) == 16)
31517 lane
= lane
^ (nelems
/ 2);
31522 /* Some permutations index into pairs of vectors, this is a helper function
31523 to map indexes into those pairs of vectors. */
31526 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
31528 int nelem
= GET_MODE_NUNITS (mode
);
31529 if (BYTES_BIG_ENDIAN
)
31531 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
31535 /* Generate or test for an insn that supports a constant permutation. */
31537 /* Recognize patterns for the VUZP insns. */
31540 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
31542 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
31543 rtx out0
, out1
, in0
, in1
;
31547 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
31550 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31551 big endian pattern on 64 bit vectors, so we correct for that. */
31552 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
31553 && GET_MODE_SIZE (d
->vmode
) == 8 ? nelt
: 0;
31555 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
31557 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
31559 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
31563 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
31565 for (i
= 0; i
< nelt
; i
++)
31568 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
31569 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
31579 if (swap_nelt
!= 0)
31580 std::swap (in0
, in1
);
31583 out1
= gen_reg_rtx (d
->vmode
);
31585 std::swap (out0
, out1
);
31587 emit_insn (gen_neon_vuzp_internal (d
->vmode
, out0
, in0
, in1
, out1
));
31591 /* Recognize patterns for the VZIP insns. */
31594 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
31596 unsigned int i
, high
, mask
, nelt
= d
->perm
.length ();
31597 rtx out0
, out1
, in0
, in1
;
31601 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
31604 is_swapped
= BYTES_BIG_ENDIAN
;
31606 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
31609 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
31611 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
31615 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
31617 for (i
= 0; i
< nelt
/ 2; i
++)
31620 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
31621 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
31625 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
31626 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
31638 std::swap (in0
, in1
);
31641 out1
= gen_reg_rtx (d
->vmode
);
31643 std::swap (out0
, out1
);
31645 emit_insn (gen_neon_vzip_internal (d
->vmode
, out0
, in0
, in1
, out1
));
31649 /* Recognize patterns for the VREV insns. */
31651 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
31653 unsigned int i
, j
, diff
, nelt
= d
->perm
.length ();
31654 rtx (*gen
) (machine_mode
, rtx
, rtx
);
31656 if (!d
->one_vector_p
)
31667 gen
= gen_neon_vrev64
;
31678 gen
= gen_neon_vrev32
;
31684 gen
= gen_neon_vrev64
;
31695 gen
= gen_neon_vrev16
;
31699 gen
= gen_neon_vrev32
;
31705 gen
= gen_neon_vrev64
;
31715 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
31716 for (j
= 0; j
<= diff
; j
+= 1)
31718 /* This is guaranteed to be true as the value of diff
31719 is 7, 3, 1 and we should have enough elements in the
31720 queue to generate this. Getting a vector mask with a
31721 value of diff other than these values implies that
31722 something is wrong by the time we get here. */
31723 gcc_assert (i
+ j
< nelt
);
31724 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
31732 emit_insn (gen (d
->vmode
, d
->target
, d
->op0
));
31736 /* Recognize patterns for the VTRN insns. */
31739 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
31741 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
31742 rtx out0
, out1
, in0
, in1
;
31744 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
31747 /* Note that these are little-endian tests. Adjust for big-endian later. */
31748 if (d
->perm
[0] == 0)
31750 else if (d
->perm
[0] == 1)
31754 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
31756 for (i
= 0; i
< nelt
; i
+= 2)
31758 if (d
->perm
[i
] != i
+ odd
)
31760 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
31770 if (BYTES_BIG_ENDIAN
)
31772 std::swap (in0
, in1
);
31777 out1
= gen_reg_rtx (d
->vmode
);
31779 std::swap (out0
, out1
);
31781 emit_insn (gen_neon_vtrn_internal (d
->vmode
, out0
, in0
, in1
, out1
));
31785 /* Recognize patterns for the VEXT insns. */
31788 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
31790 unsigned int i
, nelt
= d
->perm
.length ();
31793 unsigned int location
;
31795 unsigned int next
= d
->perm
[0] + 1;
31797 /* TODO: Handle GCC's numbering of elements for big-endian. */
31798 if (BYTES_BIG_ENDIAN
)
31801 /* Check if the extracted indexes are increasing by one. */
31802 for (i
= 1; i
< nelt
; next
++, i
++)
31804 /* If we hit the most significant element of the 2nd vector in
31805 the previous iteration, no need to test further. */
31806 if (next
== 2 * nelt
)
31809 /* If we are operating on only one vector: it could be a
31810 rotation. If there are only two elements of size < 64, let
31811 arm_evpc_neon_vrev catch it. */
31812 if (d
->one_vector_p
&& (next
== nelt
))
31814 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
31820 if (d
->perm
[i
] != next
)
31824 location
= d
->perm
[0];
31830 offset
= GEN_INT (location
);
31832 if(d
->vmode
== E_DImode
)
31835 emit_insn (gen_neon_vext (d
->vmode
, d
->target
, d
->op0
, d
->op1
, offset
));
31839 /* The NEON VTBL instruction is a fully variable permuation that's even
31840 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31841 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31842 can do slightly better by expanding this as a constant where we don't
31843 have to apply a mask. */
31846 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
31848 rtx rperm
[MAX_VECT_LEN
], sel
;
31849 machine_mode vmode
= d
->vmode
;
31850 unsigned int i
, nelt
= d
->perm
.length ();
31852 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31853 numbering of elements for big-endian, we must reverse the order. */
31854 if (BYTES_BIG_ENDIAN
)
31860 /* Generic code will try constant permutation twice. Once with the
31861 original mode and again with the elements lowered to QImode.
31862 So wait and don't do the selector expansion ourselves. */
31863 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
31866 for (i
= 0; i
< nelt
; ++i
)
31867 rperm
[i
] = GEN_INT (d
->perm
[i
]);
31868 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
31869 sel
= force_reg (vmode
, sel
);
31871 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
31876 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
31878 /* Check if the input mask matches vext before reordering the
31881 if (arm_evpc_neon_vext (d
))
31884 /* The pattern matching functions above are written to look for a small
31885 number to begin the sequence (0, 1, N/2). If we begin with an index
31886 from the second operand, we can swap the operands. */
31887 unsigned int nelt
= d
->perm
.length ();
31888 if (d
->perm
[0] >= nelt
)
31890 d
->perm
.rotate_inputs (1);
31891 std::swap (d
->op0
, d
->op1
);
31896 if (arm_evpc_neon_vuzp (d
))
31898 if (arm_evpc_neon_vzip (d
))
31900 if (arm_evpc_neon_vrev (d
))
31902 if (arm_evpc_neon_vtrn (d
))
31904 return arm_evpc_neon_vtbl (d
);
31909 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
31912 arm_vectorize_vec_perm_const (machine_mode vmode
, machine_mode op_mode
,
31913 rtx target
, rtx op0
, rtx op1
,
31914 const vec_perm_indices
&sel
)
31916 if (vmode
!= op_mode
)
31919 struct expand_vec_perm_d d
;
31920 int i
, nelt
, which
;
31922 if (!VALID_NEON_DREG_MODE (vmode
) && !VALID_NEON_QREG_MODE (vmode
))
31928 rtx nop0
= force_reg (vmode
, op0
);
31934 op1
= force_reg (vmode
, op1
);
31939 gcc_assert (VECTOR_MODE_P (d
.vmode
));
31940 d
.testing_p
= !target
;
31942 nelt
= GET_MODE_NUNITS (d
.vmode
);
31943 for (i
= which
= 0; i
< nelt
; ++i
)
31945 int ei
= sel
[i
] & (2 * nelt
- 1);
31946 which
|= (ei
< nelt
? 1 : 2);
31955 d
.one_vector_p
= false;
31956 if (d
.testing_p
|| !rtx_equal_p (op0
, op1
))
31959 /* The elements of PERM do not suggest that only the first operand
31960 is used, but both operands are identical. Allow easier matching
31961 of the permutation by folding the permutation into the single
31966 d
.one_vector_p
= true;
31971 d
.one_vector_p
= true;
31975 d
.perm
.new_vector (sel
.encoding (), d
.one_vector_p
? 1 : 2, nelt
);
31978 return arm_expand_vec_perm_const_1 (&d
);
31980 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
31981 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
31982 if (!d
.one_vector_p
)
31983 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
31986 bool ret
= arm_expand_vec_perm_const_1 (&d
);
31993 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
31995 /* If we are soft float and we do not have ldrd
31996 then all auto increment forms are ok. */
31997 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
32002 /* Post increment and Pre Decrement are supported for all
32003 instruction forms except for vector forms. */
32006 if (VECTOR_MODE_P (mode
))
32008 if (code
!= ARM_PRE_DEC
)
32018 /* Without LDRD and mode size greater than
32019 word size, there is no point in auto-incrementing
32020 because ldm and stm will not have these forms. */
32021 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
32024 /* Vector and floating point modes do not support
32025 these auto increment forms. */
32026 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
32039 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32040 on ARM, since we know that shifts by negative amounts are no-ops.
32041 Additionally, the default expansion code is not available or suitable
32042 for post-reload insn splits (this can occur when the register allocator
32043 chooses not to do a shift in NEON).
32045 This function is used in both initial expand and post-reload splits, and
32046 handles all kinds of 64-bit shifts.
32048 Input requirements:
32049 - It is safe for the input and output to be the same register, but
32050 early-clobber rules apply for the shift amount and scratch registers.
32051 - Shift by register requires both scratch registers. In all other cases
32052 the scratch registers may be NULL.
32053 - Ashiftrt by a register also clobbers the CC register. */
32055 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
32056 rtx amount
, rtx scratch1
, rtx scratch2
)
32058 rtx out_high
= gen_highpart (SImode
, out
);
32059 rtx out_low
= gen_lowpart (SImode
, out
);
32060 rtx in_high
= gen_highpart (SImode
, in
);
32061 rtx in_low
= gen_lowpart (SImode
, in
);
32064 in = the register pair containing the input value.
32065 out = the destination register pair.
32066 up = the high- or low-part of each pair.
32067 down = the opposite part to "up".
32068 In a shift, we can consider bits to shift from "up"-stream to
32069 "down"-stream, so in a left-shift "up" is the low-part and "down"
32070 is the high-part of each register pair. */
32072 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
32073 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
32074 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
32075 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
32077 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
32079 && (REG_P (out
) || SUBREG_P (out
))
32080 && GET_MODE (out
) == DImode
);
32082 && (REG_P (in
) || SUBREG_P (in
))
32083 && GET_MODE (in
) == DImode
);
32085 && (((REG_P (amount
) || SUBREG_P (amount
))
32086 && GET_MODE (amount
) == SImode
)
32087 || CONST_INT_P (amount
)));
32088 gcc_assert (scratch1
== NULL
32089 || (GET_CODE (scratch1
) == SCRATCH
)
32090 || (GET_MODE (scratch1
) == SImode
32091 && REG_P (scratch1
)));
32092 gcc_assert (scratch2
== NULL
32093 || (GET_CODE (scratch2
) == SCRATCH
)
32094 || (GET_MODE (scratch2
) == SImode
32095 && REG_P (scratch2
)));
32096 gcc_assert (!REG_P (out
) || !REG_P (amount
)
32097 || !HARD_REGISTER_P (out
)
32098 || (REGNO (out
) != REGNO (amount
)
32099 && REGNO (out
) + 1 != REGNO (amount
)));
32101 /* Macros to make following code more readable. */
32102 #define SUB_32(DEST,SRC) \
32103 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32104 #define RSB_32(DEST,SRC) \
32105 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32106 #define SUB_S_32(DEST,SRC) \
32107 gen_addsi3_compare0 ((DEST), (SRC), \
32109 #define SET(DEST,SRC) \
32110 gen_rtx_SET ((DEST), (SRC))
32111 #define SHIFT(CODE,SRC,AMOUNT) \
32112 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32113 #define LSHIFT(CODE,SRC,AMOUNT) \
32114 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32115 SImode, (SRC), (AMOUNT))
32116 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32117 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32118 SImode, (SRC), (AMOUNT))
32120 gen_rtx_IOR (SImode, (A), (B))
32121 #define BRANCH(COND,LABEL) \
32122 gen_arm_cond_branch ((LABEL), \
32123 gen_rtx_ ## COND (CCmode, cc_reg, \
32127 /* Shifts by register and shifts by constant are handled separately. */
32128 if (CONST_INT_P (amount
))
32130 /* We have a shift-by-constant. */
32132 /* First, handle out-of-range shift amounts.
32133 In both cases we try to match the result an ARM instruction in a
32134 shift-by-register would give. This helps reduce execution
32135 differences between optimization levels, but it won't stop other
32136 parts of the compiler doing different things. This is "undefined
32137 behavior, in any case. */
32138 if (INTVAL (amount
) <= 0)
32139 emit_insn (gen_movdi (out
, in
));
32140 else if (INTVAL (amount
) >= 64)
32142 if (code
== ASHIFTRT
)
32144 rtx const31_rtx
= GEN_INT (31);
32145 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
32146 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
32149 emit_insn (gen_movdi (out
, const0_rtx
));
32152 /* Now handle valid shifts. */
32153 else if (INTVAL (amount
) < 32)
32155 /* Shifts by a constant less than 32. */
32156 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
32158 /* Clearing the out register in DImode first avoids lots
32159 of spilling and results in less stack usage.
32160 Later this redundant insn is completely removed.
32161 Do that only if "in" and "out" are different registers. */
32162 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
32163 emit_insn (SET (out
, const0_rtx
));
32164 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
32165 emit_insn (SET (out_down
,
32166 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
32168 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
32172 /* Shifts by a constant greater than 31. */
32173 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
32175 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
32176 emit_insn (SET (out
, const0_rtx
));
32177 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
32178 if (code
== ASHIFTRT
)
32179 emit_insn (gen_ashrsi3 (out_up
, in_up
,
32182 emit_insn (SET (out_up
, const0_rtx
));
32187 /* We have a shift-by-register. */
32188 rtx cc_reg
= gen_rtx_REG (CC_NZmode
, CC_REGNUM
);
32190 /* This alternative requires the scratch registers. */
32191 gcc_assert (scratch1
&& REG_P (scratch1
));
32192 gcc_assert (scratch2
&& REG_P (scratch2
));
32194 /* We will need the values "amount-32" and "32-amount" later.
32195 Swapping them around now allows the later code to be more general. */
32199 emit_insn (SUB_32 (scratch1
, amount
));
32200 emit_insn (RSB_32 (scratch2
, amount
));
32203 emit_insn (RSB_32 (scratch1
, amount
));
32204 /* Also set CC = amount > 32. */
32205 emit_insn (SUB_S_32 (scratch2
, amount
));
32208 emit_insn (RSB_32 (scratch1
, amount
));
32209 emit_insn (SUB_32 (scratch2
, amount
));
32212 gcc_unreachable ();
32215 /* Emit code like this:
32218 out_down = in_down << amount;
32219 out_down = (in_up << (amount - 32)) | out_down;
32220 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32221 out_up = in_up << amount;
32224 out_down = in_down >> amount;
32225 out_down = (in_up << (32 - amount)) | out_down;
32227 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32228 out_up = in_up << amount;
32231 out_down = in_down >> amount;
32232 out_down = (in_up << (32 - amount)) | out_down;
32234 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32235 out_up = in_up << amount;
32237 The ARM and Thumb2 variants are the same but implemented slightly
32238 differently. If this were only called during expand we could just
32239 use the Thumb2 case and let combine do the right thing, but this
32240 can also be called from post-reload splitters. */
32242 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
32244 if (!TARGET_THUMB2
)
32246 /* Emit code for ARM mode. */
32247 emit_insn (SET (out_down
,
32248 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
32249 if (code
== ASHIFTRT
)
32251 rtx_code_label
*done_label
= gen_label_rtx ();
32252 emit_jump_insn (BRANCH (LT
, done_label
));
32253 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
32255 emit_label (done_label
);
32258 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
32263 /* Emit code for Thumb2 mode.
32264 Thumb2 can't do shift and or in one insn. */
32265 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
32266 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
32268 if (code
== ASHIFTRT
)
32270 rtx_code_label
*done_label
= gen_label_rtx ();
32271 emit_jump_insn (BRANCH (LT
, done_label
));
32272 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
32273 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
32274 emit_label (done_label
);
32278 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
32279 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
32283 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
32297 /* Returns true if the pattern is a valid symbolic address, which is either a
32298 symbol_ref or (symbol_ref + addend).
32300 According to the ARM ELF ABI, the initial addend of REL-type relocations
32301 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32302 literal field of the instruction as a 16-bit signed value in the range
32303 -32768 <= A < 32768.
32305 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32306 unsigned range of 0 <= A < 256 as described in the AAELF32
32307 relocation handling documentation: REL-type relocations are encoded
32308 as unsigned in this case. */
32311 arm_valid_symbolic_address_p (rtx addr
)
32313 rtx xop0
, xop1
= NULL_RTX
;
32316 if (target_word_relocations
)
32319 if (SYMBOL_REF_P (tmp
) || LABEL_REF_P (tmp
))
32322 /* (const (plus: symbol_ref const_int)) */
32323 if (GET_CODE (addr
) == CONST
)
32324 tmp
= XEXP (addr
, 0);
32326 if (GET_CODE (tmp
) == PLUS
)
32328 xop0
= XEXP (tmp
, 0);
32329 xop1
= XEXP (tmp
, 1);
32331 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
32333 if (TARGET_THUMB1
&& !TARGET_HAVE_MOVT
)
32334 return IN_RANGE (INTVAL (xop1
), 0, 0xff);
32336 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
32343 /* Returns true if a valid comparison operation and makes
32344 the operands in a form that is valid. */
32346 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
32348 enum rtx_code code
= GET_CODE (*comparison
);
32350 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
32351 ? GET_MODE (*op2
) : GET_MODE (*op1
);
32353 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
32355 if (code
== UNEQ
|| code
== LTGT
)
32358 code_int
= (int)code
;
32359 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
32360 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
32365 if (!arm_add_operand (*op1
, mode
))
32366 *op1
= force_reg (mode
, *op1
);
32367 if (!arm_add_operand (*op2
, mode
))
32368 *op2
= force_reg (mode
, *op2
);
32372 /* gen_compare_reg() will sort out any invalid operands. */
32376 if (!TARGET_VFP_FP16INST
)
32378 /* FP16 comparisons are done in SF mode. */
32380 *op1
= convert_to_mode (mode
, *op1
, 1);
32381 *op2
= convert_to_mode (mode
, *op2
, 1);
32382 /* Fall through. */
32385 if (!vfp_compare_operand (*op1
, mode
))
32386 *op1
= force_reg (mode
, *op1
);
32387 if (!vfp_compare_operand (*op2
, mode
))
32388 *op2
= force_reg (mode
, *op2
);
32398 /* Maximum number of instructions to set block of memory. */
32400 arm_block_set_max_insns (void)
32402 if (optimize_function_for_size_p (cfun
))
32405 return current_tune
->max_insns_inline_memset
;
32408 /* Return TRUE if it's profitable to set block of memory for
32409 non-vectorized case. VAL is the value to set the memory
32410 with. LENGTH is the number of bytes to set. ALIGN is the
32411 alignment of the destination memory in bytes. UNALIGNED_P
32412 is TRUE if we can only set the memory with instructions
32413 meeting alignment requirements. USE_STRD_P is TRUE if we
32414 can use strd to set the memory. */
32416 arm_block_set_non_vect_profit_p (rtx val
,
32417 unsigned HOST_WIDE_INT length
,
32418 unsigned HOST_WIDE_INT align
,
32419 bool unaligned_p
, bool use_strd_p
)
32422 /* For leftovers in bytes of 0-7, we can set the memory block using
32423 strb/strh/str with minimum instruction number. */
32424 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
32428 num
= arm_const_inline_cost (SET
, val
);
32429 num
+= length
/ align
+ length
% align
;
32431 else if (use_strd_p
)
32433 num
= arm_const_double_inline_cost (val
);
32434 num
+= (length
>> 3) + leftover
[length
& 7];
32438 num
= arm_const_inline_cost (SET
, val
);
32439 num
+= (length
>> 2) + leftover
[length
& 3];
32442 /* We may be able to combine last pair STRH/STRB into a single STR
32443 by shifting one byte back. */
32444 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
32447 return (num
<= arm_block_set_max_insns ());
32450 /* Return TRUE if it's profitable to set block of memory for
32451 vectorized case. LENGTH is the number of bytes to set.
32452 ALIGN is the alignment of destination memory in bytes.
32453 MODE is the vector mode used to set the memory. */
32455 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
32456 unsigned HOST_WIDE_INT align
,
32460 bool unaligned_p
= ((align
& 3) != 0);
32461 unsigned int nelt
= GET_MODE_NUNITS (mode
);
32463 /* Instruction loading constant value. */
32465 /* Instructions storing the memory. */
32466 num
+= (length
+ nelt
- 1) / nelt
;
32467 /* Instructions adjusting the address expression. Only need to
32468 adjust address expression if it's 4 bytes aligned and bytes
32469 leftover can only be stored by mis-aligned store instruction. */
32470 if (!unaligned_p
&& (length
& 3) != 0)
32473 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32474 if (!unaligned_p
&& mode
== V16QImode
)
32477 return (num
<= arm_block_set_max_insns ());
32480 /* Set a block of memory using vectorization instructions for the
32481 unaligned case. We fill the first LENGTH bytes of the memory
32482 area starting from DSTBASE with byte constant VALUE. ALIGN is
32483 the alignment requirement of memory. Return TRUE if succeeded. */
32485 arm_block_set_unaligned_vect (rtx dstbase
,
32486 unsigned HOST_WIDE_INT length
,
32487 unsigned HOST_WIDE_INT value
,
32488 unsigned HOST_WIDE_INT align
)
32490 unsigned int i
, nelt_v16
, nelt_v8
, nelt_mode
;
32493 rtx (*gen_func
) (rtx
, rtx
);
32495 unsigned HOST_WIDE_INT v
= value
;
32496 unsigned int offset
= 0;
32497 gcc_assert ((align
& 0x3) != 0);
32498 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
32499 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
32500 if (length
>= nelt_v16
)
32503 gen_func
= gen_movmisalignv16qi
;
32508 gen_func
= gen_movmisalignv8qi
;
32510 nelt_mode
= GET_MODE_NUNITS (mode
);
32511 gcc_assert (length
>= nelt_mode
);
32512 /* Skip if it isn't profitable. */
32513 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
32516 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
32517 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32519 v
= sext_hwi (v
, BITS_PER_WORD
);
32521 reg
= gen_reg_rtx (mode
);
32522 val_vec
= gen_const_vec_duplicate (mode
, GEN_INT (v
));
32523 /* Emit instruction loading the constant value. */
32524 emit_move_insn (reg
, val_vec
);
32526 /* Handle nelt_mode bytes in a vector. */
32527 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
32529 emit_insn ((*gen_func
) (mem
, reg
));
32530 if (i
+ 2 * nelt_mode
<= length
)
32532 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
32533 offset
+= nelt_mode
;
32534 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32538 /* If there are not less than nelt_v8 bytes leftover, we must be in
32540 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
32542 /* Handle (8, 16) bytes leftover. */
32543 if (i
+ nelt_v8
< length
)
32545 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
32546 offset
+= length
- i
;
32547 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32549 /* We are shifting bytes back, set the alignment accordingly. */
32550 if ((length
& 1) != 0 && align
>= 2)
32551 set_mem_align (mem
, BITS_PER_UNIT
);
32553 emit_insn (gen_movmisalignv16qi (mem
, reg
));
32555 /* Handle (0, 8] bytes leftover. */
32556 else if (i
< length
&& i
+ nelt_v8
>= length
)
32558 if (mode
== V16QImode
)
32559 reg
= gen_lowpart (V8QImode
, reg
);
32561 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
32562 + (nelt_mode
- nelt_v8
))));
32563 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
32564 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
32566 /* We are shifting bytes back, set the alignment accordingly. */
32567 if ((length
& 1) != 0 && align
>= 2)
32568 set_mem_align (mem
, BITS_PER_UNIT
);
32570 emit_insn (gen_movmisalignv8qi (mem
, reg
));
32576 /* Set a block of memory using vectorization instructions for the
32577 aligned case. We fill the first LENGTH bytes of the memory area
32578 starting from DSTBASE with byte constant VALUE. ALIGN is the
32579 alignment requirement of memory. Return TRUE if succeeded. */
32581 arm_block_set_aligned_vect (rtx dstbase
,
32582 unsigned HOST_WIDE_INT length
,
32583 unsigned HOST_WIDE_INT value
,
32584 unsigned HOST_WIDE_INT align
)
32586 unsigned int i
, nelt_v8
, nelt_v16
, nelt_mode
;
32587 rtx dst
, addr
, mem
;
32590 unsigned int offset
= 0;
32592 gcc_assert ((align
& 0x3) == 0);
32593 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
32594 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
32595 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
32600 nelt_mode
= GET_MODE_NUNITS (mode
);
32601 gcc_assert (length
>= nelt_mode
);
32602 /* Skip if it isn't profitable. */
32603 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
32606 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
32608 reg
= gen_reg_rtx (mode
);
32609 val_vec
= gen_const_vec_duplicate (mode
, gen_int_mode (value
, QImode
));
32610 /* Emit instruction loading the constant value. */
32611 emit_move_insn (reg
, val_vec
);
32614 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32615 if (mode
== V16QImode
)
32617 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32618 emit_insn (gen_movmisalignv16qi (mem
, reg
));
32620 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32621 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
32623 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
32624 offset
+= length
- nelt_mode
;
32625 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32626 /* We are shifting bytes back, set the alignment accordingly. */
32627 if ((length
& 0x3) == 0)
32628 set_mem_align (mem
, BITS_PER_UNIT
* 4);
32629 else if ((length
& 0x1) == 0)
32630 set_mem_align (mem
, BITS_PER_UNIT
* 2);
32632 set_mem_align (mem
, BITS_PER_UNIT
);
32634 emit_insn (gen_movmisalignv16qi (mem
, reg
));
32637 /* Fall through for bytes leftover. */
32639 nelt_mode
= GET_MODE_NUNITS (mode
);
32640 reg
= gen_lowpart (V8QImode
, reg
);
32643 /* Handle 8 bytes in a vector. */
32644 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
32646 addr
= plus_constant (Pmode
, dst
, i
);
32647 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
32648 if (MEM_ALIGN (mem
) >= 2 * BITS_PER_WORD
)
32649 emit_move_insn (mem
, reg
);
32651 emit_insn (gen_unaligned_storev8qi (mem
, reg
));
32654 /* Handle single word leftover by shifting 4 bytes back. We can
32655 use aligned access for this case. */
32656 if (i
+ UNITS_PER_WORD
== length
)
32658 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
32659 offset
+= i
- UNITS_PER_WORD
;
32660 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
32661 /* We are shifting 4 bytes back, set the alignment accordingly. */
32662 if (align
> UNITS_PER_WORD
)
32663 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
32665 emit_insn (gen_unaligned_storev8qi (mem
, reg
));
32667 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32668 We have to use unaligned access for this case. */
32669 else if (i
< length
)
32671 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
32672 offset
+= length
- nelt_mode
;
32673 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
32674 /* We are shifting bytes back, set the alignment accordingly. */
32675 if ((length
& 1) == 0)
32676 set_mem_align (mem
, BITS_PER_UNIT
* 2);
32678 set_mem_align (mem
, BITS_PER_UNIT
);
32680 emit_insn (gen_movmisalignv8qi (mem
, reg
));
32686 /* Set a block of memory using plain strh/strb instructions, only
32687 using instructions allowed by ALIGN on processor. We fill the
32688 first LENGTH bytes of the memory area starting from DSTBASE
32689 with byte constant VALUE. ALIGN is the alignment requirement
32692 arm_block_set_unaligned_non_vect (rtx dstbase
,
32693 unsigned HOST_WIDE_INT length
,
32694 unsigned HOST_WIDE_INT value
,
32695 unsigned HOST_WIDE_INT align
)
32698 rtx dst
, addr
, mem
;
32699 rtx val_exp
, val_reg
, reg
;
32701 HOST_WIDE_INT v
= value
;
32703 gcc_assert (align
== 1 || align
== 2);
32706 v
|= (value
<< BITS_PER_UNIT
);
32708 v
= sext_hwi (v
, BITS_PER_WORD
);
32709 val_exp
= GEN_INT (v
);
32710 /* Skip if it isn't profitable. */
32711 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
32712 align
, true, false))
32715 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
32716 mode
= (align
== 2 ? HImode
: QImode
);
32717 val_reg
= force_reg (SImode
, val_exp
);
32718 reg
= gen_lowpart (mode
, val_reg
);
32720 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
32722 addr
= plus_constant (Pmode
, dst
, i
);
32723 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
32724 emit_move_insn (mem
, reg
);
32727 /* Handle single byte leftover. */
32728 if (i
+ 1 == length
)
32730 reg
= gen_lowpart (QImode
, val_reg
);
32731 addr
= plus_constant (Pmode
, dst
, i
);
32732 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
32733 emit_move_insn (mem
, reg
);
32737 gcc_assert (i
== length
);
32741 /* Set a block of memory using plain strd/str/strh/strb instructions,
32742 to permit unaligned copies on processors which support unaligned
32743 semantics for those instructions. We fill the first LENGTH bytes
32744 of the memory area starting from DSTBASE with byte constant VALUE.
32745 ALIGN is the alignment requirement of memory. */
32747 arm_block_set_aligned_non_vect (rtx dstbase
,
32748 unsigned HOST_WIDE_INT length
,
32749 unsigned HOST_WIDE_INT value
,
32750 unsigned HOST_WIDE_INT align
)
32753 rtx dst
, addr
, mem
;
32754 rtx val_exp
, val_reg
, reg
;
32755 unsigned HOST_WIDE_INT v
;
32758 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
32759 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
32761 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
32762 if (length
< UNITS_PER_WORD
)
32763 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
32766 v
|= (v
<< BITS_PER_WORD
);
32768 v
= sext_hwi (v
, BITS_PER_WORD
);
32770 val_exp
= GEN_INT (v
);
32771 /* Skip if it isn't profitable. */
32772 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
32773 align
, false, use_strd_p
))
32778 /* Try without strd. */
32779 v
= (v
>> BITS_PER_WORD
);
32780 v
= sext_hwi (v
, BITS_PER_WORD
);
32781 val_exp
= GEN_INT (v
);
32782 use_strd_p
= false;
32783 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
32784 align
, false, use_strd_p
))
32789 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
32790 /* Handle double words using strd if possible. */
32793 val_reg
= force_reg (DImode
, val_exp
);
32795 for (; (i
+ 8 <= length
); i
+= 8)
32797 addr
= plus_constant (Pmode
, dst
, i
);
32798 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
32799 if (MEM_ALIGN (mem
) >= 2 * BITS_PER_WORD
)
32800 emit_move_insn (mem
, reg
);
32802 emit_insn (gen_unaligned_storedi (mem
, reg
));
32806 val_reg
= force_reg (SImode
, val_exp
);
32808 /* Handle words. */
32809 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
32810 for (; (i
+ 4 <= length
); i
+= 4)
32812 addr
= plus_constant (Pmode
, dst
, i
);
32813 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
32814 if ((align
& 3) == 0)
32815 emit_move_insn (mem
, reg
);
32817 emit_insn (gen_unaligned_storesi (mem
, reg
));
32820 /* Merge last pair of STRH and STRB into a STR if possible. */
32821 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
32823 addr
= plus_constant (Pmode
, dst
, i
- 1);
32824 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
32825 /* We are shifting one byte back, set the alignment accordingly. */
32826 if ((align
& 1) == 0)
32827 set_mem_align (mem
, BITS_PER_UNIT
);
32829 /* Most likely this is an unaligned access, and we can't tell at
32830 compilation time. */
32831 emit_insn (gen_unaligned_storesi (mem
, reg
));
32835 /* Handle half word leftover. */
32836 if (i
+ 2 <= length
)
32838 reg
= gen_lowpart (HImode
, val_reg
);
32839 addr
= plus_constant (Pmode
, dst
, i
);
32840 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
32841 if ((align
& 1) == 0)
32842 emit_move_insn (mem
, reg
);
32844 emit_insn (gen_unaligned_storehi (mem
, reg
));
32849 /* Handle single byte leftover. */
32850 if (i
+ 1 == length
)
32852 reg
= gen_lowpart (QImode
, val_reg
);
32853 addr
= plus_constant (Pmode
, dst
, i
);
32854 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
32855 emit_move_insn (mem
, reg
);
32861 /* Set a block of memory using vectorization instructions for both
32862 aligned and unaligned cases. We fill the first LENGTH bytes of
32863 the memory area starting from DSTBASE with byte constant VALUE.
32864 ALIGN is the alignment requirement of memory. */
32866 arm_block_set_vect (rtx dstbase
,
32867 unsigned HOST_WIDE_INT length
,
32868 unsigned HOST_WIDE_INT value
,
32869 unsigned HOST_WIDE_INT align
)
32871 /* Check whether we need to use unaligned store instruction. */
32872 if (((align
& 3) != 0 || (length
& 3) != 0)
32873 /* Check whether unaligned store instruction is available. */
32874 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
32877 if ((align
& 3) == 0)
32878 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
32880 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
32883 /* Expand string store operation. Firstly we try to do that by using
32884 vectorization instructions, then try with ARM unaligned access and
32885 double-word store if profitable. OPERANDS[0] is the destination,
32886 OPERANDS[1] is the number of bytes, operands[2] is the value to
32887 initialize the memory, OPERANDS[3] is the known alignment of the
32890 arm_gen_setmem (rtx
*operands
)
32892 rtx dstbase
= operands
[0];
32893 unsigned HOST_WIDE_INT length
;
32894 unsigned HOST_WIDE_INT value
;
32895 unsigned HOST_WIDE_INT align
;
32897 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
32900 length
= UINTVAL (operands
[1]);
32904 value
= (UINTVAL (operands
[2]) & 0xFF);
32905 align
= UINTVAL (operands
[3]);
32906 if (TARGET_NEON
&& length
>= 8
32907 && current_tune
->string_ops_prefer_neon
32908 && arm_block_set_vect (dstbase
, length
, value
, align
))
32911 if (!unaligned_access
&& (align
& 3) != 0)
32912 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
32914 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
32919 arm_macro_fusion_p (void)
32921 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
32924 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
32925 for MOVW / MOVT macro fusion. */
32928 arm_sets_movw_movt_fusible_p (rtx prev_set
, rtx curr_set
)
32930 /* We are trying to fuse
32931 movw imm / movt imm
32932 instructions as a group that gets scheduled together. */
32934 rtx set_dest
= SET_DEST (curr_set
);
32936 if (GET_MODE (set_dest
) != SImode
)
32939 /* We are trying to match:
32940 prev (movw) == (set (reg r0) (const_int imm16))
32941 curr (movt) == (set (zero_extract (reg r0)
32944 (const_int imm16_1))
32946 prev (movw) == (set (reg r1)
32947 (high (symbol_ref ("SYM"))))
32948 curr (movt) == (set (reg r0)
32950 (symbol_ref ("SYM")))) */
32952 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
32954 if (CONST_INT_P (SET_SRC (curr_set
))
32955 && CONST_INT_P (SET_SRC (prev_set
))
32956 && REG_P (XEXP (set_dest
, 0))
32957 && REG_P (SET_DEST (prev_set
))
32958 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
32962 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
32963 && REG_P (SET_DEST (curr_set
))
32964 && REG_P (SET_DEST (prev_set
))
32965 && GET_CODE (SET_SRC (prev_set
)) == HIGH
32966 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
32973 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
32975 rtx prev_set
= single_set (prev
);
32976 rtx curr_set
= single_set (curr
);
32982 if (any_condjump_p (curr
))
32985 if (!arm_macro_fusion_p ())
32988 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
32989 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
32995 /* Return true iff the instruction fusion described by OP is enabled. */
32997 arm_fusion_enabled_p (tune_params::fuse_ops op
)
32999 return current_tune
->fusible_ops
& op
;
33002 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33003 scheduled for speculative execution. Reject the long-running division
33004 and square-root instructions. */
33007 arm_sched_can_speculate_insn (rtx_insn
*insn
)
33009 switch (get_attr_type (insn
))
33017 case TYPE_NEON_FP_SQRT_S
:
33018 case TYPE_NEON_FP_SQRT_D
:
33019 case TYPE_NEON_FP_SQRT_S_Q
:
33020 case TYPE_NEON_FP_SQRT_D_Q
:
33021 case TYPE_NEON_FP_DIV_S
:
33022 case TYPE_NEON_FP_DIV_D
:
33023 case TYPE_NEON_FP_DIV_S_Q
:
33024 case TYPE_NEON_FP_DIV_D_Q
:
33031 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33033 static unsigned HOST_WIDE_INT
33034 arm_asan_shadow_offset (void)
33036 return HOST_WIDE_INT_1U
<< 29;
33040 /* This is a temporary fix for PR60655. Ideally we need
33041 to handle most of these cases in the generic part but
33042 currently we reject minus (..) (sym_ref). We try to
33043 ameliorate the case with minus (sym_ref1) (sym_ref2)
33044 where they are in the same section. */
33047 arm_const_not_ok_for_debug_p (rtx p
)
33049 tree decl_op0
= NULL
;
33050 tree decl_op1
= NULL
;
33052 if (GET_CODE (p
) == UNSPEC
)
33054 if (GET_CODE (p
) == MINUS
)
33056 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
33058 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
33060 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
33061 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
33063 if ((VAR_P (decl_op1
)
33064 || TREE_CODE (decl_op1
) == CONST_DECL
)
33065 && (VAR_P (decl_op0
)
33066 || TREE_CODE (decl_op0
) == CONST_DECL
))
33067 return (get_variable_section (decl_op1
, false)
33068 != get_variable_section (decl_op0
, false));
33070 if (TREE_CODE (decl_op1
) == LABEL_DECL
33071 && TREE_CODE (decl_op0
) == LABEL_DECL
)
33072 return (DECL_CONTEXT (decl_op1
)
33073 != DECL_CONTEXT (decl_op0
));
33083 /* return TRUE if x is a reference to a value in a constant pool */
33085 arm_is_constant_pool_ref (rtx x
)
33088 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
33089 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
33092 /* Remember the last target of arm_set_current_function. */
33093 static GTY(()) tree arm_previous_fndecl
;
33095 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33098 save_restore_target_globals (tree new_tree
)
33100 /* If we have a previous state, use it. */
33101 if (TREE_TARGET_GLOBALS (new_tree
))
33102 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
33103 else if (new_tree
== target_option_default_node
)
33104 restore_target_globals (&default_target_globals
);
33107 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33108 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
33111 arm_option_params_internal ();
33114 /* Invalidate arm_previous_fndecl. */
33117 arm_reset_previous_fndecl (void)
33119 arm_previous_fndecl
= NULL_TREE
;
33122 /* Establish appropriate back-end context for processing the function
33123 FNDECL. The argument might be NULL to indicate processing at top
33124 level, outside of any function scope. */
33127 arm_set_current_function (tree fndecl
)
33129 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
33132 tree old_tree
= (arm_previous_fndecl
33133 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
33136 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
33138 /* If current function has no attributes but previous one did,
33139 use the default node. */
33140 if (! new_tree
&& old_tree
)
33141 new_tree
= target_option_default_node
;
33143 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
33144 the default have been handled by save_restore_target_globals from
33145 arm_pragma_target_parse. */
33146 if (old_tree
== new_tree
)
33149 arm_previous_fndecl
= fndecl
;
33151 /* First set the target options. */
33152 cl_target_option_restore (&global_options
, &global_options_set
,
33153 TREE_TARGET_OPTION (new_tree
));
33155 save_restore_target_globals (new_tree
);
33157 arm_override_options_after_change_1 (&global_options
, &global_options_set
);
33160 /* Implement TARGET_OPTION_PRINT. */
33163 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
33165 int flags
= ptr
->x_target_flags
;
33166 const char *fpu_name
;
33168 fpu_name
= (ptr
->x_arm_fpu_index
== TARGET_FPU_auto
33169 ? "auto" : all_fpus
[ptr
->x_arm_fpu_index
].name
);
33171 fprintf (file
, "%*sselected isa %s\n", indent
, "",
33172 TARGET_THUMB2_P (flags
) ? "thumb2" :
33173 TARGET_THUMB_P (flags
) ? "thumb1" :
33176 if (ptr
->x_arm_arch_string
)
33177 fprintf (file
, "%*sselected architecture %s\n", indent
, "",
33178 ptr
->x_arm_arch_string
);
33180 if (ptr
->x_arm_cpu_string
)
33181 fprintf (file
, "%*sselected CPU %s\n", indent
, "",
33182 ptr
->x_arm_cpu_string
);
33184 if (ptr
->x_arm_tune_string
)
33185 fprintf (file
, "%*sselected tune %s\n", indent
, "",
33186 ptr
->x_arm_tune_string
);
33188 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_name
);
33191 /* Hook to determine if one function can safely inline another. */
33194 arm_can_inline_p (tree caller
, tree callee
)
33196 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
33197 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
33198 bool can_inline
= true;
33200 struct cl_target_option
*caller_opts
33201 = TREE_TARGET_OPTION (caller_tree
? caller_tree
33202 : target_option_default_node
);
33204 struct cl_target_option
*callee_opts
33205 = TREE_TARGET_OPTION (callee_tree
? callee_tree
33206 : target_option_default_node
);
33208 if (callee_opts
== caller_opts
)
33211 /* Callee's ISA features should be a subset of the caller's. */
33212 struct arm_build_target caller_target
;
33213 struct arm_build_target callee_target
;
33214 caller_target
.isa
= sbitmap_alloc (isa_num_bits
);
33215 callee_target
.isa
= sbitmap_alloc (isa_num_bits
);
33217 arm_configure_build_target (&caller_target
, caller_opts
, false);
33218 arm_configure_build_target (&callee_target
, callee_opts
, false);
33219 if (!bitmap_subset_p (callee_target
.isa
, caller_target
.isa
))
33220 can_inline
= false;
33222 sbitmap_free (caller_target
.isa
);
33223 sbitmap_free (callee_target
.isa
);
33225 /* OK to inline between different modes.
33226 Function with mode specific instructions, e.g using asm,
33227 must be explicitly protected with noinline. */
33231 /* Hook to fix function's alignment affected by target attribute. */
33234 arm_relayout_function (tree fndecl
)
33236 if (DECL_USER_ALIGN (fndecl
))
33239 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
33242 callee_tree
= target_option_default_node
;
33244 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
33247 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts
->x_target_flags
)));
33250 /* Inner function to process the attribute((target(...))), take an argument and
33251 set the current options from the argument. If we have a list, recursively
33252 go over the list. */
33255 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
33257 if (TREE_CODE (args
) == TREE_LIST
)
33261 for (; args
; args
= TREE_CHAIN (args
))
33262 if (TREE_VALUE (args
)
33263 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
33268 else if (TREE_CODE (args
) != STRING_CST
)
33270 error ("attribute %<target%> argument not a string");
33274 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
33277 while ((q
= strtok (argstr
, ",")) != NULL
)
33280 if (!strcmp (q
, "thumb"))
33282 opts
->x_target_flags
|= MASK_THUMB
;
33283 if (TARGET_FDPIC
&& !arm_arch_thumb2
)
33284 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33287 else if (!strcmp (q
, "arm"))
33288 opts
->x_target_flags
&= ~MASK_THUMB
;
33290 else if (!strcmp (q
, "general-regs-only"))
33291 opts
->x_target_flags
|= MASK_GENERAL_REGS_ONLY
;
33293 else if (startswith (q
, "fpu="))
33296 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+ 4,
33297 &fpu_index
, CL_TARGET
))
33299 error ("invalid fpu for target attribute or pragma %qs", q
);
33302 if (fpu_index
== TARGET_FPU_auto
)
33304 /* This doesn't really make sense until we support
33305 general dynamic selection of the architecture and all
33307 sorry ("auto fpu selection not currently permitted here");
33310 opts
->x_arm_fpu_index
= (enum fpu_type
) fpu_index
;
33312 else if (startswith (q
, "arch="))
33314 char *arch
= q
+ 5;
33315 const arch_option
*arm_selected_arch
33316 = arm_parse_arch_option_name (all_architectures
, "arch", arch
);
33318 if (!arm_selected_arch
)
33320 error ("invalid architecture for target attribute or pragma %qs",
33325 opts
->x_arm_arch_string
= xstrndup (arch
, strlen (arch
));
33327 else if (q
[0] == '+')
33329 opts
->x_arm_arch_string
33330 = xasprintf ("%s%s", opts
->x_arm_arch_string
, q
);
33334 error ("unknown target attribute or pragma %qs", q
);
33342 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33345 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
33346 struct gcc_options
*opts_set
)
33348 struct cl_target_option cl_opts
;
33350 if (!arm_valid_target_attribute_rec (args
, opts
))
33353 cl_target_option_save (&cl_opts
, opts
, opts_set
);
33354 arm_configure_build_target (&arm_active_target
, &cl_opts
, false);
33355 arm_option_check_internal (opts
);
33356 /* Do any overrides, such as global options arch=xxx.
33357 We do this since arm_active_target was overridden. */
33358 arm_option_reconfigure_globals ();
33359 arm_options_perform_arch_sanity_checks ();
33360 arm_option_override_internal (opts
, opts_set
);
33362 return build_target_option_node (opts
, opts_set
);
33366 add_attribute (const char * mode
, tree
*attributes
)
33368 size_t len
= strlen (mode
);
33369 tree value
= build_string (len
, mode
);
33371 TREE_TYPE (value
) = build_array_type (char_type_node
,
33372 build_index_type (size_int (len
)));
33374 *attributes
= tree_cons (get_identifier ("target"),
33375 build_tree_list (NULL_TREE
, value
),
33379 /* For testing. Insert thumb or arm modes alternatively on functions. */
33382 arm_insert_attributes (tree fndecl
, tree
* attributes
)
33386 if (! TARGET_FLIP_THUMB
)
33389 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
33390 || fndecl_built_in_p (fndecl
) || DECL_ARTIFICIAL (fndecl
))
33393 /* Nested definitions must inherit mode. */
33394 if (current_function_decl
)
33396 mode
= TARGET_THUMB
? "thumb" : "arm";
33397 add_attribute (mode
, attributes
);
33401 /* If there is already a setting don't change it. */
33402 if (lookup_attribute ("target", *attributes
) != NULL
)
33405 mode
= thumb_flipper
? "thumb" : "arm";
33406 add_attribute (mode
, attributes
);
33408 thumb_flipper
= !thumb_flipper
;
33411 /* Hook to validate attribute((target("string"))). */
33414 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
33415 tree args
, int ARG_UNUSED (flags
))
33418 struct gcc_options func_options
, func_options_set
;
33419 tree cur_tree
, new_optimize
;
33420 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
33422 /* Get the optimization options of the current function. */
33423 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
33425 /* If the function changed the optimization levels as well as setting target
33426 options, start with the optimizations specified. */
33427 if (!func_optimize
)
33428 func_optimize
= optimization_default_node
;
33430 /* Init func_options. */
33431 memset (&func_options
, 0, sizeof (func_options
));
33432 init_options_struct (&func_options
, NULL
);
33433 lang_hooks
.init_options_struct (&func_options
);
33434 memset (&func_options_set
, 0, sizeof (func_options_set
));
33436 /* Initialize func_options to the defaults. */
33437 cl_optimization_restore (&func_options
, &func_options_set
,
33438 TREE_OPTIMIZATION (func_optimize
));
33440 cl_target_option_restore (&func_options
, &func_options_set
,
33441 TREE_TARGET_OPTION (target_option_default_node
));
33443 /* Set func_options flags with new target mode. */
33444 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
33445 &func_options_set
);
33447 if (cur_tree
== NULL_TREE
)
33450 new_optimize
= build_optimization_node (&func_options
, &func_options_set
);
33452 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
33454 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
33459 /* Match an ISA feature bitmap to a named FPU. We always use the
33460 first entry that exactly matches the feature set, so that we
33461 effectively canonicalize the FPU name for the assembler. */
33463 arm_identify_fpu_from_isa (sbitmap isa
)
33465 auto_sbitmap
fpubits (isa_num_bits
);
33466 auto_sbitmap
cand_fpubits (isa_num_bits
);
33468 bitmap_and (fpubits
, isa
, isa_all_fpubits_internal
);
33470 /* If there are no ISA feature bits relating to the FPU, we must be
33471 doing soft-float. */
33472 if (bitmap_empty_p (fpubits
))
33475 for (unsigned int i
= 0; i
< TARGET_FPU_auto
; i
++)
33477 arm_initialize_isa (cand_fpubits
, all_fpus
[i
].isa_bits
);
33478 if (bitmap_equal_p (fpubits
, cand_fpubits
))
33479 return all_fpus
[i
].name
;
33481 /* We must find an entry, or things have gone wrong. */
33482 gcc_unreachable ();
33485 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33486 by the function fndecl. */
33488 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
33490 tree target_parts
= DECL_FUNCTION_SPECIFIC_TARGET (decl
);
33492 struct cl_target_option
*targ_options
;
33494 targ_options
= TREE_TARGET_OPTION (target_parts
);
33496 targ_options
= TREE_TARGET_OPTION (target_option_current_node
);
33497 gcc_assert (targ_options
);
33499 arm_print_asm_arch_directives (stream
, targ_options
);
33501 fprintf (stream
, "\t.syntax unified\n");
33505 if (is_called_in_ARM_mode (decl
)
33506 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
33507 && cfun
->is_thunk
))
33508 fprintf (stream
, "\t.code 32\n");
33509 else if (TARGET_THUMB1
)
33510 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
33512 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
33515 fprintf (stream
, "\t.arm\n");
33517 if (TARGET_POKE_FUNCTION_NAME
)
33518 arm_poke_function_name (stream
, (const char *) name
);
33521 /* If MEM is in the form of [base+offset], extract the two parts
33522 of address and set to BASE and OFFSET, otherwise return false
33523 after clearing BASE and OFFSET. */
33526 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
33530 gcc_assert (MEM_P (mem
));
33532 addr
= XEXP (mem
, 0);
33534 /* Strip off const from addresses like (const (addr)). */
33535 if (GET_CODE (addr
) == CONST
)
33536 addr
= XEXP (addr
, 0);
33541 *offset
= const0_rtx
;
33545 if (GET_CODE (addr
) == PLUS
33546 && GET_CODE (XEXP (addr
, 0)) == REG
33547 && CONST_INT_P (XEXP (addr
, 1)))
33549 *base
= XEXP (addr
, 0);
33550 *offset
= XEXP (addr
, 1);
33555 *offset
= NULL_RTX
;
33560 /* If INSN is a load or store of address in the form of [base+offset],
33561 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
33562 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33563 otherwise return FALSE. */
33566 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
33570 gcc_assert (INSN_P (insn
));
33571 x
= PATTERN (insn
);
33572 if (GET_CODE (x
) != SET
)
33576 dest
= SET_DEST (x
);
33577 if (REG_P (src
) && MEM_P (dest
))
33580 extract_base_offset_in_addr (dest
, base
, offset
);
33582 else if (MEM_P (src
) && REG_P (dest
))
33585 extract_base_offset_in_addr (src
, base
, offset
);
33590 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
33593 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33595 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
33596 and PRI are only calculated for these instructions. For other instruction,
33597 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
33598 instruction fusion can be supported by returning different priorities.
33600 It's important that irrelevant instructions get the largest FUSION_PRI. */
33603 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
33604 int *fusion_pri
, int *pri
)
33610 gcc_assert (INSN_P (insn
));
33613 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
33620 /* Load goes first. */
33622 *fusion_pri
= tmp
- 1;
33624 *fusion_pri
= tmp
- 2;
33628 /* INSN with smaller base register goes first. */
33629 tmp
-= ((REGNO (base
) & 0xff) << 20);
33631 /* INSN with smaller offset goes first. */
33632 off_val
= (int)(INTVAL (offset
));
33634 tmp
-= (off_val
& 0xfffff);
33636 tmp
+= ((- off_val
) & 0xfffff);
33643 /* Construct and return a PARALLEL RTX vector with elements numbering the
33644 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
33645 the vector - from the perspective of the architecture. This does not
33646 line up with GCC's perspective on lane numbers, so we end up with
33647 different masks depending on our target endian-ness. The diagram
33648 below may help. We must draw the distinction when building masks
33649 which select one half of the vector. An instruction selecting
33650 architectural low-lanes for a big-endian target, must be described using
33651 a mask selecting GCC high-lanes.
33653 Big-Endian Little-Endian
33655 GCC 0 1 2 3 3 2 1 0
33656 | x | x | x | x | | x | x | x | x |
33657 Architecture 3 2 1 0 3 2 1 0
33659 Low Mask: { 2, 3 } { 0, 1 }
33660 High Mask: { 0, 1 } { 2, 3 }
33664 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
33666 int nunits
= GET_MODE_NUNITS (mode
);
33667 rtvec v
= rtvec_alloc (nunits
/ 2);
33668 int high_base
= nunits
/ 2;
33674 if (BYTES_BIG_ENDIAN
)
33675 base
= high
? low_base
: high_base
;
33677 base
= high
? high_base
: low_base
;
33679 for (i
= 0; i
< nunits
/ 2; i
++)
33680 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
33682 t1
= gen_rtx_PARALLEL (mode
, v
);
33686 /* Check OP for validity as a PARALLEL RTX vector with elements
33687 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
33688 from the perspective of the architecture. See the diagram above
33689 arm_simd_vect_par_cnst_half_p for more details. */
33692 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
33695 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
33696 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
33697 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
33700 if (!VECTOR_MODE_P (mode
))
33703 if (count_op
!= count_ideal
)
33706 for (i
= 0; i
< count_ideal
; i
++)
33708 rtx elt_op
= XVECEXP (op
, 0, i
);
33709 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
33711 if (!CONST_INT_P (elt_op
)
33712 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
33718 /* Can output mi_thunk for all cases except for non-zero vcall_offset
33721 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
33724 /* For now, we punt and not handle this for TARGET_THUMB1. */
33725 if (vcall_offset
&& TARGET_THUMB1
)
33728 /* Otherwise ok. */
33732 /* Generate RTL for a conditional branch with rtx comparison CODE in
33733 mode CC_MODE. The destination of the unlikely conditional branch
33737 arm_gen_unlikely_cbranch (enum rtx_code code
, machine_mode cc_mode
,
33741 x
= gen_rtx_fmt_ee (code
, VOIDmode
,
33742 gen_rtx_REG (cc_mode
, CC_REGNUM
),
33745 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
33746 gen_rtx_LABEL_REF (VOIDmode
, label_ref
),
33748 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
33751 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
33753 For pure-code sections there is no letter code for this attribute, so
33754 output all the section flags numerically when this is needed. */
33757 arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
)
33760 if (flags
& SECTION_ARM_PURECODE
)
33764 if (!(flags
& SECTION_DEBUG
))
33766 if (flags
& SECTION_EXCLUDE
)
33767 *num
|= 0x80000000;
33768 if (flags
& SECTION_WRITE
)
33770 if (flags
& SECTION_CODE
)
33772 if (flags
& SECTION_MERGE
)
33774 if (flags
& SECTION_STRINGS
)
33776 if (flags
& SECTION_TLS
)
33778 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
33787 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
33789 If pure-code is passed as an option, make sure all functions are in
33790 sections that have the SHF_ARM_PURECODE attribute. */
33793 arm_function_section (tree decl
, enum node_frequency freq
,
33794 bool startup
, bool exit
)
33796 const char * section_name
;
33799 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
33800 return default_function_section (decl
, freq
, startup
, exit
);
33802 if (!target_pure_code
)
33803 return default_function_section (decl
, freq
, startup
, exit
);
33806 section_name
= DECL_SECTION_NAME (decl
);
33808 /* If a function is not in a named section then it falls under the 'default'
33809 text section, also known as '.text'. We can preserve previous behavior as
33810 the default text section already has the SHF_ARM_PURECODE section
33814 section
*default_sec
= default_function_section (decl
, freq
, startup
,
33817 /* If default_sec is not null, then it must be a special section like for
33818 example .text.startup. We set the pure-code attribute and return the
33819 same section to preserve existing behavior. */
33821 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
33822 return default_sec
;
33825 /* Otherwise look whether a section has already been created with
33827 sec
= get_named_section (decl
, section_name
, 0);
33829 /* If that is not the case passing NULL as the section's name to
33830 'get_named_section' will create a section with the declaration's
33832 sec
= get_named_section (decl
, NULL
, 0);
33834 /* Set the SHF_ARM_PURECODE attribute. */
33835 sec
->common
.flags
|= SECTION_ARM_PURECODE
;
33840 /* Implements the TARGET_SECTION_FLAGS hook.
33842 If DECL is a function declaration and pure-code is passed as an option
33843 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
33844 section's name and RELOC indicates whether the declarations initializer may
33845 contain runtime relocations. */
33847 static unsigned int
33848 arm_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
33850 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
33852 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
&& target_pure_code
)
33853 flags
|= SECTION_ARM_PURECODE
;
33858 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
33861 arm_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
,
33863 rtx
*quot_p
, rtx
*rem_p
)
33865 if (mode
== SImode
)
33866 gcc_assert (!TARGET_IDIV
);
33868 scalar_int_mode libval_mode
33869 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode
));
33871 rtx libval
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
33872 libval_mode
, op0
, mode
, op1
, mode
);
33874 rtx quotient
= simplify_gen_subreg (mode
, libval
, libval_mode
, 0);
33875 rtx remainder
= simplify_gen_subreg (mode
, libval
, libval_mode
,
33876 GET_MODE_SIZE (mode
));
33878 gcc_assert (quotient
);
33879 gcc_assert (remainder
);
33881 *quot_p
= quotient
;
33882 *rem_p
= remainder
;
33885 /* This function checks for the availability of the coprocessor builtin passed
33886 in BUILTIN for the current target. Returns true if it is available and
33887 false otherwise. If a BUILTIN is passed for which this function has not
33888 been implemented it will cause an exception. */
33891 arm_coproc_builtin_available (enum unspecv builtin
)
33893 /* None of these builtins are available in Thumb mode if the target only
33894 supports Thumb-1. */
33912 case VUNSPEC_LDC2L
:
33914 case VUNSPEC_STC2L
:
33917 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
33924 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
33926 if (arm_arch6
|| arm_arch5te
)
33929 case VUNSPEC_MCRR2
:
33930 case VUNSPEC_MRRC2
:
33935 gcc_unreachable ();
33940 /* This function returns true if OP is a valid memory operand for the ldc and
33941 stc coprocessor instructions and false otherwise. */
33944 arm_coproc_ldc_stc_legitimate_address (rtx op
)
33946 HOST_WIDE_INT range
;
33947 /* Has to be a memory operand. */
33953 /* We accept registers. */
33957 switch GET_CODE (op
)
33961 /* Or registers with an offset. */
33962 if (!REG_P (XEXP (op
, 0)))
33967 /* The offset must be an immediate though. */
33968 if (!CONST_INT_P (op
))
33971 range
= INTVAL (op
);
33973 /* Within the range of [-1020,1020]. */
33974 if (!IN_RANGE (range
, -1020, 1020))
33977 /* And a multiple of 4. */
33978 return (range
% 4) == 0;
33984 return REG_P (XEXP (op
, 0));
33986 gcc_unreachable ();
33991 /* Return the diagnostic message string if conversion from FROMTYPE to
33992 TOTYPE is not allowed, NULL otherwise. */
33994 static const char *
33995 arm_invalid_conversion (const_tree fromtype
, const_tree totype
)
33997 if (element_mode (fromtype
) != element_mode (totype
))
33999 /* Do no allow conversions to/from BFmode scalar types. */
34000 if (TYPE_MODE (fromtype
) == BFmode
)
34001 return N_("invalid conversion from type %<bfloat16_t%>");
34002 if (TYPE_MODE (totype
) == BFmode
)
34003 return N_("invalid conversion to type %<bfloat16_t%>");
34006 /* Conversion allowed. */
34010 /* Return the diagnostic message string if the unary operation OP is
34011 not permitted on TYPE, NULL otherwise. */
34013 static const char *
34014 arm_invalid_unary_op (int op
, const_tree type
)
34016 /* Reject all single-operand operations on BFmode except for &. */
34017 if (element_mode (type
) == BFmode
&& op
!= ADDR_EXPR
)
34018 return N_("operation not permitted on type %<bfloat16_t%>");
34020 /* Operation allowed. */
34024 /* Return the diagnostic message string if the binary operation OP is
34025 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34027 static const char *
34028 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED
, const_tree type1
,
34031 /* Reject all 2-operand operations on BFmode. */
34032 if (element_mode (type1
) == BFmode
34033 || element_mode (type2
) == BFmode
)
34034 return N_("operation not permitted on type %<bfloat16_t%>");
34036 /* Operation allowed. */
34040 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34042 In VFPv1, VFP registers could only be accessed in the mode they were
34043 set, so subregs would be invalid there. However, we don't support
34044 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34046 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34047 VFP registers in little-endian order. We can't describe that accurately to
34048 GCC, so avoid taking subregs of such values.
34050 The only exception is going from a 128-bit to a 64-bit type. In that
34051 case the data layout happens to be consistent for big-endian, so we
34052 explicitly allow that case. */
34055 arm_can_change_mode_class (machine_mode from
, machine_mode to
,
34056 reg_class_t rclass
)
34059 && !(GET_MODE_SIZE (from
) == 16 && GET_MODE_SIZE (to
) == 8)
34060 && (GET_MODE_SIZE (from
) > UNITS_PER_WORD
34061 || GET_MODE_SIZE (to
) > UNITS_PER_WORD
)
34062 && reg_classes_intersect_p (VFP_REGS
, rclass
))
34067 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34068 strcpy from constants will be faster. */
34070 static HOST_WIDE_INT
34071 arm_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
34073 unsigned int factor
= (TARGET_THUMB
|| ! arm_tune_xscale
? 1 : 2);
34074 if (TREE_CODE (exp
) == STRING_CST
&& !optimize_size
)
34075 return MAX (align
, BITS_PER_WORD
* factor
);
34079 /* Emit a speculation barrier on target architectures that do not have
34080 DSB/ISB directly. Such systems probably don't need a barrier
34081 themselves, but if the code is ever run on a later architecture, it
34082 might become a problem. */
34084 arm_emit_speculation_barrier_function ()
34086 emit_library_call (speculation_barrier_libfunc
, LCT_NORMAL
, VOIDmode
);
34089 /* Have we recorded an explicit access to the Q bit of APSR?. */
34091 arm_q_bit_access (void)
34093 if (cfun
&& cfun
->decl
)
34094 return lookup_attribute ("acle qbit",
34095 DECL_ATTRIBUTES (cfun
->decl
));
34099 /* Have we recorded an explicit access to the GE bits of PSTATE?. */
34101 arm_ge_bits_access (void)
34103 if (cfun
&& cfun
->decl
)
34104 return lookup_attribute ("acle gebits",
34105 DECL_ATTRIBUTES (cfun
->decl
));
34109 /* NULL if insn INSN is valid within a low-overhead loop.
34110 Otherwise return why doloop cannot be applied. */
34112 static const char *
34113 arm_invalid_within_doloop (const rtx_insn
*insn
)
34115 if (!TARGET_HAVE_LOB
)
34116 return default_invalid_within_doloop (insn
);
34119 return "Function call in the loop.";
34121 if (reg_mentioned_p (gen_rtx_REG (SImode
, LR_REGNUM
), insn
))
34122 return "LR is used inside loop.";
34128 arm_target_insn_ok_for_lob (rtx insn
)
34130 basic_block bb
= BLOCK_FOR_INSN (insn
);
34131 /* Make sure the basic block of the target insn is a simple latch
34132 having as single predecessor and successor the body of the loop
34133 itself. Only simple loops with a single basic block as body are
34134 supported for 'low over head loop' making sure that LE target is
34135 above LE itself in the generated code. */
34137 return single_succ_p (bb
)
34138 && single_pred_p (bb
)
34139 && single_succ_edge (bb
)->dest
== single_pred_edge (bb
)->src
34140 && contains_no_active_insn_p (bb
);
34144 namespace selftest
{
34146 /* Scan the static data tables generated by parsecpu.awk looking for
34147 potential issues with the data. We primarily check for
34148 inconsistencies in the option extensions at present (extensions
34149 that duplicate others but aren't marked as aliases). Furthermore,
34150 for correct canonicalization later options must never be a subset
34151 of an earlier option. Any extension should also only specify other
34152 feature bits and never an architecture bit. The architecture is inferred
34153 from the declaration of the extension. */
34155 arm_test_cpu_arch_data (void)
34157 const arch_option
*arch
;
34158 const cpu_option
*cpu
;
34159 auto_sbitmap
target_isa (isa_num_bits
);
34160 auto_sbitmap
isa1 (isa_num_bits
);
34161 auto_sbitmap
isa2 (isa_num_bits
);
34163 for (arch
= all_architectures
; arch
->common
.name
!= NULL
; ++arch
)
34165 const cpu_arch_extension
*ext1
, *ext2
;
34167 if (arch
->common
.extensions
== NULL
)
34170 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
34172 for (ext1
= arch
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
34177 arm_initialize_isa (isa1
, ext1
->isa_bits
);
34178 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
34180 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
34183 arm_initialize_isa (isa2
, ext2
->isa_bits
);
34184 /* If the option is a subset of the parent option, it doesn't
34185 add anything and so isn't useful. */
34186 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
34188 /* If the extension specifies any architectural bits then
34189 disallow it. Extensions should only specify feature bits. */
34190 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
34195 for (cpu
= all_cores
; cpu
->common
.name
!= NULL
; ++cpu
)
34197 const cpu_arch_extension
*ext1
, *ext2
;
34199 if (cpu
->common
.extensions
== NULL
)
34202 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
34204 for (ext1
= cpu
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
34209 arm_initialize_isa (isa1
, ext1
->isa_bits
);
34210 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
34212 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
34215 arm_initialize_isa (isa2
, ext2
->isa_bits
);
34216 /* If the option is a subset of the parent option, it doesn't
34217 add anything and so isn't useful. */
34218 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
34220 /* If the extension specifies any architectural bits then
34221 disallow it. Extensions should only specify feature bits. */
34222 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
34228 /* Scan the static data tables generated by parsecpu.awk looking for
34229 potential issues with the data. Here we check for consistency between the
34230 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
34231 a feature bit that is not defined by any FPU flag. */
34233 arm_test_fpu_data (void)
34235 auto_sbitmap
isa_all_fpubits_internal (isa_num_bits
);
34236 auto_sbitmap
fpubits (isa_num_bits
);
34237 auto_sbitmap
tmpset (isa_num_bits
);
34239 static const enum isa_feature fpu_bitlist_internal
[]
34240 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
34241 arm_initialize_isa (isa_all_fpubits_internal
, fpu_bitlist_internal
);
34243 for (unsigned int i
= 0; i
< TARGET_FPU_auto
; i
++)
34245 arm_initialize_isa (fpubits
, all_fpus
[i
].isa_bits
);
34246 bitmap_and_compl (tmpset
, isa_all_fpubits_internal
, fpubits
);
34247 bitmap_clear (isa_all_fpubits_internal
);
34248 bitmap_copy (isa_all_fpubits_internal
, tmpset
);
34251 if (!bitmap_empty_p (isa_all_fpubits_internal
))
34253 fprintf (stderr
, "Error: found feature bits in the ALL_FPU_INTERAL"
34254 " group that are not defined by any FPU.\n"
34255 " Check your arm-cpus.in.\n");
34256 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal
));
34261 arm_run_selftests (void)
34263 arm_test_cpu_arch_data ();
34264 arm_test_fpu_data ();
34266 } /* Namespace selftest. */
34268 #undef TARGET_RUN_TARGET_SELFTESTS
34269 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34270 #endif /* CHECKING_P */
34272 /* Implement TARGET_STACK_PROTECT_GUARD. In case of a
34273 global variable based guard use the default else
34274 return a null tree. */
34276 arm_stack_protect_guard (void)
34278 if (arm_stack_protector_guard
== SSP_GLOBAL
)
34279 return default_stack_protect_guard ();
34284 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
34285 Unlike the arm version, we do NOT implement asm flag outputs. */
34288 thumb1_md_asm_adjust (vec
<rtx
> &outputs
, vec
<rtx
> & /*inputs*/,
34289 vec
<machine_mode
> & /*input_modes*/,
34290 vec
<const char *> &constraints
, vec
<rtx
> & /*clobbers*/,
34291 HARD_REG_SET
& /*clobbered_regs*/, location_t
/*loc*/)
34293 for (unsigned i
= 0, n
= outputs
.length (); i
< n
; ++i
)
34294 if (startswith (constraints
[i
], "=@cc"))
34296 sorry ("%<asm%> flags not supported in thumb1 mode");
34302 /* Generate code to enable conditional branches in functions over 1 MiB.
34304 operands: is the operands list of the asm insn (see arm_cond_branch or
34305 arm_cond_branch_reversed).
34306 pos_label: is an index into the operands array where operands[pos_label] is
34307 the asm label of the final jump destination.
34308 dest: is a string which is used to generate the asm label of the intermediate
34310 branch_format: is a string denoting the intermediate branch format, e.g.
34311 "beq", "bne", etc. */
34314 arm_gen_far_branch (rtx
* operands
, int pos_label
, const char * dest
,
34315 const char * branch_format
)
34317 rtx_code_label
* tmp_label
= gen_label_rtx ();
34318 char label_buf
[256];
34320 ASM_GENERATE_INTERNAL_LABEL (label_buf
, dest
, \
34321 CODE_LABEL_NUMBER (tmp_label
));
34322 const char *label_ptr
= arm_strip_name_encoding (label_buf
);
34323 rtx dest_label
= operands
[pos_label
];
34324 operands
[pos_label
] = tmp_label
;
34326 snprintf (buffer
, sizeof (buffer
), "%s%s", branch_format
, label_ptr
);
34327 output_asm_insn (buffer
, operands
);
34329 snprintf (buffer
, sizeof (buffer
), "b\t%%l0%d\n%s:", pos_label
, label_ptr
);
34330 operands
[pos_label
] = dest_label
;
34331 output_asm_insn (buffer
, operands
);
34335 /* If given mode matches, load from memory to LO_REGS.
34336 (i.e [Rn], Rn <= LO_REGS). */
34338 arm_mode_base_reg_class (machine_mode mode
)
34340 if (TARGET_HAVE_MVE
34341 && (mode
== E_V8QImode
|| mode
== E_V4QImode
|| mode
== E_V4HImode
))
34344 return MODE_BASE_REG_REG_CLASS (mode
);
34347 struct gcc_target targetm
= TARGET_INITIALIZER
;
34349 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34352 arm_get_mask_mode (machine_mode mode
)
34354 if (TARGET_HAVE_MVE
)
34355 return arm_mode_to_pred_mode (mode
);
34357 return default_get_mask_mode (mode
);
34360 #include "gt-arm.h"