1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
34 #include "stringpool.h"
41 #include "diagnostic-core.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
48 #include "insn-attr.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
62 #include "target-globals.h"
64 #include "tm-constrs.h"
66 #include "optabs-libfuncs.h"
71 /* This file should be included last. */
72 #include "target-def.h"
74 /* Forward definitions of types. */
/* Shorthand typedefs for the minipool (constant-pool placement)
   machinery defined later in this file.  */
75 typedef struct minipool_node Mnode
;
76 typedef struct minipool_fixup Mfix
;
/* Hook installed by a language front end to emit language-specific
   object attributes; takes no arguments and returns nothing.  */
78 void (*arm_lang_output_object_attributes_hook
)(void);
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx
);
87 static int arm_needs_doubleword_align (machine_mode
, const_tree
);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets
*arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code
, machine_mode
, rtx
,
93 unsigned HOST_WIDE_INT
, rtx
, rtx
, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap
);
96 static int arm_address_register_rtx_p (rtx
, int);
97 static int arm_legitimate_index_p (machine_mode
, rtx
, RTX_CODE
, int);
98 static bool is_called_in_ARM_mode (tree
);
99 static int thumb2_legitimate_index_p (machine_mode
, rtx
, int);
100 static int thumb1_base_register_rtx_p (rtx
, machine_mode
, int);
101 static rtx
arm_legitimize_address (rtx
, rtx
, machine_mode
);
102 static reg_class_t
arm_preferred_reload_class (rtx
, reg_class_t
);
103 static rtx
thumb_legitimize_address (rtx
, rtx
, machine_mode
);
104 inline static int thumb1_index_register_rtx_p (rtx
, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx
, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx
, int);
110 static void arm_print_operand_address (FILE *, machine_mode
, rtx
);
111 static bool arm_print_operand_punct_valid_p (unsigned char code
);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
113 static arm_cc
get_arm_condition_code (rtx
);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx
*, const char *, const char *,
117 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
118 static struct machine_function
*arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT
get_jump_table_size (rtx_jump_table_data
*);
121 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
122 static Mnode
*add_minipool_forward_ref (Mfix
*);
123 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
124 static Mnode
*add_minipool_backward_ref (Mfix
*);
125 static void assign_minipool_offsets (Mfix
*);
126 static void arm_print_value (FILE *, rtx
);
127 static void dump_minipool (rtx_insn
*);
128 static int arm_barrier_cost (rtx_insn
*);
129 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
130 static void push_minipool_barrier (rtx_insn
*, HOST_WIDE_INT
);
131 static void push_minipool_fix (rtx_insn
*, HOST_WIDE_INT
, rtx
*,
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn
*, HOST_WIDE_INT
, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree
);
138 static unsigned long arm_compute_func_type (void);
139 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
140 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
141 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
145 static tree
arm_handle_cmse_nonsecure_entry (tree
*, tree
, tree
, int, bool *);
146 static tree
arm_handle_cmse_nonsecure_call (tree
*, tree
, tree
, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree
, const_tree
);
150 static void arm_set_default_type_attributes (tree
);
151 static int arm_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn
**, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code
,
154 unsigned HOST_WIDE_INT val
,
155 struct four_ints
*return_sequence
);
156 static int optimal_immediate_sequence_1 (enum rtx_code code
,
157 unsigned HOST_WIDE_INT val
,
158 struct four_ints
*return_sequence
,
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree
, tree
);
162 static machine_mode
arm_promote_function_mode (const_tree
,
165 static bool arm_return_in_memory (const_tree
, const_tree
);
166 static rtx
arm_function_value (const_tree
, const_tree
, bool);
167 static rtx
arm_libcall_value_1 (machine_mode
);
168 static rtx
arm_libcall_value (machine_mode
, const_rtx
);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode
, rtx
);
175 static bool arm_legitimate_constant_p (machine_mode
, rtx
);
176 static bool arm_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
177 static int arm_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
178 static int arm_register_move_cost (machine_mode
, reg_class_t
, reg_class_t
);
179 static int arm_memory_move_cost (machine_mode
, reg_class_t
, bool);
180 static void emit_constant_insn (rtx cond
, rtx pattern
);
181 static rtx_insn
*emit_set_insn (rtx
, rtx
);
182 static rtx
emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t
, machine_mode
,
185 static rtx
arm_function_arg (cumulative_args_t
, machine_mode
,
187 static void arm_function_arg_advance (cumulative_args_t
, machine_mode
,
189 static pad_direction
arm_function_arg_padding (machine_mode
, const_tree
);
190 static unsigned int arm_function_arg_boundary (machine_mode
, const_tree
);
191 static rtx
aapcs_allocate_return_reg (machine_mode
, const_tree
,
193 static rtx
aapcs_libcall_value (machine_mode
);
194 static int aapcs_select_return_coproc (const_tree
, const_tree
);
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
198 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
201 static void arm_encode_section_info (tree
, rtx
, int);
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree
, tree
*);
208 static void arm_setup_incoming_varargs (cumulative_args_t
, machine_mode
,
210 static bool arm_pass_by_reference (cumulative_args_t
,
211 machine_mode
, const_tree
, bool);
212 static bool arm_promote_prototypes (const_tree
);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree
);
216 static bool arm_must_pass_in_stack (machine_mode
, const_tree
);
217 static bool arm_return_in_memory (const_tree
, const_tree
);
219 static void arm_unwind_emit (FILE *, rtx_insn
*);
220 static bool arm_output_ttype (rtx
);
221 static void arm_asm_emit_except_personality (rtx
);
223 static void arm_asm_init_sections (void);
224 static rtx
arm_dwarf_register_span (rtx
);
226 static tree
arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree
arm_get_cookie_size (tree
);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree
);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree
arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree
, rtx
);
238 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option
*, struct gcc_options
*);
241 static void arm_option_restore (struct gcc_options
*,
242 struct cl_target_option
*);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option
*);
245 static void arm_set_current_function (tree
);
246 static bool arm_can_inline_p (tree
, tree
);
247 static void arm_relayout_function (tree
);
248 static bool arm_valid_target_attribute_p (tree
, tree
, tree
, int);
249 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (machine_mode
);
250 static bool arm_sched_can_speculate_insn (rtx_insn
*);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn
*);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
257 static bool arm_output_addr_const_extra (FILE *, rtx
);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree
);
260 static tree
arm_promoted_type (const_tree t
);
261 static bool arm_scalar_mode_supported_p (scalar_mode
);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx
, tree
, rtx
);
266 static rtx
arm_trampoline_adjust_address (rtx
);
267 static rtx_insn
*arm_pic_static_addr (rtx orig
, rtx reg
);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn
*, int, rtx_insn
*, int *);
271 static bool arm_array_mode_supported_p (machine_mode
,
272 unsigned HOST_WIDE_INT
);
273 static machine_mode
arm_preferred_simd_mode (scalar_mode
);
274 static bool arm_class_likely_spilled_p (reg_class_t
);
275 static HOST_WIDE_INT
arm_vector_alignment (const_tree type
);
276 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode
,
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method
arm_excess_precision (enum excess_precision_type
);
283 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode
, vec_perm_indices
);
292 static bool aarch_macro_fusion_pair_p (rtx_insn
*, rtx_insn
*);
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
296 int misalign ATTRIBUTE_UNUSED
);
297 static unsigned arm_add_stmt_cost (void *data
, int count
,
298 enum vect_cost_for_stmt kind
,
299 struct _stmt_vec_info
*stmt_info
,
301 enum vect_cost_model_location where
);
303 static void arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
304 bool op0_preserve_value
);
305 static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void);
307 static void arm_sched_fusion_priority (rtx_insn
*, int, int *, int*);
308 static bool arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
310 static section
*arm_function_section (tree
, enum node_frequency
, bool, bool);
311 static bool arm_asm_elf_flags_numeric (unsigned int flags
, unsigned int *num
);
312 static unsigned int arm_elf_section_type_flags (tree decl
, const char *name
,
314 static void arm_expand_divmod_libfunc (rtx
, machine_mode
, rtx
, rtx
, rtx
*, rtx
*);
315 static opt_scalar_float_mode
arm_floatn_mode (int, bool);
316 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode
);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode
);
318 static bool arm_modes_tieable_p (machine_mode
, machine_mode
);
319 static HOST_WIDE_INT
arm_constant_alignment (const_tree
, HOST_WIDE_INT
);
321 /* Table of machine attributes. */
322 static const struct attribute_spec arm_attribute_table
[] =
324 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
325 affects_type_identity } */
326 /* Function calls made to this symbol must be done indirectly, because
327 it may lie outside of the 26 bit addressing range of a normal function
329 { "long_call", 0, 0, false, true, true, NULL
, false },
330 /* Whereas these functions are always known to reside within the 26 bit
332 { "short_call", 0, 0, false, true, true, NULL
, false },
333 /* Specify the procedure call conventions for a function. */
334 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
336 /* Interrupt Service Routines have special prologue and epilogue requirements. */
337 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
339 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
341 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
344 /* ARM/PE has three new attributes:
346 dllexport - for exporting a function/variable that will live in a dll
347 dllimport - for importing a function/variable from a dll
349 Microsoft allows multiple declspecs in one __declspec, separating
350 them with spaces. We do NOT support this. Instead, use __declspec
353 { "dllimport", 0, 0, true, false, false, NULL
, false },
354 { "dllexport", 0, 0, true, false, false, NULL
, false },
355 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
357 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
358 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
359 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
360 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
363 /* ARMv8-M Security Extensions support. */
364 { "cmse_nonsecure_entry", 0, 0, true, false, false,
365 arm_handle_cmse_nonsecure_entry
, false },
366 { "cmse_nonsecure_call", 0, 0, true, false, false,
367 arm_handle_cmse_nonsecure_call
, true },
368 { NULL
, 0, 0, false, false, false, NULL
, false }
371 /* Initialize the GCC target structure. */
372 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
373 #undef TARGET_MERGE_DECL_ATTRIBUTES
374 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
377 #undef TARGET_LEGITIMIZE_ADDRESS
378 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
380 #undef TARGET_ATTRIBUTE_TABLE
381 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
383 #undef TARGET_INSERT_ATTRIBUTES
384 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
386 #undef TARGET_ASM_FILE_START
387 #define TARGET_ASM_FILE_START arm_file_start
388 #undef TARGET_ASM_FILE_END
389 #define TARGET_ASM_FILE_END arm_file_end
391 #undef TARGET_ASM_ALIGNED_SI_OP
392 #define TARGET_ASM_ALIGNED_SI_OP NULL
393 #undef TARGET_ASM_INTEGER
394 #define TARGET_ASM_INTEGER arm_assemble_integer
396 #undef TARGET_PRINT_OPERAND
397 #define TARGET_PRINT_OPERAND arm_print_operand
398 #undef TARGET_PRINT_OPERAND_ADDRESS
399 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
400 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
401 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
403 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
404 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
406 #undef TARGET_ASM_FUNCTION_PROLOGUE
407 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
409 #undef TARGET_ASM_FUNCTION_EPILOGUE
410 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
412 #undef TARGET_CAN_INLINE_P
413 #define TARGET_CAN_INLINE_P arm_can_inline_p
415 #undef TARGET_RELAYOUT_FUNCTION
416 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
418 #undef TARGET_OPTION_OVERRIDE
419 #define TARGET_OPTION_OVERRIDE arm_option_override
421 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
422 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
424 #undef TARGET_OPTION_SAVE
425 #define TARGET_OPTION_SAVE arm_option_save
427 #undef TARGET_OPTION_RESTORE
428 #define TARGET_OPTION_RESTORE arm_option_restore
430 #undef TARGET_OPTION_PRINT
431 #define TARGET_OPTION_PRINT arm_option_print
433 #undef TARGET_COMP_TYPE_ATTRIBUTES
434 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
436 #undef TARGET_SCHED_CAN_SPECULATE_INSN
437 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
439 #undef TARGET_SCHED_MACRO_FUSION_P
440 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
442 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
443 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
445 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
446 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
448 #undef TARGET_SCHED_ADJUST_COST
449 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
451 #undef TARGET_SET_CURRENT_FUNCTION
452 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
454 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
455 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
457 #undef TARGET_SCHED_REORDER
458 #define TARGET_SCHED_REORDER arm_sched_reorder
460 #undef TARGET_REGISTER_MOVE_COST
461 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
463 #undef TARGET_MEMORY_MOVE_COST
464 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
466 #undef TARGET_ENCODE_SECTION_INFO
/* NOTE(review): the #ifdef ARM_PE / #else / #endif lines that should
   select exactly one of the two definitions below appear to have been
   lost from this extract; as written this is a macro redefinition —
   confirm against the full source.  */
468 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
470 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
473 #undef TARGET_STRIP_NAME_ENCODING
474 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
476 #undef TARGET_ASM_INTERNAL_LABEL
477 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
479 #undef TARGET_FLOATN_MODE
480 #define TARGET_FLOATN_MODE arm_floatn_mode
482 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
483 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
485 #undef TARGET_FUNCTION_VALUE
486 #define TARGET_FUNCTION_VALUE arm_function_value
488 #undef TARGET_LIBCALL_VALUE
489 #define TARGET_LIBCALL_VALUE arm_libcall_value
491 #undef TARGET_FUNCTION_VALUE_REGNO_P
492 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
494 #undef TARGET_ASM_OUTPUT_MI_THUNK
495 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
496 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
497 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
499 #undef TARGET_RTX_COSTS
500 #define TARGET_RTX_COSTS arm_rtx_costs
501 #undef TARGET_ADDRESS_COST
502 #define TARGET_ADDRESS_COST arm_address_cost
504 #undef TARGET_SHIFT_TRUNCATION_MASK
505 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
506 #undef TARGET_VECTOR_MODE_SUPPORTED_P
507 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
508 #undef TARGET_ARRAY_MODE_SUPPORTED_P
509 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
510 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
511 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
512 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
513 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
514 arm_autovectorize_vector_sizes
516 #undef TARGET_MACHINE_DEPENDENT_REORG
517 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
519 #undef TARGET_INIT_BUILTINS
520 #define TARGET_INIT_BUILTINS arm_init_builtins
521 #undef TARGET_EXPAND_BUILTIN
522 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
523 #undef TARGET_BUILTIN_DECL
524 #define TARGET_BUILTIN_DECL arm_builtin_decl
526 #undef TARGET_INIT_LIBFUNCS
527 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
529 #undef TARGET_PROMOTE_FUNCTION_MODE
530 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
531 #undef TARGET_PROMOTE_PROTOTYPES
532 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
533 #undef TARGET_PASS_BY_REFERENCE
534 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
535 #undef TARGET_ARG_PARTIAL_BYTES
536 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
537 #undef TARGET_FUNCTION_ARG
538 #define TARGET_FUNCTION_ARG arm_function_arg
539 #undef TARGET_FUNCTION_ARG_ADVANCE
540 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
541 #undef TARGET_FUNCTION_ARG_PADDING
542 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
543 #undef TARGET_FUNCTION_ARG_BOUNDARY
544 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
546 #undef TARGET_SETUP_INCOMING_VARARGS
547 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
549 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
550 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
552 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
553 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
554 #undef TARGET_TRAMPOLINE_INIT
555 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
556 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
557 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
559 #undef TARGET_WARN_FUNC_RETURN
560 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
562 #undef TARGET_DEFAULT_SHORT_ENUMS
563 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
565 #undef TARGET_ALIGN_ANON_BITFIELD
566 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
568 #undef TARGET_NARROW_VOLATILE_BITFIELD
569 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
571 #undef TARGET_CXX_GUARD_TYPE
572 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
574 #undef TARGET_CXX_GUARD_MASK_BIT
575 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
577 #undef TARGET_CXX_GET_COOKIE_SIZE
578 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
580 #undef TARGET_CXX_COOKIE_HAS_SIZE
581 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
583 #undef TARGET_CXX_CDTOR_RETURNS_THIS
584 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
586 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
587 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
589 #undef TARGET_CXX_USE_AEABI_ATEXIT
590 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
592 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
593 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
594 arm_cxx_determine_class_data_visibility
596 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
597 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
599 #undef TARGET_RETURN_IN_MSB
600 #define TARGET_RETURN_IN_MSB arm_return_in_msb
602 #undef TARGET_RETURN_IN_MEMORY
603 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
605 #undef TARGET_MUST_PASS_IN_STACK
606 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
609 #undef TARGET_ASM_UNWIND_EMIT
610 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
612 /* EABI unwinding tables use a different format for the typeinfo tables. */
613 #undef TARGET_ASM_TTYPE
614 #define TARGET_ASM_TTYPE arm_output_ttype
616 #undef TARGET_ARM_EABI_UNWINDER
617 #define TARGET_ARM_EABI_UNWINDER true
619 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
620 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
622 #endif /* ARM_UNWIND_INFO */
624 #undef TARGET_ASM_INIT_SECTIONS
625 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
627 #undef TARGET_DWARF_REGISTER_SPAN
628 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
630 #undef TARGET_CANNOT_COPY_INSN_P
631 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
634 #undef TARGET_HAVE_TLS
635 #define TARGET_HAVE_TLS true
638 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
639 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
641 #undef TARGET_LEGITIMATE_CONSTANT_P
642 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
644 #undef TARGET_CANNOT_FORCE_CONST_MEM
645 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
647 #undef TARGET_MAX_ANCHOR_OFFSET
648 #define TARGET_MAX_ANCHOR_OFFSET 4095
650 /* The minimum is set such that the total size of the block
651 for a particular anchor is -4088 + 1 + 4095 bytes, which is
652 divisible by eight, ensuring natural spacing of anchors. */
653 #undef TARGET_MIN_ANCHOR_OFFSET
654 #define TARGET_MIN_ANCHOR_OFFSET -4088
656 #undef TARGET_SCHED_ISSUE_RATE
657 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
659 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
660 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
661 arm_first_cycle_multipass_dfa_lookahead
663 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
664 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
665 arm_first_cycle_multipass_dfa_lookahead_guard
667 #undef TARGET_MANGLE_TYPE
668 #define TARGET_MANGLE_TYPE arm_mangle_type
670 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
671 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
673 #undef TARGET_BUILD_BUILTIN_VA_LIST
674 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
675 #undef TARGET_EXPAND_BUILTIN_VA_START
676 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
677 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
678 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
681 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
682 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
685 #undef TARGET_LEGITIMATE_ADDRESS_P
686 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
688 #undef TARGET_PREFERRED_RELOAD_CLASS
689 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
691 #undef TARGET_PROMOTED_TYPE
692 #define TARGET_PROMOTED_TYPE arm_promoted_type
694 #undef TARGET_SCALAR_MODE_SUPPORTED_P
695 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
697 #undef TARGET_COMPUTE_FRAME_LAYOUT
698 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
700 #undef TARGET_FRAME_POINTER_REQUIRED
701 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
703 #undef TARGET_CAN_ELIMINATE
704 #define TARGET_CAN_ELIMINATE arm_can_eliminate
706 #undef TARGET_CONDITIONAL_REGISTER_USAGE
707 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
709 #undef TARGET_CLASS_LIKELY_SPILLED_P
710 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
712 #undef TARGET_VECTORIZE_BUILTINS
713 #define TARGET_VECTORIZE_BUILTINS
715 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
716 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
717 arm_builtin_vectorized_function
719 #undef TARGET_VECTOR_ALIGNMENT
720 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
722 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
723 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
724 arm_vector_alignment_reachable
726 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
727 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
728 arm_builtin_support_vector_misalignment
730 #undef TARGET_PREFERRED_RENAME_CLASS
731 #define TARGET_PREFERRED_RENAME_CLASS \
732 arm_preferred_rename_class
734 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
735 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
736 arm_vectorize_vec_perm_const_ok
738 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
739 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
740 arm_builtin_vectorization_cost
741 #undef TARGET_VECTORIZE_ADD_STMT_COST
742 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
744 #undef TARGET_CANONICALIZE_COMPARISON
745 #define TARGET_CANONICALIZE_COMPARISON \
746 arm_canonicalize_comparison
748 #undef TARGET_ASAN_SHADOW_OFFSET
749 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
751 #undef MAX_INSN_PER_IT_BLOCK
752 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
754 #undef TARGET_CAN_USE_DOLOOP_P
755 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
757 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
758 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
760 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
761 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
763 #undef TARGET_SCHED_FUSION_PRIORITY
764 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
766 #undef TARGET_ASM_FUNCTION_SECTION
767 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
769 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
770 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
772 #undef TARGET_SECTION_TYPE_FLAGS
773 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
775 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
776 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
778 #undef TARGET_C_EXCESS_PRECISION
779 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
781 /* Although the architecture reserves bits 0 and 1, only the former is
782 used for ARM/Thumb ISA selection in v7 and earlier versions. */
783 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
784 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
786 #undef TARGET_FIXED_CONDITION_CODE_REGS
787 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
789 #undef TARGET_HARD_REGNO_NREGS
790 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
791 #undef TARGET_HARD_REGNO_MODE_OK
792 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
794 #undef TARGET_MODES_TIEABLE_P
795 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
797 #undef TARGET_CAN_CHANGE_MODE_CLASS
798 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
800 #undef TARGET_CONSTANT_ALIGNMENT
801 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
803 /* Obstack for minipool constant handling. */
804 static struct obstack minipool_obstack
;
/* Marker for the start of the current object within minipool_obstack.  */
805 static char * minipool_startobj
;
807 /* The maximum number of insns skipped which
808 will be conditionalised if possible. */
809 static int max_insns_skipped
= 5;
/* Assembly output stream; defined elsewhere in GCC.  */
811 extern FILE * asm_out_file
;
813 /* True if we are currently building a constant table. */
814 int making_const_table
;
816 /* The processor for which instructions should be scheduled. */
817 enum processor_type arm_tune
= TARGET_CPU_arm_none
;
819 /* The current tuning set. */
820 const struct tune_params
*current_tune
;
822 /* Which floating point hardware to schedule for. */
/* NOTE(review): the variable the comment above describes appears to be
   missing from this extract — confirm against the full source.  */
825 /* Used for Thumb call_via trampolines. */
826 rtx thumb_call_via_label
[14];
827 static int thumb_call_reg_needed
;
829 /* The bits in this mask specify which instruction scheduling options
   should be used.  */
831 unsigned int tune_flags
= 0;
833 /* The highest ARM architecture version supported by the
   target — TODO confirm exact wording against the full source.  */
835 enum base_architecture arm_base_arch
= BASE_ARCH_0
;
837 /* Active target architecture and tuning. */
839 struct arm_build_target arm_active_target
;
841 /* The following are used in the arm.md file as equivalents to bits
842 in the above two flag variables. */
844 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
847 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
850 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
853 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
856 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
859 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
862 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
865 /* Nonzero if this chip supports the ARM 6K extensions. */
868 /* Nonzero if this chip supports the ARM 6KZ extensions. */
871 /* Nonzero if instructions present in ARMv6-M can be used. */
874 /* Nonzero if this chip supports the ARM 7 extensions. */
877 /* Nonzero if this chip supports the Large Physical Address Extension. */
878 int arm_arch_lpae
= 0;
880 /* Nonzero if instructions not present in the 'M' profile can be used. */
881 int arm_arch_notm
= 0;
883 /* Nonzero if instructions present in ARMv7E-M can be used. */
886 /* Nonzero if instructions present in ARMv8 can be used. */
889 /* Nonzero if this chip supports the ARMv8.1 extensions. */
892 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
895 /* Nonzero if this chip supports the FP16 instructions extension of ARM
897 int arm_fp16_inst
= 0;
899 /* Nonzero if this chip can benefit from load scheduling. */
900 int arm_ld_sched
= 0;
902 /* Nonzero if this chip is a StrongARM. */
903 int arm_tune_strongarm
= 0;
905 /* Nonzero if this chip supports Intel Wireless MMX technology. */
906 int arm_arch_iwmmxt
= 0;
908 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
909 int arm_arch_iwmmxt2
= 0;
911 /* Nonzero if this chip is an XScale. */
912 int arm_arch_xscale
= 0;
914 /* Nonzero if tuning for XScale */
915 int arm_tune_xscale
= 0;
917 /* Nonzero if we want to tune for stores that access the write-buffer.
918 This typically means an ARM6 or ARM7 with MMU or MPU. */
919 int arm_tune_wbuf
= 0;
921 /* Nonzero if tuning for Cortex-A9. */
922 int arm_tune_cortex_a9
= 0;
924 /* Nonzero if we should define __THUMB_INTERWORK__ in the
926 XXX This is a bit of a hack, it's intended to help work around
927 problems in GLD which doesn't understand that armv5t code is
928 interworking clean. */
929 int arm_cpp_interwork
= 0;
931 /* Nonzero if chip supports Thumb 1. */
934 /* Nonzero if chip supports Thumb 2. */
937 /* Nonzero if chip supports integer division instruction. */
938 int arm_arch_arm_hwdiv
;
939 int arm_arch_thumb_hwdiv
;
941 /* Nonzero if chip disallows volatile memory access in IT block. */
942 int arm_arch_no_volatile_ce
;
944 /* Nonzero if we should use Neon to handle 64-bits operations rather
945 than core registers. */
946 int prefer_neon_for_64bits
= 0;
948 /* Nonzero if we shouldn't use literal pools. */
949 bool arm_disable_literal_pool
= false;
951 /* The register number to be used for the PIC offset register. */
952 unsigned arm_pic_register
= INVALID_REGNUM
;
954 enum arm_pcs arm_pcs_default
;
956 /* For an explanation of these variables, see final_prescan_insn below. */
958 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
959 enum arm_cond_code arm_current_cc
;
962 int arm_target_label
;
963 /* The number of conditionally executed insns, including the current insn. */
964 int arm_condexec_count
= 0;
965 /* A bitmask specifying the patterns for the IT block.
966 Zero means do not output an IT block before this insn. */
967 int arm_condexec_mask
= 0;
968 /* The number of bits used in arm_condexec_mask. */
969 int arm_condexec_masklen
= 0;
971 /* Nonzero if chip supports the ARMv8 CRC instructions. */
972 int arm_arch_crc
= 0;
974 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
975 int arm_arch_dotprod
= 0;
977 /* Nonzero if chip supports the ARMv8-M security extensions. */
978 int arm_arch_cmse
= 0;
980 /* Nonzero if the core has a very small, high-latency, multiply unit. */
981 int arm_m_profile_small_mul
= 0;
/* The condition codes of the ARM, and the inverse function.
   Indexed by enum arm_cond_code; "al" (always) and "nv" (never) are the
   last two entries.  The extraction of this file had dropped the brace
   tokens of the initializer; they are restored here.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* The register numbers in sequence, for passing to arm_gen_load_multiple.
   Identity mapping: entry i holds register number i for r0..r15.  The
   extraction of this file had dropped the brace tokens of the
   initializer; they are restored here.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
/* Assembler mnemonic for a logical-shift-left.  */
#define ARM_LSL_NAME "lsl"
/* True iff the two NUL-terminated strings are equal.  */
#define streq(string1, string2) (strcmp (string1, string2) == 0)

/* Mask of the low registers (r0-r7) usable as Thumb-2 work registers:
   excludes the frame pointer, SP, PC and the PIC register.  */
#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

/* NOTE(review): the three declarations below (a scheduler identifier,
   tuning flags and a pointer to a tuning table) appear to have lost
   their enclosing definition's opening lines in transit — as written
   they are bare file-scope declarations.  Restore the enclosing
   definition from the upstream source.  */
enum processor_type scheduler;
unsigned int tune_flags;
const struct tune_params *tune;

/* Prefetch-tuning triple: { num_slots, l1_cache_size, l1_cache_line_size }
   with -1 meaning "unknown/not beneficial".  */
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
/* NOTE(review): the body of this macro appears truncated — the trailing
   backslash continues the definition onto the following line.  Restore
   the full expansion from the upstream source.  */
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1020 /* arm generic vectorizer costs. */
1022 struct cpu_vec_costs arm_default_vec_cost
= {
1023 1, /* scalar_stmt_cost. */
1024 1, /* scalar load_cost. */
1025 1, /* scalar_store_cost. */
1026 1, /* vec_stmt_cost. */
1027 1, /* vec_to_scalar_cost. */
1028 1, /* scalar_to_vec_cost. */
1029 1, /* vec_align_load_cost. */
1030 1, /* vec_unalign_load_cost. */
1031 1, /* vec_unalign_store_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1037 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1038 #include "aarch-cost-tables.h"
/* Cortex-A9 specific insn cost table.
   NOTE(review): this initializer appears to have lost lines in transit —
   the grouping braces of the cpu_cost_table aggregate and several of its
   fields are missing.  Restore from the upstream source; the surviving
   entries are kept verbatim below.  */
const struct cpu_cost_table cortexa9_extra_costs =
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (2),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (3),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (4),	/* extend.  */
  COSTS_N_INSNS (4),	/* extend_add.  */
  COSTS_N_INSNS (2),	/* load.  */
  COSTS_N_INSNS (2),	/* load_sign_extend.  */
  COSTS_N_INSNS (2),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (5),	/* loadf.  */
  COSTS_N_INSNS (5),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (2),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (14),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (24),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1),	/* fpconst.  */
  COSTS_N_INSNS (1),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
/* Cortex-A8 specific insn cost table.
   NOTE(review): this initializer appears to have lost lines in transit —
   the grouping braces of the cpu_cost_table aggregate and several of its
   fields are missing.  Restore from the upstream source; the surviving
   entries are kept verbatim below.  */
const struct cpu_cost_table cortexa8_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  0,			/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  0,			/* log_shift_reg.  */
  0,			/* extend_arith.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (1),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* loadf.  */
  COSTS_N_INSNS (1),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (1),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (1),	/* storef.  */
  COSTS_N_INSNS (1),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (36),	/* div.  */
  COSTS_N_INSNS (11),	/* mult.  */
  COSTS_N_INSNS (20),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */
  COSTS_N_INSNS (64),	/* div.  */
  COSTS_N_INSNS (16),	/* mult.  */
  COSTS_N_INSNS (25),	/* mult_addsub.  */
  COSTS_N_INSNS (30),	/* fma.  */
  COSTS_N_INSNS (9),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (6),	/* compare.  */
  COSTS_N_INSNS (6),	/* widen.  */
  COSTS_N_INSNS (6),	/* narrow.  */
  COSTS_N_INSNS (8),	/* toint.  */
  COSTS_N_INSNS (8),	/* fromint.  */
  COSTS_N_INSNS (8)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
/* Cortex-A5 specific insn cost table.
   NOTE(review): this initializer appears to have lost lines in transit —
   the grouping braces of the cpu_cost_table aggregate and some of its
   fields are missing.  Restore from the upstream source; the surviving
   entries are kept verbatim below.  */
const struct cpu_cost_table cortexa5_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (6),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
/* Cortex-A7 specific insn cost table.
   NOTE(review): this initializer appears to have lost lines in transit —
   the grouping braces of the cpu_cost_table aggregate and some of its
   fields are missing.  Restore from the upstream source; the surviving
   entries are kept verbatim below.  */
const struct cpu_cost_table cortexa7_extra_costs =
  COSTS_N_INSNS (1),	/* shift.  */
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (1),	/* extend_add.  */
  COSTS_N_INSNS (7)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (1),	/* extend.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (1),	/* load.  */
  COSTS_N_INSNS (1),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (1),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (2),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (3),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (30),	/* div.  */
  COSTS_N_INSNS (6),	/* mult.  */
  COSTS_N_INSNS (10),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (3),	/* fpconst.  */
  COSTS_N_INSNS (3),	/* neg.  */
  COSTS_N_INSNS (3),	/* compare.  */
  COSTS_N_INSNS (3),	/* widen.  */
  COSTS_N_INSNS (3),	/* narrow.  */
  COSTS_N_INSNS (3),	/* toint.  */
  COSTS_N_INSNS (3),	/* fromint.  */
  COSTS_N_INSNS (3)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
/* Cortex-A12 specific insn cost table.
   NOTE(review): this initializer appears to have lost lines in transit —
   the grouping braces of the cpu_cost_table aggregate and several of its
   fields (including the store group) are missing.  Restore from the
   upstream source; the surviving entries are kept verbatim below.  */
const struct cpu_cost_table cortexa12_extra_costs =
  COSTS_N_INSNS (1),	/* shift_reg.  */
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfx.  */
  COSTS_N_INSNS (1),	/* clz.  */
  COSTS_N_INSNS (1),	/* rev.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (3),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (3),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  0,			/* load_unaligned.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (2),	/* stored.  */
  0,			/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
/* Cortex-A15 specific insn cost table.
   NOTE(review): this initializer appears to have lost lines in transit —
   the grouping braces of the cpu_cost_table aggregate and several of its
   fields (including parts of the load/store group) are missing.  Restore
   from the upstream source; the surviving entries are kept verbatim
   below.  */
const struct cpu_cost_table cortexa15_extra_costs =
  COSTS_N_INSNS (1),	/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* bfi.  */
  true			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (2),	/* simple.  */
  COSTS_N_INSNS (3),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (2),	/* add.  */
  COSTS_N_INSNS (2),	/* extend_add.  */
  COSTS_N_INSNS (18)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (3),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (3),	/* load.  */
  COSTS_N_INSNS (3),	/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (4),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  2,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (4),	/* loadf.  */
  COSTS_N_INSNS (4),	/* loadd.  */
  0,			/* load_unaligned.  */
  COSTS_N_INSNS (1),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  2,			/* stm_regs_per_insn_subsequent.  */
  0,			/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (17),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (5),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (31),	/* div.  */
  COSTS_N_INSNS (4),	/* mult.  */
  COSTS_N_INSNS (8),	/* mult_addsub.  */
  COSTS_N_INSNS (8),	/* fma.  */
  COSTS_N_INSNS (4),	/* addsub.  */
  COSTS_N_INSNS (2),	/* fpconst.  */
  COSTS_N_INSNS (2),	/* neg.  */
  COSTS_N_INSNS (2),	/* compare.  */
  COSTS_N_INSNS (4),	/* widen.  */
  COSTS_N_INSNS (4),	/* narrow.  */
  COSTS_N_INSNS (4),	/* toint.  */
  COSTS_N_INSNS (4),	/* fromint.  */
  COSTS_N_INSNS (4)	/* roundint.  */
  COSTS_N_INSNS (1)	/* alu.  */
/* ARMv7-M class (Cortex-M) insn cost table.
   NOTE(review): this initializer appears to have lost lines in transit —
   the grouping braces of the cpu_cost_table aggregate and several of its
   fields (including parts of both FP groups) are missing.  Restore from
   the upstream source; the surviving entries are kept verbatim below.  */
const struct cpu_cost_table v7m_extra_costs =
  0,			/* arith_shift.  */
  COSTS_N_INSNS (1),	/* arith_shift_reg.  */
  COSTS_N_INSNS (1),	/* log_shift_reg.  */
  COSTS_N_INSNS (1),	/* extend_arith.  */
  COSTS_N_INSNS (1),	/* non_exec.  */
  false			/* non_exec_costs_exec.  */
  COSTS_N_INSNS (1),	/* simple.  */
  COSTS_N_INSNS (1),	/* flag_setting.  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (1),	/* add.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (8)	/* idiv.  */
  0,			/* simple (N/A).  */
  0,			/* flag_setting (N/A).  */
  COSTS_N_INSNS (2),	/* extend.  */
  COSTS_N_INSNS (3),	/* extend_add.  */
  COSTS_N_INSNS (2),	/* load.  */
  0,			/* load_sign_extend.  */
  COSTS_N_INSNS (3),	/* ldrd.  */
  COSTS_N_INSNS (2),	/* ldm_1st.  */
  1,			/* ldm_regs_per_insn_1st.  */
  1,			/* ldm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* loadf.  */
  COSTS_N_INSNS (3),	/* loadd.  */
  COSTS_N_INSNS (1),	/* load_unaligned.  */
  COSTS_N_INSNS (2),	/* store.  */
  COSTS_N_INSNS (3),	/* strd.  */
  COSTS_N_INSNS (2),	/* stm_1st.  */
  1,			/* stm_regs_per_insn_1st.  */
  1,			/* stm_regs_per_insn_subsequent.  */
  COSTS_N_INSNS (2),	/* storef.  */
  COSTS_N_INSNS (3),	/* stored.  */
  COSTS_N_INSNS (1),	/* store_unaligned.  */
  COSTS_N_INSNS (1),	/* loadv.  */
  COSTS_N_INSNS (1)	/* storev.  */
  COSTS_N_INSNS (7),	/* div.  */
  COSTS_N_INSNS (2),	/* mult.  */
  COSTS_N_INSNS (5),	/* mult_addsub.  */
  COSTS_N_INSNS (3),	/* fma.  */
  COSTS_N_INSNS (1),	/* addsub.  */
  COSTS_N_INSNS (15),	/* div.  */
  COSTS_N_INSNS (5),	/* mult.  */
  COSTS_N_INSNS (7),	/* mult_addsub.  */
  COSTS_N_INSNS (7),	/* fma.  */
  COSTS_N_INSNS (3),	/* addsub.  */
  COSTS_N_INSNS (1)	/* alu.  */
1766 const struct tune_params arm_slowmul_tune
=
1768 &generic_extra_costs
, /* Insn extra costs. */
1769 NULL
, /* Sched adj cost. */
1770 arm_default_branch_cost
,
1771 &arm_default_vec_cost
,
1772 3, /* Constant limit. */
1773 5, /* Max cond insns. */
1774 8, /* Memset max inline. */
1775 1, /* Issue rate. */
1776 ARM_PREFETCH_NOT_BENEFICIAL
,
1777 tune_params::PREF_CONST_POOL_TRUE
,
1778 tune_params::PREF_LDRD_FALSE
,
1779 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1780 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1781 tune_params::DISPARAGE_FLAGS_NEITHER
,
1782 tune_params::PREF_NEON_64_FALSE
,
1783 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1784 tune_params::FUSE_NOTHING
,
1785 tune_params::SCHED_AUTOPREF_OFF
1788 const struct tune_params arm_fastmul_tune
=
1790 &generic_extra_costs
, /* Insn extra costs. */
1791 NULL
, /* Sched adj cost. */
1792 arm_default_branch_cost
,
1793 &arm_default_vec_cost
,
1794 1, /* Constant limit. */
1795 5, /* Max cond insns. */
1796 8, /* Memset max inline. */
1797 1, /* Issue rate. */
1798 ARM_PREFETCH_NOT_BENEFICIAL
,
1799 tune_params::PREF_CONST_POOL_TRUE
,
1800 tune_params::PREF_LDRD_FALSE
,
1801 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1802 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1803 tune_params::DISPARAGE_FLAGS_NEITHER
,
1804 tune_params::PREF_NEON_64_FALSE
,
1805 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1806 tune_params::FUSE_NOTHING
,
1807 tune_params::SCHED_AUTOPREF_OFF
1810 /* StrongARM has early execution of branches, so a sequence that is worth
1811 skipping is shorter. Set max_insns_skipped to a lower value. */
1813 const struct tune_params arm_strongarm_tune
=
1815 &generic_extra_costs
, /* Insn extra costs. */
1816 NULL
, /* Sched adj cost. */
1817 arm_default_branch_cost
,
1818 &arm_default_vec_cost
,
1819 1, /* Constant limit. */
1820 3, /* Max cond insns. */
1821 8, /* Memset max inline. */
1822 1, /* Issue rate. */
1823 ARM_PREFETCH_NOT_BENEFICIAL
,
1824 tune_params::PREF_CONST_POOL_TRUE
,
1825 tune_params::PREF_LDRD_FALSE
,
1826 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1827 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1828 tune_params::DISPARAGE_FLAGS_NEITHER
,
1829 tune_params::PREF_NEON_64_FALSE
,
1830 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1831 tune_params::FUSE_NOTHING
,
1832 tune_params::SCHED_AUTOPREF_OFF
1835 const struct tune_params arm_xscale_tune
=
1837 &generic_extra_costs
, /* Insn extra costs. */
1838 xscale_sched_adjust_cost
,
1839 arm_default_branch_cost
,
1840 &arm_default_vec_cost
,
1841 2, /* Constant limit. */
1842 3, /* Max cond insns. */
1843 8, /* Memset max inline. */
1844 1, /* Issue rate. */
1845 ARM_PREFETCH_NOT_BENEFICIAL
,
1846 tune_params::PREF_CONST_POOL_TRUE
,
1847 tune_params::PREF_LDRD_FALSE
,
1848 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1849 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1850 tune_params::DISPARAGE_FLAGS_NEITHER
,
1851 tune_params::PREF_NEON_64_FALSE
,
1852 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1853 tune_params::FUSE_NOTHING
,
1854 tune_params::SCHED_AUTOPREF_OFF
1857 const struct tune_params arm_9e_tune
=
1859 &generic_extra_costs
, /* Insn extra costs. */
1860 NULL
, /* Sched adj cost. */
1861 arm_default_branch_cost
,
1862 &arm_default_vec_cost
,
1863 1, /* Constant limit. */
1864 5, /* Max cond insns. */
1865 8, /* Memset max inline. */
1866 1, /* Issue rate. */
1867 ARM_PREFETCH_NOT_BENEFICIAL
,
1868 tune_params::PREF_CONST_POOL_TRUE
,
1869 tune_params::PREF_LDRD_FALSE
,
1870 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1872 tune_params::DISPARAGE_FLAGS_NEITHER
,
1873 tune_params::PREF_NEON_64_FALSE
,
1874 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1875 tune_params::FUSE_NOTHING
,
1876 tune_params::SCHED_AUTOPREF_OFF
1879 const struct tune_params arm_marvell_pj4_tune
=
1881 &generic_extra_costs
, /* Insn extra costs. */
1882 NULL
, /* Sched adj cost. */
1883 arm_default_branch_cost
,
1884 &arm_default_vec_cost
,
1885 1, /* Constant limit. */
1886 5, /* Max cond insns. */
1887 8, /* Memset max inline. */
1888 2, /* Issue rate. */
1889 ARM_PREFETCH_NOT_BENEFICIAL
,
1890 tune_params::PREF_CONST_POOL_TRUE
,
1891 tune_params::PREF_LDRD_FALSE
,
1892 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1893 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1894 tune_params::DISPARAGE_FLAGS_NEITHER
,
1895 tune_params::PREF_NEON_64_FALSE
,
1896 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1897 tune_params::FUSE_NOTHING
,
1898 tune_params::SCHED_AUTOPREF_OFF
1901 const struct tune_params arm_v6t2_tune
=
1903 &generic_extra_costs
, /* Insn extra costs. */
1904 NULL
, /* Sched adj cost. */
1905 arm_default_branch_cost
,
1906 &arm_default_vec_cost
,
1907 1, /* Constant limit. */
1908 5, /* Max cond insns. */
1909 8, /* Memset max inline. */
1910 1, /* Issue rate. */
1911 ARM_PREFETCH_NOT_BENEFICIAL
,
1912 tune_params::PREF_CONST_POOL_FALSE
,
1913 tune_params::PREF_LDRD_FALSE
,
1914 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1915 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1916 tune_params::DISPARAGE_FLAGS_NEITHER
,
1917 tune_params::PREF_NEON_64_FALSE
,
1918 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1919 tune_params::FUSE_NOTHING
,
1920 tune_params::SCHED_AUTOPREF_OFF
1924 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1925 const struct tune_params arm_cortex_tune
=
1927 &generic_extra_costs
,
1928 NULL
, /* Sched adj cost. */
1929 arm_default_branch_cost
,
1930 &arm_default_vec_cost
,
1931 1, /* Constant limit. */
1932 5, /* Max cond insns. */
1933 8, /* Memset max inline. */
1934 2, /* Issue rate. */
1935 ARM_PREFETCH_NOT_BENEFICIAL
,
1936 tune_params::PREF_CONST_POOL_FALSE
,
1937 tune_params::PREF_LDRD_FALSE
,
1938 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1939 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1940 tune_params::DISPARAGE_FLAGS_NEITHER
,
1941 tune_params::PREF_NEON_64_FALSE
,
1942 tune_params::PREF_NEON_STRINGOPS_FALSE
,
1943 tune_params::FUSE_NOTHING
,
1944 tune_params::SCHED_AUTOPREF_OFF
1947 const struct tune_params arm_cortex_a8_tune
=
1949 &cortexa8_extra_costs
,
1950 NULL
, /* Sched adj cost. */
1951 arm_default_branch_cost
,
1952 &arm_default_vec_cost
,
1953 1, /* Constant limit. */
1954 5, /* Max cond insns. */
1955 8, /* Memset max inline. */
1956 2, /* Issue rate. */
1957 ARM_PREFETCH_NOT_BENEFICIAL
,
1958 tune_params::PREF_CONST_POOL_FALSE
,
1959 tune_params::PREF_LDRD_FALSE
,
1960 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1961 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1962 tune_params::DISPARAGE_FLAGS_NEITHER
,
1963 tune_params::PREF_NEON_64_FALSE
,
1964 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1965 tune_params::FUSE_NOTHING
,
1966 tune_params::SCHED_AUTOPREF_OFF
1969 const struct tune_params arm_cortex_a7_tune
=
1971 &cortexa7_extra_costs
,
1972 NULL
, /* Sched adj cost. */
1973 arm_default_branch_cost
,
1974 &arm_default_vec_cost
,
1975 1, /* Constant limit. */
1976 5, /* Max cond insns. */
1977 8, /* Memset max inline. */
1978 2, /* Issue rate. */
1979 ARM_PREFETCH_NOT_BENEFICIAL
,
1980 tune_params::PREF_CONST_POOL_FALSE
,
1981 tune_params::PREF_LDRD_FALSE
,
1982 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
1983 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
1984 tune_params::DISPARAGE_FLAGS_NEITHER
,
1985 tune_params::PREF_NEON_64_FALSE
,
1986 tune_params::PREF_NEON_STRINGOPS_TRUE
,
1987 tune_params::FUSE_NOTHING
,
1988 tune_params::SCHED_AUTOPREF_OFF
1991 const struct tune_params arm_cortex_a15_tune
=
1993 &cortexa15_extra_costs
,
1994 NULL
, /* Sched adj cost. */
1995 arm_default_branch_cost
,
1996 &arm_default_vec_cost
,
1997 1, /* Constant limit. */
1998 2, /* Max cond insns. */
1999 8, /* Memset max inline. */
2000 3, /* Issue rate. */
2001 ARM_PREFETCH_NOT_BENEFICIAL
,
2002 tune_params::PREF_CONST_POOL_FALSE
,
2003 tune_params::PREF_LDRD_TRUE
,
2004 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2005 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2006 tune_params::DISPARAGE_FLAGS_ALL
,
2007 tune_params::PREF_NEON_64_FALSE
,
2008 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2009 tune_params::FUSE_NOTHING
,
2010 tune_params::SCHED_AUTOPREF_FULL
2013 const struct tune_params arm_cortex_a35_tune
=
2015 &cortexa53_extra_costs
,
2016 NULL
, /* Sched adj cost. */
2017 arm_default_branch_cost
,
2018 &arm_default_vec_cost
,
2019 1, /* Constant limit. */
2020 5, /* Max cond insns. */
2021 8, /* Memset max inline. */
2022 1, /* Issue rate. */
2023 ARM_PREFETCH_NOT_BENEFICIAL
,
2024 tune_params::PREF_CONST_POOL_FALSE
,
2025 tune_params::PREF_LDRD_FALSE
,
2026 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2027 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2028 tune_params::DISPARAGE_FLAGS_NEITHER
,
2029 tune_params::PREF_NEON_64_FALSE
,
2030 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2031 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2032 tune_params::SCHED_AUTOPREF_OFF
2035 const struct tune_params arm_cortex_a53_tune
=
2037 &cortexa53_extra_costs
,
2038 NULL
, /* Sched adj cost. */
2039 arm_default_branch_cost
,
2040 &arm_default_vec_cost
,
2041 1, /* Constant limit. */
2042 5, /* Max cond insns. */
2043 8, /* Memset max inline. */
2044 2, /* Issue rate. */
2045 ARM_PREFETCH_NOT_BENEFICIAL
,
2046 tune_params::PREF_CONST_POOL_FALSE
,
2047 tune_params::PREF_LDRD_FALSE
,
2048 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2049 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2050 tune_params::DISPARAGE_FLAGS_NEITHER
,
2051 tune_params::PREF_NEON_64_FALSE
,
2052 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2053 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2054 tune_params::SCHED_AUTOPREF_OFF
2057 const struct tune_params arm_cortex_a57_tune
=
2059 &cortexa57_extra_costs
,
2060 NULL
, /* Sched adj cost. */
2061 arm_default_branch_cost
,
2062 &arm_default_vec_cost
,
2063 1, /* Constant limit. */
2064 2, /* Max cond insns. */
2065 8, /* Memset max inline. */
2066 3, /* Issue rate. */
2067 ARM_PREFETCH_NOT_BENEFICIAL
,
2068 tune_params::PREF_CONST_POOL_FALSE
,
2069 tune_params::PREF_LDRD_TRUE
,
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2072 tune_params::DISPARAGE_FLAGS_ALL
,
2073 tune_params::PREF_NEON_64_FALSE
,
2074 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2075 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
| tune_params::FUSE_AES_AESMC
),
2076 tune_params::SCHED_AUTOPREF_FULL
2079 const struct tune_params arm_exynosm1_tune
=
2081 &exynosm1_extra_costs
,
2082 NULL
, /* Sched adj cost. */
2083 arm_default_branch_cost
,
2084 &arm_default_vec_cost
,
2085 1, /* Constant limit. */
2086 2, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 3, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL
,
2090 tune_params::PREF_CONST_POOL_FALSE
,
2091 tune_params::PREF_LDRD_TRUE
,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_ALL
,
2095 tune_params::PREF_NEON_64_FALSE
,
2096 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2097 tune_params::FUSE_NOTHING
,
2098 tune_params::SCHED_AUTOPREF_OFF
2101 const struct tune_params arm_xgene1_tune
=
2103 &xgene1_extra_costs
,
2104 NULL
, /* Sched adj cost. */
2105 arm_default_branch_cost
,
2106 &arm_default_vec_cost
,
2107 1, /* Constant limit. */
2108 2, /* Max cond insns. */
2109 32, /* Memset max inline. */
2110 4, /* Issue rate. */
2111 ARM_PREFETCH_NOT_BENEFICIAL
,
2112 tune_params::PREF_CONST_POOL_FALSE
,
2113 tune_params::PREF_LDRD_TRUE
,
2114 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2116 tune_params::DISPARAGE_FLAGS_ALL
,
2117 tune_params::PREF_NEON_64_FALSE
,
2118 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2119 tune_params::FUSE_NOTHING
,
2120 tune_params::SCHED_AUTOPREF_OFF
2123 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2124 less appealing. Set max_insns_skipped to a low value. */
2126 const struct tune_params arm_cortex_a5_tune
=
2128 &cortexa5_extra_costs
,
2129 NULL
, /* Sched adj cost. */
2130 arm_cortex_a5_branch_cost
,
2131 &arm_default_vec_cost
,
2132 1, /* Constant limit. */
2133 1, /* Max cond insns. */
2134 8, /* Memset max inline. */
2135 2, /* Issue rate. */
2136 ARM_PREFETCH_NOT_BENEFICIAL
,
2137 tune_params::PREF_CONST_POOL_FALSE
,
2138 tune_params::PREF_LDRD_FALSE
,
2139 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2140 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2141 tune_params::DISPARAGE_FLAGS_NEITHER
,
2142 tune_params::PREF_NEON_64_FALSE
,
2143 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2144 tune_params::FUSE_NOTHING
,
2145 tune_params::SCHED_AUTOPREF_OFF
2148 const struct tune_params arm_cortex_a9_tune
=
2150 &cortexa9_extra_costs
,
2151 cortex_a9_sched_adjust_cost
,
2152 arm_default_branch_cost
,
2153 &arm_default_vec_cost
,
2154 1, /* Constant limit. */
2155 5, /* Max cond insns. */
2156 8, /* Memset max inline. */
2157 2, /* Issue rate. */
2158 ARM_PREFETCH_BENEFICIAL(4,32,32),
2159 tune_params::PREF_CONST_POOL_FALSE
,
2160 tune_params::PREF_LDRD_FALSE
,
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2162 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2163 tune_params::DISPARAGE_FLAGS_NEITHER
,
2164 tune_params::PREF_NEON_64_FALSE
,
2165 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2166 tune_params::FUSE_NOTHING
,
2167 tune_params::SCHED_AUTOPREF_OFF
2170 const struct tune_params arm_cortex_a12_tune
=
2172 &cortexa12_extra_costs
,
2173 NULL
, /* Sched adj cost. */
2174 arm_default_branch_cost
,
2175 &arm_default_vec_cost
, /* Vectorizer costs. */
2176 1, /* Constant limit. */
2177 2, /* Max cond insns. */
2178 8, /* Memset max inline. */
2179 2, /* Issue rate. */
2180 ARM_PREFETCH_NOT_BENEFICIAL
,
2181 tune_params::PREF_CONST_POOL_FALSE
,
2182 tune_params::PREF_LDRD_TRUE
,
2183 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2184 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2185 tune_params::DISPARAGE_FLAGS_ALL
,
2186 tune_params::PREF_NEON_64_FALSE
,
2187 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2188 FUSE_OPS (tune_params::FUSE_MOVW_MOVT
),
2189 tune_params::SCHED_AUTOPREF_OFF
2192 const struct tune_params arm_cortex_a73_tune
=
2194 &cortexa57_extra_costs
,
2195 NULL
, /* Sched adj cost. */
2196 arm_default_branch_cost
,
2197 &arm_default_vec_cost
, /* Vectorizer costs. */
2198 1, /* Constant limit. */
2199 2, /* Max cond insns. */
2200 8, /* Memset max inline. */
2201 2, /* Issue rate. */
2202 ARM_PREFETCH_NOT_BENEFICIAL
,
2203 tune_params::PREF_CONST_POOL_FALSE
,
2204 tune_params::PREF_LDRD_TRUE
,
2205 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2206 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2207 tune_params::DISPARAGE_FLAGS_ALL
,
2208 tune_params::PREF_NEON_64_FALSE
,
2209 tune_params::PREF_NEON_STRINGOPS_TRUE
,
2210 FUSE_OPS (tune_params::FUSE_AES_AESMC
| tune_params::FUSE_MOVW_MOVT
),
2211 tune_params::SCHED_AUTOPREF_FULL
2214 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2215 cycle to execute each. An LDR from the constant pool also takes two cycles
2216 to execute, but mildly increases pipelining opportunity (consecutive
2217 loads/stores can be pipelined together, saving one cycle), and may also
2218 improve icache utilisation. Hence we prefer the constant pool for such
2221 const struct tune_params arm_v7m_tune
=
2224 NULL
, /* Sched adj cost. */
2225 arm_cortex_m_branch_cost
,
2226 &arm_default_vec_cost
,
2227 1, /* Constant limit. */
2228 2, /* Max cond insns. */
2229 8, /* Memset max inline. */
2230 1, /* Issue rate. */
2231 ARM_PREFETCH_NOT_BENEFICIAL
,
2232 tune_params::PREF_CONST_POOL_TRUE
,
2233 tune_params::PREF_LDRD_FALSE
,
2234 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2235 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2236 tune_params::DISPARAGE_FLAGS_NEITHER
,
2237 tune_params::PREF_NEON_64_FALSE
,
2238 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2239 tune_params::FUSE_NOTHING
,
2240 tune_params::SCHED_AUTOPREF_OFF
2243 /* Cortex-M7 tuning. */
2245 const struct tune_params arm_cortex_m7_tune
=
2248 NULL
, /* Sched adj cost. */
2249 arm_cortex_m7_branch_cost
,
2250 &arm_default_vec_cost
,
2251 0, /* Constant limit. */
2252 1, /* Max cond insns. */
2253 8, /* Memset max inline. */
2254 2, /* Issue rate. */
2255 ARM_PREFETCH_NOT_BENEFICIAL
,
2256 tune_params::PREF_CONST_POOL_TRUE
,
2257 tune_params::PREF_LDRD_FALSE
,
2258 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2259 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2260 tune_params::DISPARAGE_FLAGS_NEITHER
,
2261 tune_params::PREF_NEON_64_FALSE
,
2262 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2263 tune_params::FUSE_NOTHING
,
2264 tune_params::SCHED_AUTOPREF_OFF
2267 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2268 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2270 const struct tune_params arm_v6m_tune
=
2272 &generic_extra_costs
, /* Insn extra costs. */
2273 NULL
, /* Sched adj cost. */
2274 arm_default_branch_cost
,
2275 &arm_default_vec_cost
, /* Vectorizer costs. */
2276 1, /* Constant limit. */
2277 5, /* Max cond insns. */
2278 8, /* Memset max inline. */
2279 1, /* Issue rate. */
2280 ARM_PREFETCH_NOT_BENEFICIAL
,
2281 tune_params::PREF_CONST_POOL_FALSE
,
2282 tune_params::PREF_LDRD_FALSE
,
2283 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* Thumb. */
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE
, /* ARM. */
2285 tune_params::DISPARAGE_FLAGS_NEITHER
,
2286 tune_params::PREF_NEON_64_FALSE
,
2287 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2288 tune_params::FUSE_NOTHING
,
2289 tune_params::SCHED_AUTOPREF_OFF
2292 const struct tune_params arm_fa726te_tune
=
2294 &generic_extra_costs
, /* Insn extra costs. */
2295 fa726te_sched_adjust_cost
,
2296 arm_default_branch_cost
,
2297 &arm_default_vec_cost
,
2298 1, /* Constant limit. */
2299 5, /* Max cond insns. */
2300 8, /* Memset max inline. */
2301 2, /* Issue rate. */
2302 ARM_PREFETCH_NOT_BENEFICIAL
,
2303 tune_params::PREF_CONST_POOL_TRUE
,
2304 tune_params::PREF_LDRD_FALSE
,
2305 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* Thumb. */
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE
, /* ARM. */
2307 tune_params::DISPARAGE_FLAGS_NEITHER
,
2308 tune_params::PREF_NEON_64_FALSE
,
2309 tune_params::PREF_NEON_STRINGOPS_FALSE
,
2310 tune_params::FUSE_NOTHING
,
2311 tune_params::SCHED_AUTOPREF_OFF
2314 /* Auto-generated CPU, FPU and architecture tables. */
2315 #include "arm-cpu-data.h"
2317 /* The name of the preprocessor macro to define for this architecture. PROFILE
2318 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2319 is thus chosen to be big enough to hold the longest architecture name. */
2321 char arm_arch_name
[] = "__ARM_ARCH_PROFILE__";
2323 /* Supported TLS relocations. */
2331 TLS_DESCSEQ
/* GNU scheme */
2334 /* The maximum number of insns to be used when loading a constant. */
2336 arm_constant_limit (bool size_p
)
2338 return size_p
? 1 : current_tune
->constant_limit
;
2341 /* Emit an insn that's a simple single-set. Both the operands must be known
2343 inline static rtx_insn
*
2344 emit_set_insn (rtx x
, rtx y
)
2346 return emit_insn (gen_rtx_SET (x
, y
));
2349 /* Return the number of bits set in VALUE. */
2351 bit_count (unsigned long value
)
2353 unsigned long count
= 0;
2358 value
&= value
- 1; /* Clear the least-significant set bit. */
2364 /* Return the number of bits set in BMAP. */
2366 bitmap_popcount (const sbitmap bmap
)
2368 unsigned int count
= 0;
2370 sbitmap_iterator sbi
;
2372 EXECUTE_IF_SET_IN_BITMAP (bmap
, 0, n
, sbi
)
2381 } arm_fixed_mode_set
;
2383 /* A small helper for setting fixed-point library libfuncs. */
2386 arm_set_fixed_optab_libfunc (optab optable
, machine_mode mode
,
2387 const char *funcname
, const char *modename
,
2392 if (num_suffix
== 0)
2393 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
2395 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
2397 set_optab_libfunc (optable
, mode
, buffer
);
2401 arm_set_fixed_conv_libfunc (convert_optab optable
, machine_mode to
,
2402 machine_mode from
, const char *funcname
,
2403 const char *toname
, const char *fromname
)
2406 const char *maybe_suffix_2
= "";
2408 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2409 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
2410 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
2411 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
2412 maybe_suffix_2
= "2";
2414 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
2417 set_conv_libfunc (optable
, to
, from
, buffer
);
2420 /* Set up library functions unique to ARM. */
2423 arm_init_libfuncs (void)
2425 /* For Linux, we have access to kernel support for atomic operations. */
2426 if (arm_abi
== ARM_ABI_AAPCS_LINUX
)
2427 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE
);
2429 /* There are no special library functions unless we are using the
2434 /* The functions below are described in Section 4 of the "Run-Time
2435 ABI for the ARM architecture", Version 1.0. */
2437 /* Double-precision floating-point arithmetic. Table 2. */
2438 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
2439 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
2440 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
2441 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
2442 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
2444 /* Double-precision comparisons. Table 3. */
2445 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
2446 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
2447 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
2448 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
2449 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
2450 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
2451 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
2453 /* Single-precision floating-point arithmetic. Table 4. */
2454 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
2455 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
2456 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
2457 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
2458 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
2460 /* Single-precision comparisons. Table 5. */
2461 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
2462 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
2463 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
2464 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
2465 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
2466 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
2467 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
2469 /* Floating-point to integer conversions. Table 6. */
2470 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
2471 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
2472 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
2473 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
2474 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
2475 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
2476 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
2477 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
2479 /* Conversions between floating types. Table 7. */
2480 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
2481 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
2483 /* Integer to floating-point conversions. Table 8. */
2484 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
2485 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
2486 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
2487 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
2488 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
2489 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
2490 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
2491 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
2493 /* Long long. Table 9. */
2494 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
2495 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
2496 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
2497 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
2498 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
2499 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
2500 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
2501 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
2503 /* Integer (32/32->32) division. \S 4.3.1. */
2504 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
2505 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
2507 /* The divmod functions are designed so that they can be used for
2508 plain division, even though they return both the quotient and the
2509 remainder. The quotient is returned in the usual location (i.e.,
2510 r0 for SImode, {r0, r1} for DImode), just as would be expected
2511 for an ordinary division routine. Because the AAPCS calling
2512 conventions specify that all of { r0, r1, r2, r3 } are
2513 callee-saved registers, there is no need to tell the compiler
2514 explicitly that those registers are clobbered by these
2516 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
2517 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
2519 /* For SImode division the ABI provides div-without-mod routines,
2520 which are faster. */
2521 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
2522 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
2524 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2525 divmod libcalls instead. */
2526 set_optab_libfunc (smod_optab
, DImode
, NULL
);
2527 set_optab_libfunc (umod_optab
, DImode
, NULL
);
2528 set_optab_libfunc (smod_optab
, SImode
, NULL
);
2529 set_optab_libfunc (umod_optab
, SImode
, NULL
);
2531 /* Half-precision float operations. The compiler handles all operations
2532 with NULL libfuncs by converting the SFmode. */
2533 switch (arm_fp16_format
)
2535 case ARM_FP16_FORMAT_IEEE
:
2536 case ARM_FP16_FORMAT_ALTERNATIVE
:
2539 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
2540 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2542 : "__gnu_f2h_alternative"));
2543 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
2544 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2546 : "__gnu_h2f_alternative"));
2548 set_conv_libfunc (trunc_optab
, HFmode
, DFmode
,
2549 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
2551 : "__gnu_d2h_alternative"));
2554 set_optab_libfunc (add_optab
, HFmode
, NULL
);
2555 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
2556 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
2557 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
2558 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
2561 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
2562 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
2563 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
2564 set_optab_libfunc (le_optab
, HFmode
, NULL
);
2565 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
2566 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
2567 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
2574 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2576 const arm_fixed_mode_set fixed_arith_modes
[] =
2579 { E_UQQmode
, "uqq" },
2581 { E_UHQmode
, "uhq" },
2583 { E_USQmode
, "usq" },
2585 { E_UDQmode
, "udq" },
2587 { E_UTQmode
, "utq" },
2589 { E_UHAmode
, "uha" },
2591 { E_USAmode
, "usa" },
2593 { E_UDAmode
, "uda" },
2595 { E_UTAmode
, "uta" }
2597 const arm_fixed_mode_set fixed_conv_modes
[] =
2600 { E_UQQmode
, "uqq" },
2602 { E_UHQmode
, "uhq" },
2604 { E_USQmode
, "usq" },
2606 { E_UDQmode
, "udq" },
2608 { E_UTQmode
, "utq" },
2610 { E_UHAmode
, "uha" },
2612 { E_USAmode
, "usa" },
2614 { E_UDAmode
, "uda" },
2616 { E_UTAmode
, "uta" },
2627 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
2629 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
2630 "add", fixed_arith_modes
[i
].name
, 3);
2631 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
2632 "ssadd", fixed_arith_modes
[i
].name
, 3);
2633 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
2634 "usadd", fixed_arith_modes
[i
].name
, 3);
2635 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
2636 "sub", fixed_arith_modes
[i
].name
, 3);
2637 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
2638 "sssub", fixed_arith_modes
[i
].name
, 3);
2639 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
2640 "ussub", fixed_arith_modes
[i
].name
, 3);
2641 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
2642 "mul", fixed_arith_modes
[i
].name
, 3);
2643 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
2644 "ssmul", fixed_arith_modes
[i
].name
, 3);
2645 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
2646 "usmul", fixed_arith_modes
[i
].name
, 3);
2647 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
2648 "div", fixed_arith_modes
[i
].name
, 3);
2649 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
2650 "udiv", fixed_arith_modes
[i
].name
, 3);
2651 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
2652 "ssdiv", fixed_arith_modes
[i
].name
, 3);
2653 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
2654 "usdiv", fixed_arith_modes
[i
].name
, 3);
2655 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
2656 "neg", fixed_arith_modes
[i
].name
, 2);
2657 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
2658 "ssneg", fixed_arith_modes
[i
].name
, 2);
2659 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
2660 "usneg", fixed_arith_modes
[i
].name
, 2);
2661 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
2662 "ashl", fixed_arith_modes
[i
].name
, 3);
2663 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
2664 "ashr", fixed_arith_modes
[i
].name
, 3);
2665 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
2666 "lshr", fixed_arith_modes
[i
].name
, 3);
2667 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
2668 "ssashl", fixed_arith_modes
[i
].name
, 3);
2669 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
2670 "usashl", fixed_arith_modes
[i
].name
, 3);
2671 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
2672 "cmp", fixed_arith_modes
[i
].name
, 2);
2675 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
2676 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
2679 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
2680 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
2683 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
2684 fixed_conv_modes
[j
].mode
, "fract",
2685 fixed_conv_modes
[i
].name
,
2686 fixed_conv_modes
[j
].name
);
2687 arm_set_fixed_conv_libfunc (satfract_optab
,
2688 fixed_conv_modes
[i
].mode
,
2689 fixed_conv_modes
[j
].mode
, "satfract",
2690 fixed_conv_modes
[i
].name
,
2691 fixed_conv_modes
[j
].name
);
2692 arm_set_fixed_conv_libfunc (fractuns_optab
,
2693 fixed_conv_modes
[i
].mode
,
2694 fixed_conv_modes
[j
].mode
, "fractuns",
2695 fixed_conv_modes
[i
].name
,
2696 fixed_conv_modes
[j
].name
);
2697 arm_set_fixed_conv_libfunc (satfractuns_optab
,
2698 fixed_conv_modes
[i
].mode
,
2699 fixed_conv_modes
[j
].mode
, "satfractuns",
2700 fixed_conv_modes
[i
].name
,
2701 fixed_conv_modes
[j
].name
);
2705 if (TARGET_AAPCS_BASED
)
2706 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
2709 /* On AAPCS systems, this is the "struct __va_list". */
2710 static GTY(()) tree va_list_type
;
2712 /* Return the type to use as __builtin_va_list. */
2714 arm_build_builtin_va_list (void)
2719 if (!TARGET_AAPCS_BASED
)
2720 return std_build_builtin_va_list ();
2722 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2730 The C Library ABI further reinforces this definition in \S
2733 We must follow this definition exactly. The structure tag
2734 name is visible in C++ mangled names, and thus forms a part
2735 of the ABI. The field name may be used by people who
2736 #include <stdarg.h>. */
2737 /* Create the type. */
2738 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
2739 /* Give it the required name. */
2740 va_list_name
= build_decl (BUILTINS_LOCATION
,
2742 get_identifier ("__va_list"),
2744 DECL_ARTIFICIAL (va_list_name
) = 1;
2745 TYPE_NAME (va_list_type
) = va_list_name
;
2746 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
2747 /* Create the __ap field. */
2748 ap_field
= build_decl (BUILTINS_LOCATION
,
2750 get_identifier ("__ap"),
2752 DECL_ARTIFICIAL (ap_field
) = 1;
2753 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
2754 TYPE_FIELDS (va_list_type
) = ap_field
;
2755 /* Compute its layout. */
2756 layout_type (va_list_type
);
2758 return va_list_type
;
2761 /* Return an expression of type "void *" pointing to the next
2762 available argument in a variable-argument list. VALIST is the
2763 user-level va_list object, of type __builtin_va_list. */
2765 arm_extract_valist_ptr (tree valist
)
2767 if (TREE_TYPE (valist
) == error_mark_node
)
2768 return error_mark_node
;
2770 /* On an AAPCS target, the pointer is stored within "struct
2772 if (TARGET_AAPCS_BASED
)
2774 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
2775 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
2776 valist
, ap_field
, NULL_TREE
);
2782 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2784 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
2786 valist
= arm_extract_valist_ptr (valist
);
2787 std_expand_builtin_va_start (valist
, nextarg
);
2790 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2792 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
2795 valist
= arm_extract_valist_ptr (valist
);
2796 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
2799 /* Check any incompatible options that the user has specified. */
2801 arm_option_check_internal (struct gcc_options
*opts
)
2803 int flags
= opts
->x_target_flags
;
2805 /* iWMMXt and NEON are incompatible. */
2807 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_neon
))
2808 error ("iWMMXt and NEON are incompatible");
2810 /* Make sure that the processor choice does not conflict with any of the
2811 other command line choices. */
2812 if (TARGET_ARM_P (flags
)
2813 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
))
2814 error ("target CPU does not support ARM mode");
2816 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2817 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM_P (flags
))
2818 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2820 if (TARGET_ARM_P (flags
) && TARGET_CALLEE_INTERWORKING
)
2821 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2823 /* If this target is normally configured to use APCS frames, warn if they
2824 are turned off and debugging is turned on. */
2825 if (TARGET_ARM_P (flags
)
2826 && write_symbols
!= NO_DEBUG
2827 && !TARGET_APCS_FRAME
2828 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
2829 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2831 /* iWMMXt unsupported under Thumb mode. */
2832 if (TARGET_THUMB_P (flags
) && TARGET_IWMMXT
)
2833 error ("iWMMXt unsupported under Thumb mode");
2835 if (TARGET_HARD_TP
&& TARGET_THUMB1_P (flags
))
2836 error ("can not use -mtp=cp15 with 16-bit Thumb");
2838 if (TARGET_THUMB_P (flags
) && TARGET_VXWORKS_RTP
&& flag_pic
)
2840 error ("RTP PIC is incompatible with Thumb");
2844 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2846 if ((target_pure_code
|| target_slow_flash_data
)
2847 && (!TARGET_HAVE_MOVT
|| arm_arch_notm
|| flag_pic
|| TARGET_NEON
))
2849 const char *flag
= (target_pure_code
? "-mpure-code" :
2850 "-mslow-flash-data");
2851 error ("%s only supports non-pic code on M-profile targets with the "
2852 "MOVT instruction", flag
);
2857 /* Recompute the global settings depending on target attribute options. */
2860 arm_option_params_internal (void)
2862 /* If we are not using the default (ARM mode) section anchor offset
2863 ranges, then set the correct ranges now. */
2866 /* Thumb-1 LDR instructions cannot have negative offsets.
2867 Permissible positive offset ranges are 5-bit (for byte loads),
2868 6-bit (for halfword loads), or 7-bit (for word loads).
2869 Empirical results suggest a 7-bit anchor range gives the best
2870 overall code size. */
2871 targetm
.min_anchor_offset
= 0;
2872 targetm
.max_anchor_offset
= 127;
2874 else if (TARGET_THUMB2
)
2876 /* The minimum is set such that the total size of the block
2877 for a particular anchor is 248 + 1 + 4095 bytes, which is
2878 divisible by eight, ensuring natural spacing of anchors. */
2879 targetm
.min_anchor_offset
= -248;
2880 targetm
.max_anchor_offset
= 4095;
2884 targetm
.min_anchor_offset
= TARGET_MIN_ANCHOR_OFFSET
;
2885 targetm
.max_anchor_offset
= TARGET_MAX_ANCHOR_OFFSET
;
2888 /* Increase the number of conditional instructions with -Os. */
2889 max_insns_skipped
= optimize_size
? 4 : current_tune
->max_insns_skipped
;
2891 /* For THUMB2, we limit the conditional sequence to one IT block. */
2893 max_insns_skipped
= MIN (max_insns_skipped
, MAX_INSN_PER_IT_BLOCK
);
2896 /* True if -mflip-thumb should next add an attribute for the default
2897 mode, false if it should next add an attribute for the opposite mode. */
2898 static GTY(()) bool thumb_flipper
;
2900 /* Options after initial target override. */
2901 static GTY(()) tree init_optimize
;
2904 arm_override_options_after_change_1 (struct gcc_options
*opts
)
2906 if (opts
->x_align_functions
<= 0)
2907 opts
->x_align_functions
= TARGET_THUMB_P (opts
->x_target_flags
)
2908 && opts
->x_optimize_size
? 2 : 4;
2911 /* Implement targetm.override_options_after_change. */
2914 arm_override_options_after_change (void)
2916 arm_configure_build_target (&arm_active_target
,
2917 TREE_TARGET_OPTION (target_option_default_node
),
2918 &global_options_set
, false);
2920 arm_override_options_after_change_1 (&global_options
);
2923 /* Implement TARGET_OPTION_SAVE. */
2925 arm_option_save (struct cl_target_option
*ptr
, struct gcc_options
*opts
)
2927 ptr
->x_arm_arch_string
= opts
->x_arm_arch_string
;
2928 ptr
->x_arm_cpu_string
= opts
->x_arm_cpu_string
;
2929 ptr
->x_arm_tune_string
= opts
->x_arm_tune_string
;
2932 /* Implement TARGET_OPTION_RESTORE. */
2934 arm_option_restore (struct gcc_options
*opts
, struct cl_target_option
*ptr
)
2936 opts
->x_arm_arch_string
= ptr
->x_arm_arch_string
;
2937 opts
->x_arm_cpu_string
= ptr
->x_arm_cpu_string
;
2938 opts
->x_arm_tune_string
= ptr
->x_arm_tune_string
;
2939 arm_configure_build_target (&arm_active_target
, ptr
, &global_options_set
,
2943 /* Reset options between modes that the user has specified. */
2945 arm_option_override_internal (struct gcc_options
*opts
,
2946 struct gcc_options
*opts_set
)
2948 arm_override_options_after_change_1 (opts
);
2950 if (TARGET_INTERWORK
&& !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
2952 /* The default is to enable interworking, so this warning message would
2953 be confusing to users who have just compiled with, eg, -march=armv3. */
2954 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2955 opts
->x_target_flags
&= ~MASK_INTERWORK
;
2958 if (TARGET_THUMB_P (opts
->x_target_flags
)
2959 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
2961 warning (0, "target CPU does not support THUMB instructions");
2962 opts
->x_target_flags
&= ~MASK_THUMB
;
2965 if (TARGET_APCS_FRAME
&& TARGET_THUMB_P (opts
->x_target_flags
))
2967 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2968 opts
->x_target_flags
&= ~MASK_APCS_FRAME
;
2971 /* Callee super interworking implies thumb interworking. Adding
2972 this to the flags here simplifies the logic elsewhere. */
2973 if (TARGET_THUMB_P (opts
->x_target_flags
) && TARGET_CALLEE_INTERWORKING
)
2974 opts
->x_target_flags
|= MASK_INTERWORK
;
2976 /* need to remember initial values so combinaisons of options like
2977 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2978 cl_optimization
*to
= TREE_OPTIMIZATION (init_optimize
);
2980 if (! opts_set
->x_arm_restrict_it
)
2981 opts
->x_arm_restrict_it
= arm_arch8
;
2983 /* ARM execution state and M profile don't have [restrict] IT. */
2984 if (!TARGET_THUMB2_P (opts
->x_target_flags
) || !arm_arch_notm
)
2985 opts
->x_arm_restrict_it
= 0;
2987 /* Enable -munaligned-access by default for
2988 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2989 i.e. Thumb2 and ARM state only.
2990 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2991 - ARMv8 architecture-base processors.
2993 Disable -munaligned-access by default for
2994 - all pre-ARMv6 architecture-based processors
2995 - ARMv6-M architecture-based processors
2996 - ARMv8-M Baseline processors. */
2998 if (! opts_set
->x_unaligned_access
)
3000 opts
->x_unaligned_access
= (TARGET_32BIT_P (opts
->x_target_flags
)
3001 && arm_arch6
&& (arm_arch_notm
|| arm_arch7
));
3003 else if (opts
->x_unaligned_access
== 1
3004 && !(arm_arch6
&& (arm_arch_notm
|| arm_arch7
)))
3006 warning (0, "target CPU does not support unaligned accesses");
3007 opts
->x_unaligned_access
= 0;
3010 /* Don't warn since it's on by default in -O2. */
3011 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3012 opts
->x_flag_schedule_insns
= 0;
3014 opts
->x_flag_schedule_insns
= to
->x_flag_schedule_insns
;
3016 /* Disable shrink-wrap when optimizing function for size, since it tends to
3017 generate additional returns. */
3018 if (optimize_function_for_size_p (cfun
)
3019 && TARGET_THUMB2_P (opts
->x_target_flags
))
3020 opts
->x_flag_shrink_wrap
= false;
3022 opts
->x_flag_shrink_wrap
= to
->x_flag_shrink_wrap
;
3024 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3025 - epilogue_insns - does not accurately model the corresponding insns
3026 emitted in the asm file. In particular, see the comment in thumb_exit
3027 'Find out how many of the (return) argument registers we can corrupt'.
3028 As a consequence, the epilogue may clobber registers without fipa-ra
3029 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3030 TODO: Accurately model clobbers for epilogue_insns and reenable
3032 if (TARGET_THUMB1_P (opts
->x_target_flags
))
3033 opts
->x_flag_ipa_ra
= 0;
3035 opts
->x_flag_ipa_ra
= to
->x_flag_ipa_ra
;
3037 /* Thumb2 inline assembly code should always use unified syntax.
3038 This will apply to ARM and Thumb1 eventually. */
3039 opts
->x_inline_asm_unified
= TARGET_THUMB2_P (opts
->x_target_flags
);
3041 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3042 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
;
3046 static sbitmap isa_all_fpubits
;
3047 static sbitmap isa_quirkbits
;
3049 /* Configure a build target TARGET from the user-specified options OPTS and
3050 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3051 architecture have been specified, but the two are not identical. */
3053 arm_configure_build_target (struct arm_build_target
*target
,
3054 struct cl_target_option
*opts
,
3055 struct gcc_options
*opts_set
,
3056 bool warn_compatible
)
3058 const cpu_option
*arm_selected_tune
= NULL
;
3059 const arch_option
*arm_selected_arch
= NULL
;
3060 const cpu_option
*arm_selected_cpu
= NULL
;
3061 const arm_fpu_desc
*arm_selected_fpu
= NULL
;
3062 const char *tune_opts
= NULL
;
3063 const char *arch_opts
= NULL
;
3064 const char *cpu_opts
= NULL
;
3066 bitmap_clear (target
->isa
);
3067 target
->core_name
= NULL
;
3068 target
->arch_name
= NULL
;
3070 if (opts_set
->x_arm_arch_string
)
3072 arm_selected_arch
= arm_parse_arch_option_name (all_architectures
,
3074 opts
->x_arm_arch_string
);
3075 arch_opts
= strchr (opts
->x_arm_arch_string
, '+');
3078 if (opts_set
->x_arm_cpu_string
)
3080 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "-mcpu",
3081 opts
->x_arm_cpu_string
);
3082 cpu_opts
= strchr (opts
->x_arm_cpu_string
, '+');
3083 arm_selected_tune
= arm_selected_cpu
;
3084 /* If taking the tuning from -mcpu, we don't need to rescan the
3085 options for tuning. */
3088 if (opts_set
->x_arm_tune_string
)
3090 arm_selected_tune
= arm_parse_cpu_option_name (all_cores
, "-mtune",
3091 opts
->x_arm_tune_string
);
3092 tune_opts
= strchr (opts
->x_arm_tune_string
, '+');
3095 if (arm_selected_arch
)
3097 arm_initialize_isa (target
->isa
, arm_selected_arch
->common
.isa_bits
);
3098 arm_parse_option_features (target
->isa
, &arm_selected_arch
->common
,
3101 if (arm_selected_cpu
)
3103 auto_sbitmap
cpu_isa (isa_num_bits
);
3104 auto_sbitmap
isa_delta (isa_num_bits
);
3106 arm_initialize_isa (cpu_isa
, arm_selected_cpu
->common
.isa_bits
);
3107 arm_parse_option_features (cpu_isa
, &arm_selected_cpu
->common
,
3109 bitmap_xor (isa_delta
, cpu_isa
, target
->isa
);
3110 /* Ignore any bits that are quirk bits. */
3111 bitmap_and_compl (isa_delta
, isa_delta
, isa_quirkbits
);
3112 /* Ignore (for now) any bits that might be set by -mfpu. */
3113 bitmap_and_compl (isa_delta
, isa_delta
, isa_all_fpubits
);
3115 if (!bitmap_empty_p (isa_delta
))
3117 if (warn_compatible
)
3118 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3119 arm_selected_cpu
->common
.name
,
3120 arm_selected_arch
->common
.name
);
3121 /* -march wins for code generation.
3122 -mcpu wins for default tuning. */
3123 if (!arm_selected_tune
)
3124 arm_selected_tune
= arm_selected_cpu
;
3126 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3127 target
->arch_name
= arm_selected_arch
->common
.name
;
3131 /* Architecture and CPU are essentially the same.
3132 Prefer the CPU setting. */
3133 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3134 target
->core_name
= arm_selected_cpu
->common
.name
;
3135 /* Copy the CPU's capabilities, so that we inherit the
3136 appropriate extensions and quirks. */
3137 bitmap_copy (target
->isa
, cpu_isa
);
3142 /* Pick a CPU based on the architecture. */
3143 arm_selected_cpu
= all_cores
+ arm_selected_arch
->tune_id
;
3144 target
->arch_name
= arm_selected_arch
->common
.name
;
3145 /* Note: target->core_name is left unset in this path. */
3148 else if (arm_selected_cpu
)
3150 target
->core_name
= arm_selected_cpu
->common
.name
;
3151 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3152 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3154 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3156 /* If the user did not specify a processor or architecture, choose
3160 const cpu_option
*sel
;
3161 auto_sbitmap
sought_isa (isa_num_bits
);
3162 bitmap_clear (sought_isa
);
3163 auto_sbitmap
default_isa (isa_num_bits
);
3165 arm_selected_cpu
= arm_parse_cpu_option_name (all_cores
, "default CPU",
3166 TARGET_CPU_DEFAULT
);
3167 cpu_opts
= strchr (TARGET_CPU_DEFAULT
, '+');
3168 gcc_assert (arm_selected_cpu
->common
.name
);
3170 /* RWE: All of the selection logic below (to the end of this
3171 'if' clause) looks somewhat suspect. It appears to be mostly
3172 there to support forcing thumb support when the default CPU
3173 does not have thumb (somewhat dubious in terms of what the
3174 user might be expecting). I think it should be removed once
3175 support for the pre-thumb era cores is removed. */
3176 sel
= arm_selected_cpu
;
3177 arm_initialize_isa (default_isa
, sel
->common
.isa_bits
);
3178 arm_parse_option_features (default_isa
, &arm_selected_cpu
->common
,
3181 /* Now check to see if the user has specified any command line
3182 switches that require certain abilities from the cpu. */
3184 if (TARGET_INTERWORK
|| TARGET_THUMB
)
3186 bitmap_set_bit (sought_isa
, isa_bit_thumb
);
3187 bitmap_set_bit (sought_isa
, isa_bit_mode32
);
3189 /* There are no ARM processors that support both APCS-26 and
3190 interworking. Therefore we forcibly remove MODE26 from
3191 from the isa features here (if it was set), so that the
3192 search below will always be able to find a compatible
3194 bitmap_clear_bit (default_isa
, isa_bit_mode26
);
3197 /* If there are such requirements and the default CPU does not
3198 satisfy them, we need to run over the complete list of
3199 cores looking for one that is satisfactory. */
3200 if (!bitmap_empty_p (sought_isa
)
3201 && !bitmap_subset_p (sought_isa
, default_isa
))
3203 auto_sbitmap
candidate_isa (isa_num_bits
);
3204 /* We're only interested in a CPU with at least the
3205 capabilities of the default CPU and the required
3206 additional features. */
3207 bitmap_ior (default_isa
, default_isa
, sought_isa
);
3209 /* Try to locate a CPU type that supports all of the abilities
3210 of the default CPU, plus the extra abilities requested by
3212 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3214 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3215 /* An exact match? */
3216 if (bitmap_equal_p (default_isa
, candidate_isa
))
3220 if (sel
->common
.name
== NULL
)
3222 unsigned current_bit_count
= isa_num_bits
;
3223 const cpu_option
*best_fit
= NULL
;
3225 /* Ideally we would like to issue an error message here
3226 saying that it was not possible to find a CPU compatible
3227 with the default CPU, but which also supports the command
3228 line options specified by the programmer, and so they
3229 ought to use the -mcpu=<name> command line option to
3230 override the default CPU type.
3232 If we cannot find a CPU that has exactly the
3233 characteristics of the default CPU and the given
3234 command line options we scan the array again looking
3235 for a best match. The best match must have at least
3236 the capabilities of the perfect match. */
3237 for (sel
= all_cores
; sel
->common
.name
!= NULL
; sel
++)
3239 arm_initialize_isa (candidate_isa
, sel
->common
.isa_bits
);
3241 if (bitmap_subset_p (default_isa
, candidate_isa
))
3245 bitmap_and_compl (candidate_isa
, candidate_isa
,
3247 count
= bitmap_popcount (candidate_isa
);
3249 if (count
< current_bit_count
)
3252 current_bit_count
= count
;
3256 gcc_assert (best_fit
);
3260 arm_selected_cpu
= sel
;
3263 /* Now we know the CPU, we can finally initialize the target
3265 target
->core_name
= arm_selected_cpu
->common
.name
;
3266 arm_initialize_isa (target
->isa
, arm_selected_cpu
->common
.isa_bits
);
3267 arm_parse_option_features (target
->isa
, &arm_selected_cpu
->common
,
3269 arm_selected_arch
= all_architectures
+ arm_selected_cpu
->arch
;
3272 gcc_assert (arm_selected_cpu
);
3273 gcc_assert (arm_selected_arch
);
3275 if (opts
->x_arm_fpu_index
!= TARGET_FPU_auto
)
3277 arm_selected_fpu
= &all_fpus
[opts
->x_arm_fpu_index
];
3278 auto_sbitmap
fpu_bits (isa_num_bits
);
3280 arm_initialize_isa (fpu_bits
, arm_selected_fpu
->isa_bits
);
3281 bitmap_and_compl (target
->isa
, target
->isa
, isa_all_fpubits
);
3282 bitmap_ior (target
->isa
, target
->isa
, fpu_bits
);
3285 if (!arm_selected_tune
)
3286 arm_selected_tune
= arm_selected_cpu
;
3287 else /* Validate the features passed to -mtune. */
3288 arm_parse_option_features (NULL
, &arm_selected_tune
->common
, tune_opts
);
3290 const cpu_tune
*tune_data
= &all_tunes
[arm_selected_tune
- all_cores
];
3292 /* Finish initializing the target structure. */
3293 target
->arch_pp_name
= arm_selected_arch
->arch
;
3294 target
->base_arch
= arm_selected_arch
->base_arch
;
3295 target
->profile
= arm_selected_arch
->profile
;
3297 target
->tune_flags
= tune_data
->tune_flags
;
3298 target
->tune
= tune_data
->tune
;
3299 target
->tune_core
= tune_data
->scheduler
;
3302 /* Fix up any incompatible options that the user has specified. */
3304 arm_option_override (void)
3306 static const enum isa_feature fpu_bitlist
[]
3307 = { ISA_ALL_FPU_INTERNAL
, isa_nobit
};
3308 static const enum isa_feature quirk_bitlist
[] = { ISA_ALL_QUIRKS
, isa_nobit
};
3309 cl_target_option opts
;
3311 isa_quirkbits
= sbitmap_alloc (isa_num_bits
);
3312 arm_initialize_isa (isa_quirkbits
, quirk_bitlist
);
3314 isa_all_fpubits
= sbitmap_alloc (isa_num_bits
);
3315 arm_initialize_isa (isa_all_fpubits
, fpu_bitlist
);
3317 arm_active_target
.isa
= sbitmap_alloc (isa_num_bits
);
3319 if (!global_options_set
.x_arm_fpu_index
)
3324 ok
= opt_enum_arg_to_value (OPT_mfpu_
, FPUTYPE_AUTO
, &fpu_index
,
3327 arm_fpu_index
= (enum fpu_type
) fpu_index
;
3330 cl_target_option_save (&opts
, &global_options
);
3331 arm_configure_build_target (&arm_active_target
, &opts
, &global_options_set
,
3334 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3335 SUBTARGET_OVERRIDE_OPTIONS
;
3338 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_active_target
.arch_pp_name
);
3339 arm_base_arch
= arm_active_target
.base_arch
;
3341 arm_tune
= arm_active_target
.tune_core
;
3342 tune_flags
= arm_active_target
.tune_flags
;
3343 current_tune
= arm_active_target
.tune
;
3345 /* TBD: Dwarf info for apcs frame is not handled yet. */
3346 if (TARGET_APCS_FRAME
)
3347 flag_shrink_wrap
= false;
3349 /* BPABI targets use linker tricks to allow interworking on cores
3350 without thumb support. */
3351 if (TARGET_INTERWORK
3353 && !bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
))
3355 warning (0, "target CPU does not support interworking" );
3356 target_flags
&= ~MASK_INTERWORK
;
3359 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
3361 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3362 target_flags
|= MASK_APCS_FRAME
;
3365 if (TARGET_POKE_FUNCTION_NAME
)
3366 target_flags
|= MASK_APCS_FRAME
;
3368 if (TARGET_APCS_REENT
&& flag_pic
)
3369 error ("-fpic and -mapcs-reent are incompatible");
3371 if (TARGET_APCS_REENT
)
3372 warning (0, "APCS reentrant code not supported. Ignored");
3374 /* Initialize boolean versions of the architectural flags, for use
3375 in the arm.md file. */
3376 arm_arch3m
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv3m
);
3377 arm_arch4
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv4
);
3378 arm_arch4t
= arm_arch4
&& bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3379 arm_arch5
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5
);
3380 arm_arch5e
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv5e
);
3381 arm_arch5te
= arm_arch5e
3382 && bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3383 arm_arch6
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6
);
3384 arm_arch6k
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv6k
);
3385 arm_arch_notm
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_notm
);
3386 arm_arch6m
= arm_arch6
&& !arm_arch_notm
;
3387 arm_arch7
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7
);
3388 arm_arch7em
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv7em
);
3389 arm_arch8
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8
);
3390 arm_arch8_1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_1
);
3391 arm_arch8_2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_armv8_2
);
3392 arm_arch_thumb1
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb
);
3393 arm_arch_thumb2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_thumb2
);
3394 arm_arch_xscale
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_xscale
);
3395 arm_arch_iwmmxt
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt
);
3396 arm_arch_iwmmxt2
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_iwmmxt2
);
3397 arm_arch_thumb_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_tdiv
);
3398 arm_arch_arm_hwdiv
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_adiv
);
3399 arm_arch_crc
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_crc32
);
3400 arm_arch_cmse
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_cmse
);
3401 arm_fp16_inst
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_fp16
);
3402 arm_arch_lpae
= bitmap_bit_p (arm_active_target
.isa
, isa_bit_lpae
);
3405 if (arm_fp16_format
== ARM_FP16_FORMAT_ALTERNATIVE
)
3406 error ("selected fp16 options are incompatible");
3407 arm_fp16_format
= ARM_FP16_FORMAT_IEEE
;
3411 /* Set up some tuning parameters. */
3412 arm_ld_sched
= (tune_flags
& TF_LDSCHED
) != 0;
3413 arm_tune_strongarm
= (tune_flags
& TF_STRONG
) != 0;
3414 arm_tune_wbuf
= (tune_flags
& TF_WBUF
) != 0;
3415 arm_tune_xscale
= (tune_flags
& TF_XSCALE
) != 0;
3416 arm_tune_cortex_a9
= (arm_tune
== TARGET_CPU_cortexa9
) != 0;
3417 arm_m_profile_small_mul
= (tune_flags
& TF_SMALLMUL
) != 0;
3419 /* And finally, set up some quirks. */
3420 arm_arch_no_volatile_ce
3421 = bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_no_volatile_ce
);
3422 arm_arch6kz
= arm_arch6k
&& bitmap_bit_p (arm_active_target
.isa
,
3423 isa_bit_quirk_armv6kz
);
3425 /* V5 code we generate is completely interworking capable, so we turn off
3426 TARGET_INTERWORK here to avoid many tests later on. */
3428 /* XXX However, we must pass the right pre-processor defines to CPP
3429 or GLD can get confused. This is a hack. */
3430 if (TARGET_INTERWORK
)
3431 arm_cpp_interwork
= 1;
3434 target_flags
&= ~MASK_INTERWORK
;
3436 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
3437 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3439 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
3440 error ("iwmmxt abi requires an iwmmxt capable cpu");
3442 /* If soft-float is specified then don't use FPU. */
3443 if (TARGET_SOFT_FLOAT
)
3444 arm_fpu_attr
= FPU_NONE
;
3446 arm_fpu_attr
= FPU_VFP
;
3448 if (TARGET_AAPCS_BASED
)
3450 if (TARGET_CALLER_INTERWORKING
)
3451 error ("AAPCS does not support -mcaller-super-interworking");
3453 if (TARGET_CALLEE_INTERWORKING
)
3454 error ("AAPCS does not support -mcallee-super-interworking");
3457 /* __fp16 support currently assumes the core has ldrh. */
3458 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
3459 sorry ("__fp16 and no ldrh");
3461 if (TARGET_AAPCS_BASED
)
3463 if (arm_abi
== ARM_ABI_IWMMXT
)
3464 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
3465 else if (TARGET_HARD_FLOAT_ABI
)
3467 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
3468 if (!bitmap_bit_p (arm_active_target
.isa
, isa_bit_vfpv2
))
3469 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3472 arm_pcs_default
= ARM_PCS_AAPCS
;
3476 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
)
3477 sorry ("-mfloat-abi=hard and VFP");
3479 if (arm_abi
== ARM_ABI_APCS
)
3480 arm_pcs_default
= ARM_PCS_APCS
;
3482 arm_pcs_default
= ARM_PCS_ATPCS
;
3485 /* For arm2/3 there is no need to do any scheduling if we are doing
3486 software floating-point. */
3487 if (TARGET_SOFT_FLOAT
&& (tune_flags
& TF_NO_MODE32
))
3488 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
3490 /* Use the cp15 method if it is available. */
3491 if (target_thread_pointer
== TP_AUTO
)
3493 if (arm_arch6k
&& !TARGET_THUMB1
)
3494 target_thread_pointer
= TP_CP15
;
3496 target_thread_pointer
= TP_SOFT
;
3499 /* Override the default structure alignment for AAPCS ABI. */
3500 if (!global_options_set
.x_arm_structure_size_boundary
)
3502 if (TARGET_AAPCS_BASED
)
3503 arm_structure_size_boundary
= 8;
3507 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3509 if (arm_structure_size_boundary
!= 8
3510 && arm_structure_size_boundary
!= 32
3511 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
3513 if (ARM_DOUBLEWORD_ALIGN
)
3515 "structure size boundary can only be set to 8, 32 or 64");
3517 warning (0, "structure size boundary can only be set to 8 or 32");
3518 arm_structure_size_boundary
3519 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
3523 if (TARGET_VXWORKS_RTP
)
3525 if (!global_options_set
.x_arm_pic_data_is_text_relative
)
3526 arm_pic_data_is_text_relative
= 0;
3529 && !arm_pic_data_is_text_relative
3530 && !(global_options_set
.x_target_flags
& MASK_SINGLE_PIC_BASE
))
3531 /* When text & data segments don't have a fixed displacement, the
3532 intended use is with a single, read only, pic base register.
3533 Unless the user explicitly requested not to do that, set
3535 target_flags
|= MASK_SINGLE_PIC_BASE
;
3537 /* If stack checking is disabled, we can use r10 as the PIC register,
3538 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3539 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
3541 if (TARGET_VXWORKS_RTP
)
3542 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3543 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
3546 if (flag_pic
&& TARGET_VXWORKS_RTP
)
3547 arm_pic_register
= 9;
3549 if (arm_pic_register_string
!= NULL
)
3551 int pic_register
= decode_reg_name (arm_pic_register_string
);
3554 warning (0, "-mpic-register= is useless without -fpic");
3556 /* Prevent the user from choosing an obviously stupid PIC register. */
3557 else if (pic_register
< 0 || call_used_regs
[pic_register
]
3558 || pic_register
== HARD_FRAME_POINTER_REGNUM
3559 || pic_register
== STACK_POINTER_REGNUM
3560 || pic_register
>= PC_REGNUM
3561 || (TARGET_VXWORKS_RTP
3562 && (unsigned int) pic_register
!= arm_pic_register
))
3563 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
3565 arm_pic_register
= pic_register
;
3568 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3569 if (fix_cm3_ldrd
== 2)
3571 if (bitmap_bit_p (arm_active_target
.isa
, isa_bit_quirk_cm3_ldrd
))
3577 /* Hot/Cold partitioning is not currently supported, since we can't
3578 handle literal pool placement in that case. */
3579 if (flag_reorder_blocks_and_partition
)
3581 inform (input_location
,
3582 "-freorder-blocks-and-partition not supported on this architecture");
3583 flag_reorder_blocks_and_partition
= 0;
3584 flag_reorder_blocks
= 1;
3588 /* Hoisting PIC address calculations more aggressively provides a small,
3589 but measurable, size reduction for PIC code. Therefore, we decrease
3590 the bar for unrestricted expression hoisting to the cost of PIC address
3591 calculation, which is 2 instructions. */
3592 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
3593 global_options
.x_param_values
,
3594 global_options_set
.x_param_values
);
3596 /* ARM EABI defaults to strict volatile bitfields. */
3597 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0
3598 && abi_version_at_least(2))
3599 flag_strict_volatile_bitfields
= 1;
3601 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3602 have deemed it beneficial (signified by setting
3603 prefetch.num_slots to 1 or more). */
3604 if (flag_prefetch_loop_arrays
< 0
3607 && current_tune
->prefetch
.num_slots
> 0)
3608 flag_prefetch_loop_arrays
= 1;
3610 /* Set up parameters to be used in prefetching algorithm. Do not
3611 override the defaults unless we are tuning for a core we have
3612 researched values for. */
3613 if (current_tune
->prefetch
.num_slots
> 0)
3614 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3615 current_tune
->prefetch
.num_slots
,
3616 global_options
.x_param_values
,
3617 global_options_set
.x_param_values
);
3618 if (current_tune
->prefetch
.l1_cache_line_size
>= 0)
3619 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3620 current_tune
->prefetch
.l1_cache_line_size
,
3621 global_options
.x_param_values
,
3622 global_options_set
.x_param_values
);
3623 if (current_tune
->prefetch
.l1_cache_size
>= 0)
3624 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3625 current_tune
->prefetch
.l1_cache_size
,
3626 global_options
.x_param_values
,
3627 global_options_set
.x_param_values
);
3629 /* Use Neon to perform 64-bits operations rather than core
3631 prefer_neon_for_64bits
= current_tune
->prefer_neon_for_64bits
;
3632 if (use_neon_for_64bits
== 1)
3633 prefer_neon_for_64bits
= true;
3635 /* Use the alternative scheduling-pressure algorithm by default. */
3636 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM
, SCHED_PRESSURE_MODEL
,
3637 global_options
.x_param_values
,
3638 global_options_set
.x_param_values
);
3640 /* Look through ready list and all of queue for instructions
3641 relevant for L2 auto-prefetcher. */
3642 int param_sched_autopref_queue_depth
;
3644 switch (current_tune
->sched_autopref
)
3646 case tune_params::SCHED_AUTOPREF_OFF
:
3647 param_sched_autopref_queue_depth
= -1;
3650 case tune_params::SCHED_AUTOPREF_RANK
:
3651 param_sched_autopref_queue_depth
= 0;
3654 case tune_params::SCHED_AUTOPREF_FULL
:
3655 param_sched_autopref_queue_depth
= max_insn_queue_index
+ 1;
3662 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH
,
3663 param_sched_autopref_queue_depth
,
3664 global_options
.x_param_values
,
3665 global_options_set
.x_param_values
);
3667 /* Currently, for slow flash data, we just disable literal pools. We also
3668 disable it for pure-code. */
3669 if (target_slow_flash_data
|| target_pure_code
)
3670 arm_disable_literal_pool
= true;
3672 if (use_cmse
&& !arm_arch_cmse
)
3673 error ("target CPU does not support ARMv8-M Security Extensions");
3675 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3676 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3677 if (use_cmse
&& LAST_VFP_REGNUM
> LAST_LO_VFP_REGNUM
)
3678 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3680 /* Disable scheduling fusion by default if it's not armv7 processor
3681 or doesn't prefer ldrd/strd. */
3682 if (flag_schedule_fusion
== 2
3683 && (!arm_arch7
|| !current_tune
->prefer_ldrd_strd
))
3684 flag_schedule_fusion
= 0;
3686 /* Need to remember initial options before they are overriden. */
3687 init_optimize
= build_optimization_node (&global_options
);
3689 arm_option_override_internal (&global_options
, &global_options_set
);
3690 arm_option_check_internal (&global_options
);
3691 arm_option_params_internal ();
3693 /* Create the default target_options structure. */
3694 target_option_default_node
= target_option_current_node
3695 = build_target_option_node (&global_options
);
3697 /* Register global variables with the garbage collector. */
3698 arm_add_gc_roots ();
3700 /* Init initial mode for testing. */
3701 thumb_flipper
= TARGET_THUMB
;
3705 arm_add_gc_roots (void)
3707 gcc_obstack_init(&minipool_obstack
);
3708 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
3711 /* A table of known ARM exception types.
3712 For use with the interrupt function attribute. */
3716 const char *const arg
;
3717 const unsigned long return_value
;
3721 static const isr_attribute_arg isr_attribute_args
[] =
3723 { "IRQ", ARM_FT_ISR
},
3724 { "irq", ARM_FT_ISR
},
3725 { "FIQ", ARM_FT_FIQ
},
3726 { "fiq", ARM_FT_FIQ
},
3727 { "ABORT", ARM_FT_ISR
},
3728 { "abort", ARM_FT_ISR
},
3729 { "ABORT", ARM_FT_ISR
},
3730 { "abort", ARM_FT_ISR
},
3731 { "UNDEF", ARM_FT_EXCEPTION
},
3732 { "undef", ARM_FT_EXCEPTION
},
3733 { "SWI", ARM_FT_EXCEPTION
},
3734 { "swi", ARM_FT_EXCEPTION
},
3735 { NULL
, ARM_FT_NORMAL
}
3738 /* Returns the (interrupt) function type of the current
3739 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3741 static unsigned long
3742 arm_isr_value (tree argument
)
3744 const isr_attribute_arg
* ptr
;
3748 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
3750 /* No argument - default to IRQ. */
3751 if (argument
== NULL_TREE
)
3754 /* Get the value of the argument. */
3755 if (TREE_VALUE (argument
) == NULL_TREE
3756 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
3757 return ARM_FT_UNKNOWN
;
3759 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
3761 /* Check it against the list of known arguments. */
3762 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3763 if (streq (arg
, ptr
->arg
))
3764 return ptr
->return_value
;
3766 /* An unrecognized interrupt type. */
3767 return ARM_FT_UNKNOWN
;
3770 /* Computes the type of the current function. */
3772 static unsigned long
3773 arm_compute_func_type (void)
3775 unsigned long type
= ARM_FT_UNKNOWN
;
3779 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
3781 /* Decide if the current function is volatile. Such functions
3782 never return, and many memory cycles can be saved by not storing
3783 register values that will never be needed again. This optimization
3784 was added to speed up context switching in a kernel application. */
3786 && (TREE_NOTHROW (current_function_decl
)
3787 || !(flag_unwind_tables
3789 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
3790 && TREE_THIS_VOLATILE (current_function_decl
))
3791 type
|= ARM_FT_VOLATILE
;
3793 if (cfun
->static_chain_decl
!= NULL
)
3794 type
|= ARM_FT_NESTED
;
3796 attr
= DECL_ATTRIBUTES (current_function_decl
);
3798 a
= lookup_attribute ("naked", attr
);
3800 type
|= ARM_FT_NAKED
;
3802 a
= lookup_attribute ("isr", attr
);
3804 a
= lookup_attribute ("interrupt", attr
);
3807 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
3809 type
|= arm_isr_value (TREE_VALUE (a
));
3811 if (lookup_attribute ("cmse_nonsecure_entry", attr
))
3812 type
|= ARM_FT_CMSE_ENTRY
;
3817 /* Returns the type of the current function. */
3820 arm_current_func_type (void)
3822 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
3823 cfun
->machine
->func_type
= arm_compute_func_type ();
3825 return cfun
->machine
->func_type
;
3829 arm_allocate_stack_slots_for_args (void)
3831 /* Naked functions should not allocate stack slots for arguments. */
3832 return !IS_NAKED (arm_current_func_type ());
3836 arm_warn_func_return (tree decl
)
3838 /* Naked functions are implemented entirely in assembly, including the
3839 return sequence, so suppress warnings about this. */
3840 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl
)) == NULL_TREE
;
3844 /* Output assembler code for a block containing the constant parts
3845 of a trampoline, leaving space for the variable parts.
3847 On the ARM, (if r8 is the static chain regnum, and remembering that
3848 referencing pc adds an offset of 8) the trampoline looks like:
3851 .word static chain value
3852 .word function's address
3853 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3856 arm_asm_trampoline_template (FILE *f
)
3858 fprintf (f
, "\t.syntax unified\n");
3862 fprintf (f
, "\t.arm\n");
3863 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3864 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
3866 else if (TARGET_THUMB2
)
3868 fprintf (f
, "\t.thumb\n");
3869 /* The Thumb-2 trampoline is similar to the arm implementation.
3870 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3871 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
3872 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
3873 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
3877 ASM_OUTPUT_ALIGN (f
, 2);
3878 fprintf (f
, "\t.code\t16\n");
3879 fprintf (f
, ".Ltrampoline_start:\n");
3880 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
3881 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3882 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
3883 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
3884 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
3885 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
3887 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3888 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
3891 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3894 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
3896 rtx fnaddr
, mem
, a_tramp
;
3898 emit_block_move (m_tramp
, assemble_trampoline_template (),
3899 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
3901 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
3902 emit_move_insn (mem
, chain_value
);
3904 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
3905 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
3906 emit_move_insn (mem
, fnaddr
);
3908 a_tramp
= XEXP (m_tramp
, 0);
3909 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
3910 LCT_NORMAL
, VOIDmode
, a_tramp
, Pmode
,
3911 plus_constant (Pmode
, a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
3914 /* Thumb trampolines should be entered in thumb mode, so set
3915 the bottom bit of the address. */
3918 arm_trampoline_adjust_address (rtx addr
)
3921 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
3922 NULL
, 0, OPTAB_LIB_WIDEN
);
3926 /* Return 1 if it is possible to return using a single instruction.
3927 If SIBLING is non-null, this is a test for a return before a sibling
3928 call. SIBLING is the call insn, so we can examine its register usage. */
3931 use_return_insn (int iscond
, rtx sibling
)
3934 unsigned int func_type
;
3935 unsigned long saved_int_regs
;
3936 unsigned HOST_WIDE_INT stack_adjust
;
3937 arm_stack_offsets
*offsets
;
3939 /* Never use a return instruction before reload has run. */
3940 if (!reload_completed
)
3943 func_type
= arm_current_func_type ();
3945 /* Naked, volatile and stack alignment functions need special
3947 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
3950 /* So do interrupt functions that use the frame pointer and Thumb
3951 interrupt functions. */
3952 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
3955 if (TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
3956 && !optimize_function_for_size_p (cfun
))
3959 offsets
= arm_get_frame_offsets ();
3960 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
3962 /* As do variadic functions. */
3963 if (crtl
->args
.pretend_args_size
3964 || cfun
->machine
->uses_anonymous_args
3965 /* Or if the function calls __builtin_eh_return () */
3966 || crtl
->calls_eh_return
3967 /* Or if the function calls alloca */
3968 || cfun
->calls_alloca
3969 /* Or if there is a stack adjustment. However, if the stack pointer
3970 is saved on the stack, we can use a pre-incrementing stack load. */
3971 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
3972 && stack_adjust
== 4))
3973 /* Or if the static chain register was saved above the frame, under the
3974 assumption that the stack pointer isn't saved on the stack. */
3975 || (!(TARGET_APCS_FRAME
&& frame_pointer_needed
)
3976 && arm_compute_static_chain_stack_bytes() != 0))
3979 saved_int_regs
= offsets
->saved_regs_mask
;
3981 /* Unfortunately, the insn
3983 ldmib sp, {..., sp, ...}
3985 triggers a bug on most SA-110 based devices, such that the stack
3986 pointer won't be correctly restored if the instruction takes a
3987 page fault. We work around this problem by popping r3 along with
3988 the other registers, since that is never slower than executing
3989 another instruction.
3991 We test for !arm_arch5 here, because code for any architecture
3992 less than this could potentially be run on one of the buggy
3994 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
3996 /* Validate that r3 is a call-clobbered register (always true in
3997 the default abi) ... */
3998 if (!call_used_regs
[3])
4001 /* ... that it isn't being used for a return value ... */
4002 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
4005 /* ... or for a tail-call argument ... */
4008 gcc_assert (CALL_P (sibling
));
4010 if (find_regno_fusage (sibling
, USE
, 3))
4014 /* ... and that there are no call-saved registers in r0-r2
4015 (always true in the default ABI). */
4016 if (saved_int_regs
& 0x7)
4020 /* Can't be done if interworking with Thumb, and any registers have been
4022 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
4025 /* On StrongARM, conditional returns are expensive if they aren't
4026 taken and multiple registers have been stacked. */
4027 if (iscond
&& arm_tune_strongarm
)
4029 /* Conditional return when just the LR is stored is a simple
4030 conditional-load instruction, that's not expensive. */
4031 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
4035 && arm_pic_register
!= INVALID_REGNUM
4036 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
4040 /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
4041 several instructions if anything needs to be popped. */
4042 if (saved_int_regs
&& IS_CMSE_ENTRY (func_type
))
4045 /* If there are saved registers but the LR isn't saved, then we need
4046 two instructions for the return. */
4047 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
4050 /* Can't be done if any of the VFP regs are pushed,
4051 since this also requires an insn. */
4052 if (TARGET_HARD_FLOAT
)
4053 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
4054 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
4057 if (TARGET_REALLY_IWMMXT
)
4058 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
4059 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
4065 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4066 shrink-wrapping if possible. This is the case if we need to emit a
4067 prologue, which we can test by looking at the offsets. */
4069 use_simple_return_p (void)
4071 arm_stack_offsets
*offsets
;
4073 /* Note this function can be called before or after reload. */
4074 if (!reload_completed
)
4075 arm_compute_frame_layout ();
4077 offsets
= arm_get_frame_offsets ();
4078 return offsets
->outgoing_args
!= 0;
4081 /* Return TRUE if int I is a valid immediate ARM constant. */
4084 const_ok_for_arm (HOST_WIDE_INT i
)
4088 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4089 be all zero, or all one. */
4090 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
4091 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
4092 != ((~(unsigned HOST_WIDE_INT
) 0)
4093 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
4096 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
4098 /* Fast return for 0 and small values. We must do this for zero, since
4099 the code below can't handle that one case. */
4100 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
4103 /* Get the number of trailing zeros. */
4104 lowbit
= ffs((int) i
) - 1;
4106 /* Only even shifts are allowed in ARM mode so round down to the
4107 nearest even number. */
4111 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
4116 /* Allow rotated constants in ARM mode. */
4118 && ((i
& ~0xc000003f) == 0
4119 || (i
& ~0xf000000f) == 0
4120 || (i
& ~0xfc000003) == 0))
4123 else if (TARGET_THUMB2
)
4127 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4130 if (i
== v
|| i
== (v
| (v
<< 8)))
4133 /* Allow repeated pattern 0xXY00XY00. */
4139 else if (TARGET_HAVE_MOVT
)
4141 /* Thumb-1 Targets with MOVT. */
4151 /* Return true if I is a valid constant for the operation CODE. */
4153 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
4155 if (const_ok_for_arm (i
))
4161 /* See if we can use movw. */
4162 if (TARGET_HAVE_MOVT
&& (i
& 0xffff0000) == 0)
4165 /* Otherwise, try mvn. */
4166 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4169 /* See if we can use addw or subw. */
4171 && ((i
& 0xfffff000) == 0
4172 || ((-i
) & 0xfffff000) == 0))
4193 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
4195 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
4201 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4205 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
4212 /* Return true if I is a valid di mode constant for the operation CODE. */
4214 const_ok_for_dimode_op (HOST_WIDE_INT i
, enum rtx_code code
)
4216 HOST_WIDE_INT hi_val
= (i
>> 32) & 0xFFFFFFFF;
4217 HOST_WIDE_INT lo_val
= i
& 0xFFFFFFFF;
4218 rtx hi
= GEN_INT (hi_val
);
4219 rtx lo
= GEN_INT (lo_val
);
4229 return (const_ok_for_op (hi_val
, code
) || hi_val
== 0xFFFFFFFF)
4230 && (const_ok_for_op (lo_val
, code
) || lo_val
== 0xFFFFFFFF);
4232 return arm_not_operand (hi
, SImode
) && arm_add_operand (lo
, SImode
);
4239 /* Emit a sequence of insns to handle a large constant.
4240 CODE is the code of the operation required, it can be any of SET, PLUS,
4241 IOR, AND, XOR, MINUS;
4242 MODE is the mode in which the operation is being performed;
4243 VAL is the integer to operate on;
4244 SOURCE is the other operand (a register, or a null-pointer for SET);
4245 SUBTARGETS means it is safe to create scratch registers if that will
4246 either produce a simpler sequence, or we will want to cse the values.
4247 Return value is the number of insns emitted. */
4249 /* ??? Tweak this for thumb2. */
4251 arm_split_constant (enum rtx_code code
, machine_mode mode
, rtx insn
,
4252 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
4256 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
4257 cond
= COND_EXEC_TEST (PATTERN (insn
));
4261 if (subtargets
|| code
== SET
4262 || (REG_P (target
) && REG_P (source
)
4263 && REGNO (target
) != REGNO (source
)))
4265 /* After arm_reorg has been called, we can't fix up expensive
4266 constants by pushing them into memory so we must synthesize
4267 them in-line, regardless of the cost. This is only likely to
4268 be more costly on chips that have load delay slots and we are
4269 compiling without running the scheduler (so no splitting
4270 occurred before the final instruction emission).
4272 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4274 if (!cfun
->machine
->after_arm_reorg
4276 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
4278 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
4283 /* Currently SET is the only monadic value for CODE, all
4284 the rest are diadic. */
4285 if (TARGET_USE_MOVT
)
4286 arm_emit_movpair (target
, GEN_INT (val
));
4288 emit_set_insn (target
, GEN_INT (val
));
4294 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
4296 if (TARGET_USE_MOVT
)
4297 arm_emit_movpair (temp
, GEN_INT (val
));
4299 emit_set_insn (temp
, GEN_INT (val
));
4301 /* For MINUS, the value is subtracted from, since we never
4302 have subtraction of a constant. */
4304 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
4306 emit_set_insn (target
,
4307 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
4313 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
4317 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4318 ARM/THUMB2 immediates, and add up to VAL.
4319 Thr function return value gives the number of insns required. */
4321 optimal_immediate_sequence (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4322 struct four_ints
*return_sequence
)
4324 int best_consecutive_zeros
= 0;
4328 struct four_ints tmp_sequence
;
4330 /* If we aren't targeting ARM, the best place to start is always at
4331 the bottom, otherwise look more closely. */
4334 for (i
= 0; i
< 32; i
+= 2)
4336 int consecutive_zeros
= 0;
4338 if (!(val
& (3 << i
)))
4340 while ((i
< 32) && !(val
& (3 << i
)))
4342 consecutive_zeros
+= 2;
4345 if (consecutive_zeros
> best_consecutive_zeros
)
4347 best_consecutive_zeros
= consecutive_zeros
;
4348 best_start
= i
- consecutive_zeros
;
4355 /* So long as it won't require any more insns to do so, it's
4356 desirable to emit a small constant (in bits 0...9) in the last
4357 insn. This way there is more chance that it can be combined with
4358 a later addressing insn to form a pre-indexed load or store
4359 operation. Consider:
4361 *((volatile int *)0xe0000100) = 1;
4362 *((volatile int *)0xe0000110) = 2;
4364 We want this to wind up as:
4368 str rB, [rA, #0x100]
4370 str rB, [rA, #0x110]
4372 rather than having to synthesize both large constants from scratch.
4374 Therefore, we calculate how many insns would be required to emit
4375 the constant starting from `best_start', and also starting from
4376 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4377 yield a shorter sequence, we may as well use zero. */
4378 insns1
= optimal_immediate_sequence_1 (code
, val
, return_sequence
, best_start
);
4380 && ((HOST_WIDE_INT_1U
<< best_start
) < val
))
4382 insns2
= optimal_immediate_sequence_1 (code
, val
, &tmp_sequence
, 0);
4383 if (insns2
<= insns1
)
4385 *return_sequence
= tmp_sequence
;
4393 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4395 optimal_immediate_sequence_1 (enum rtx_code code
, unsigned HOST_WIDE_INT val
,
4396 struct four_ints
*return_sequence
, int i
)
4398 int remainder
= val
& 0xffffffff;
4401 /* Try and find a way of doing the job in either two or three
4404 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4405 location. We start at position I. This may be the MSB, or
4406 optimial_immediate_sequence may have positioned it at the largest block
4407 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4408 wrapping around to the top of the word when we drop off the bottom.
4409 In the worst case this code should produce no more than four insns.
4411 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4412 constants, shifted to any arbitrary location. We should always start
4417 unsigned int b1
, b2
, b3
, b4
;
4418 unsigned HOST_WIDE_INT result
;
4421 gcc_assert (insns
< 4);
4426 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4427 if (remainder
& ((TARGET_ARM
? (3 << (i
- 2)) : (1 << (i
- 1)))))
4430 if (i
<= 12 && TARGET_THUMB2
&& code
== PLUS
)
4431 /* We can use addw/subw for the last 12 bits. */
4435 /* Use an 8-bit shifted/rotated immediate. */
4439 result
= remainder
& ((0x0ff << end
)
4440 | ((i
< end
) ? (0xff >> (32 - end
))
4447 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4448 arbitrary shifts. */
4449 i
-= TARGET_ARM
? 2 : 1;
4453 /* Next, see if we can do a better job with a thumb2 replicated
4456 We do it this way around to catch the cases like 0x01F001E0 where
4457 two 8-bit immediates would work, but a replicated constant would
4460 TODO: 16-bit constants that don't clear all the bits, but still win.
4461 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4464 b1
= (remainder
& 0xff000000) >> 24;
4465 b2
= (remainder
& 0x00ff0000) >> 16;
4466 b3
= (remainder
& 0x0000ff00) >> 8;
4467 b4
= remainder
& 0xff;
4471 /* The 8-bit immediate already found clears b1 (and maybe b2),
4472 but must leave b3 and b4 alone. */
4474 /* First try to find a 32-bit replicated constant that clears
4475 almost everything. We can assume that we can't do it in one,
4476 or else we wouldn't be here. */
4477 unsigned int tmp
= b1
& b2
& b3
& b4
;
4478 unsigned int tmp2
= tmp
+ (tmp
<< 8) + (tmp
<< 16)
4480 unsigned int matching_bytes
= (tmp
== b1
) + (tmp
== b2
)
4481 + (tmp
== b3
) + (tmp
== b4
);
4483 && (matching_bytes
>= 3
4484 || (matching_bytes
== 2
4485 && const_ok_for_op (remainder
& ~tmp2
, code
))))
4487 /* At least 3 of the bytes match, and the fourth has at
4488 least as many bits set, or two of the bytes match
4489 and it will only require one more insn to finish. */
4497 /* Second, try to find a 16-bit replicated constant that can
4498 leave three of the bytes clear. If b2 or b4 is already
4499 zero, then we can. If the 8-bit from above would not
4500 clear b2 anyway, then we still win. */
4501 else if (b1
== b3
&& (!b2
|| !b4
4502 || (remainder
& 0x00ff0000 & ~result
)))
4504 result
= remainder
& 0xff00ff00;
4510 /* The 8-bit immediate already found clears b2 (and maybe b3)
4511 and we don't get here unless b1 is alredy clear, but it will
4512 leave b4 unchanged. */
4514 /* If we can clear b2 and b4 at once, then we win, since the
4515 8-bits couldn't possibly reach that far. */
4518 result
= remainder
& 0x00ff00ff;
4524 return_sequence
->i
[insns
++] = result
;
4525 remainder
&= ~result
;
4527 if (code
== SET
|| code
== MINUS
)
4535 /* Emit an instruction with the indicated PATTERN. If COND is
4536 non-NULL, conditionalize the execution of the instruction on COND
4540 emit_constant_insn (rtx cond
, rtx pattern
)
4543 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
4544 emit_insn (pattern
);
4547 /* As above, but extra parameter GENERATE which, if clear, suppresses
4551 arm_gen_constant (enum rtx_code code
, machine_mode mode
, rtx cond
,
4552 unsigned HOST_WIDE_INT val
, rtx target
, rtx source
,
4553 int subtargets
, int generate
)
4557 int final_invert
= 0;
4559 int set_sign_bit_copies
= 0;
4560 int clear_sign_bit_copies
= 0;
4561 int clear_zero_bit_copies
= 0;
4562 int set_zero_bit_copies
= 0;
4563 int insns
= 0, neg_insns
, inv_insns
;
4564 unsigned HOST_WIDE_INT temp1
, temp2
;
4565 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
4566 struct four_ints
*immediates
;
4567 struct four_ints pos_immediates
, neg_immediates
, inv_immediates
;
4569 /* Find out which operations are safe for a given CODE. Also do a quick
4570 check for degenerate cases; these can occur when DImode operations
4583 if (remainder
== 0xffffffff)
4586 emit_constant_insn (cond
,
4587 gen_rtx_SET (target
,
4588 GEN_INT (ARM_SIGN_EXTEND (val
))));
4594 if (reload_completed
&& rtx_equal_p (target
, source
))
4598 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4607 emit_constant_insn (cond
, gen_rtx_SET (target
, const0_rtx
));
4610 if (remainder
== 0xffffffff)
4612 if (reload_completed
&& rtx_equal_p (target
, source
))
4615 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4624 if (reload_completed
&& rtx_equal_p (target
, source
))
4627 emit_constant_insn (cond
, gen_rtx_SET (target
, source
));
4631 if (remainder
== 0xffffffff)
4634 emit_constant_insn (cond
,
4635 gen_rtx_SET (target
,
4636 gen_rtx_NOT (mode
, source
)));
4643 /* We treat MINUS as (val - source), since (source - val) is always
4644 passed as (source + (-val)). */
4648 emit_constant_insn (cond
,
4649 gen_rtx_SET (target
,
4650 gen_rtx_NEG (mode
, source
)));
4653 if (const_ok_for_arm (val
))
4656 emit_constant_insn (cond
,
4657 gen_rtx_SET (target
,
4658 gen_rtx_MINUS (mode
, GEN_INT (val
),
4669 /* If we can do it in one insn get out quickly. */
4670 if (const_ok_for_op (val
, code
))
4673 emit_constant_insn (cond
,
4674 gen_rtx_SET (target
,
4676 ? gen_rtx_fmt_ee (code
, mode
, source
,
4682 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4684 if (code
== AND
&& (i
= exact_log2 (remainder
+ 1)) > 0
4685 && (arm_arch_thumb2
|| (i
== 16 && arm_arch6
&& mode
== SImode
)))
4689 if (mode
== SImode
&& i
== 16)
4690 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4692 emit_constant_insn (cond
,
4693 gen_zero_extendhisi2
4694 (target
, gen_lowpart (HImode
, source
)));
4696 /* Extz only supports SImode, but we can coerce the operands
4698 emit_constant_insn (cond
,
4699 gen_extzv_t2 (gen_lowpart (SImode
, target
),
4700 gen_lowpart (SImode
, source
),
4701 GEN_INT (i
), const0_rtx
));
4707 /* Calculate a few attributes that may be useful for specific
4709 /* Count number of leading zeros. */
4710 for (i
= 31; i
>= 0; i
--)
4712 if ((remainder
& (1 << i
)) == 0)
4713 clear_sign_bit_copies
++;
4718 /* Count number of leading 1's. */
4719 for (i
= 31; i
>= 0; i
--)
4721 if ((remainder
& (1 << i
)) != 0)
4722 set_sign_bit_copies
++;
4727 /* Count number of trailing zero's. */
4728 for (i
= 0; i
<= 31; i
++)
4730 if ((remainder
& (1 << i
)) == 0)
4731 clear_zero_bit_copies
++;
4736 /* Count number of trailing 1's. */
4737 for (i
= 0; i
<= 31; i
++)
4739 if ((remainder
& (1 << i
)) != 0)
4740 set_zero_bit_copies
++;
4748 /* See if we can do this by sign_extending a constant that is known
4749 to be negative. This is a good, way of doing it, since the shift
4750 may well merge into a subsequent insn. */
4751 if (set_sign_bit_copies
> 1)
4753 if (const_ok_for_arm
4754 (temp1
= ARM_SIGN_EXTEND (remainder
4755 << (set_sign_bit_copies
- 1))))
4759 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4760 emit_constant_insn (cond
,
4761 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4762 emit_constant_insn (cond
,
4763 gen_ashrsi3 (target
, new_src
,
4764 GEN_INT (set_sign_bit_copies
- 1)));
4768 /* For an inverted constant, we will need to set the low bits,
4769 these will be shifted out of harm's way. */
4770 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
4771 if (const_ok_for_arm (~temp1
))
4775 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4776 emit_constant_insn (cond
,
4777 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4778 emit_constant_insn (cond
,
4779 gen_ashrsi3 (target
, new_src
,
4780 GEN_INT (set_sign_bit_copies
- 1)));
4786 /* See if we can calculate the value as the difference between two
4787 valid immediates. */
4788 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
4790 int topshift
= clear_sign_bit_copies
& ~1;
4792 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
4793 & (0xff000000 >> topshift
));
4795 /* If temp1 is zero, then that means the 9 most significant
4796 bits of remainder were 1 and we've caused it to overflow.
4797 When topshift is 0 we don't need to do anything since we
4798 can borrow from 'bit 32'. */
4799 if (temp1
== 0 && topshift
!= 0)
4800 temp1
= 0x80000000 >> (topshift
- 1);
4802 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
4804 if (const_ok_for_arm (temp2
))
4808 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
4809 emit_constant_insn (cond
,
4810 gen_rtx_SET (new_src
, GEN_INT (temp1
)));
4811 emit_constant_insn (cond
,
4812 gen_addsi3 (target
, new_src
,
4820 /* See if we can generate this by setting the bottom (or the top)
4821 16 bits, and then shifting these into the other half of the
4822 word. We only look for the simplest cases, to do more would cost
4823 too much. Be careful, however, not to generate this when the
4824 alternative would take fewer insns. */
4825 if (val
& 0xffff0000)
4827 temp1
= remainder
& 0xffff0000;
4828 temp2
= remainder
& 0x0000ffff;
4830 /* Overlaps outside this range are best done using other methods. */
4831 for (i
= 9; i
< 24; i
++)
4833 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
4834 && !const_ok_for_arm (temp2
))
4836 rtx new_src
= (subtargets
4837 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4839 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
4840 source
, subtargets
, generate
);
4848 gen_rtx_ASHIFT (mode
, source
,
4855 /* Don't duplicate cases already considered. */
4856 for (i
= 17; i
< 24; i
++)
4858 if (((temp1
| (temp1
>> i
)) == remainder
)
4859 && !const_ok_for_arm (temp1
))
4861 rtx new_src
= (subtargets
4862 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
4864 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
4865 source
, subtargets
, generate
);
4870 gen_rtx_SET (target
,
4873 gen_rtx_LSHIFTRT (mode
, source
,
4884 /* If we have IOR or XOR, and the constant can be loaded in a
4885 single instruction, and we can find a temporary to put it in,
4886 then this can be done in two instructions instead of 3-4. */
4888 /* TARGET can't be NULL if SUBTARGETS is 0 */
4889 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
4891 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
4895 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4897 emit_constant_insn (cond
,
4898 gen_rtx_SET (sub
, GEN_INT (val
)));
4899 emit_constant_insn (cond
,
4900 gen_rtx_SET (target
,
4901 gen_rtx_fmt_ee (code
, mode
,
4912 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4913 and the remainder 0s for e.g. 0xfff00000)
4914 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4916 This can be done in 2 instructions by using shifts with mov or mvn.
4921 mvn r0, r0, lsr #12 */
4922 if (set_sign_bit_copies
> 8
4923 && (val
& (HOST_WIDE_INT_M1U
<< (32 - set_sign_bit_copies
))) == val
)
4927 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4928 rtx shift
= GEN_INT (set_sign_bit_copies
);
4934 gen_rtx_ASHIFT (mode
,
4939 gen_rtx_SET (target
,
4941 gen_rtx_LSHIFTRT (mode
, sub
,
4948 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4950 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4952 For eg. r0 = r0 | 0xfff
4957 if (set_zero_bit_copies
> 8
4958 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
4962 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4963 rtx shift
= GEN_INT (set_zero_bit_copies
);
4969 gen_rtx_LSHIFTRT (mode
,
4974 gen_rtx_SET (target
,
4976 gen_rtx_ASHIFT (mode
, sub
,
4982 /* This will never be reached for Thumb2 because orn is a valid
4983 instruction. This is for Thumb1 and the ARM 32 bit cases.
4985 x = y | constant (such that ~constant is a valid constant)
4987 x = ~(~y & ~constant).
4989 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
4993 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
4994 emit_constant_insn (cond
,
4996 gen_rtx_NOT (mode
, source
)));
4999 sub
= gen_reg_rtx (mode
);
5000 emit_constant_insn (cond
,
5002 gen_rtx_AND (mode
, source
,
5004 emit_constant_insn (cond
,
5005 gen_rtx_SET (target
,
5006 gen_rtx_NOT (mode
, sub
)));
5013 /* See if two shifts will do 2 or more insn's worth of work. */
5014 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
5016 HOST_WIDE_INT shift_mask
= ((0xffffffff
5017 << (32 - clear_sign_bit_copies
))
5020 if ((remainder
| shift_mask
) != 0xffffffff)
5022 HOST_WIDE_INT new_val
5023 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5027 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5028 insns
= arm_gen_constant (AND
, SImode
, cond
, new_val
,
5029 new_src
, source
, subtargets
, 1);
5034 rtx targ
= subtargets
? NULL_RTX
: target
;
5035 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5036 targ
, source
, subtargets
, 0);
5042 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5043 rtx shift
= GEN_INT (clear_sign_bit_copies
);
5045 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
5046 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
5052 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
5054 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
5056 if ((remainder
| shift_mask
) != 0xffffffff)
5058 HOST_WIDE_INT new_val
5059 = ARM_SIGN_EXTEND (remainder
| shift_mask
);
5062 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5064 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5065 new_src
, source
, subtargets
, 1);
5070 rtx targ
= subtargets
? NULL_RTX
: target
;
5072 insns
= arm_gen_constant (AND
, mode
, cond
, new_val
,
5073 targ
, source
, subtargets
, 0);
5079 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
5080 rtx shift
= GEN_INT (clear_zero_bit_copies
);
5082 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
5083 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
5095 /* Calculate what the instruction sequences would be if we generated it
5096 normally, negated, or inverted. */
5098 /* AND cannot be split into multiple insns, so invert and use BIC. */
5101 insns
= optimal_immediate_sequence (code
, remainder
, &pos_immediates
);
5104 neg_insns
= optimal_immediate_sequence (code
, (-remainder
) & 0xffffffff,
5109 if (can_invert
|| final_invert
)
5110 inv_insns
= optimal_immediate_sequence (code
, remainder
^ 0xffffffff,
5115 immediates
= &pos_immediates
;
5117 /* Is the negated immediate sequence more efficient? */
5118 if (neg_insns
< insns
&& neg_insns
<= inv_insns
)
5121 immediates
= &neg_immediates
;
5126 /* Is the inverted immediate sequence more efficient?
5127 We must allow for an extra NOT instruction for XOR operations, although
5128 there is some chance that the final 'mvn' will get optimized later. */
5129 if ((inv_insns
+ 1) < insns
|| (!final_invert
&& inv_insns
< insns
))
5132 immediates
= &inv_immediates
;
5140 /* Now output the chosen sequence as instructions. */
5143 for (i
= 0; i
< insns
; i
++)
5145 rtx new_src
, temp1_rtx
;
5147 temp1
= immediates
->i
[i
];
5149 if (code
== SET
|| code
== MINUS
)
5150 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
5151 else if ((final_invert
|| i
< (insns
- 1)) && subtargets
)
5152 new_src
= gen_reg_rtx (mode
);
5158 else if (can_negate
)
5161 temp1
= trunc_int_for_mode (temp1
, mode
);
5162 temp1_rtx
= GEN_INT (temp1
);
5166 else if (code
== MINUS
)
5167 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
5169 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
5171 emit_constant_insn (cond
, gen_rtx_SET (new_src
, temp1_rtx
));
5176 can_negate
= can_invert
;
5180 else if (code
== MINUS
)
5188 emit_constant_insn (cond
, gen_rtx_SET (target
,
5189 gen_rtx_NOT (mode
, source
)));
5196 /* Canonicalize a comparison so that we are more likely to recognize it.
5197 This can be done for a few constant compares, where we can make the
5198 immediate value easier to load. */
5201 arm_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
5202 bool op0_preserve_value
)
5205 unsigned HOST_WIDE_INT i
, maxval
;
5207 mode
= GET_MODE (*op0
);
5208 if (mode
== VOIDmode
)
5209 mode
= GET_MODE (*op1
);
5211 maxval
= (HOST_WIDE_INT_1U
<< (GET_MODE_BITSIZE (mode
) - 1)) - 1;
5213 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5214 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5215 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5216 for GTU/LEU in Thumb mode. */
5220 if (*code
== GT
|| *code
== LE
5221 || (!TARGET_ARM
&& (*code
== GTU
|| *code
== LEU
)))
5223 /* Missing comparison. First try to use an available
5225 if (CONST_INT_P (*op1
))
5233 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5235 *op1
= GEN_INT (i
+ 1);
5236 *code
= *code
== GT
? GE
: LT
;
5242 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5243 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
5245 *op1
= GEN_INT (i
+ 1);
5246 *code
= *code
== GTU
? GEU
: LTU
;
5255 /* If that did not work, reverse the condition. */
5256 if (!op0_preserve_value
)
5258 std::swap (*op0
, *op1
);
5259 *code
= (int)swap_condition ((enum rtx_code
)*code
);
5265 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5266 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5267 to facilitate possible combining with a cmp into 'ands'. */
5269 && GET_CODE (*op0
) == ZERO_EXTEND
5270 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
5271 && GET_MODE (XEXP (*op0
, 0)) == QImode
5272 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
5273 && subreg_lowpart_p (XEXP (*op0
, 0))
5274 && *op1
== const0_rtx
)
5275 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
5278 /* Comparisons smaller than DImode. Only adjust comparisons against
5279 an out-of-range constant. */
5280 if (!CONST_INT_P (*op1
)
5281 || const_ok_for_arm (INTVAL (*op1
))
5282 || const_ok_for_arm (- INTVAL (*op1
)))
5296 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5298 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5299 *code
= *code
== GT
? GE
: LT
;
5307 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5309 *op1
= GEN_INT (i
- 1);
5310 *code
= *code
== GE
? GT
: LE
;
5317 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
5318 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
5320 *op1
= GEN_INT (ARM_SIGN_EXTEND (i
+ 1));
5321 *code
= *code
== GTU
? GEU
: LTU
;
5329 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
5331 *op1
= GEN_INT (i
- 1);
5332 *code
= *code
== GEU
? GTU
: LEU
;
5343 /* Define how to find the value returned by a function. */
5346 arm_function_value(const_tree type
, const_tree func
,
5347 bool outgoing ATTRIBUTE_UNUSED
)
5350 int unsignedp ATTRIBUTE_UNUSED
;
5351 rtx r ATTRIBUTE_UNUSED
;
5353 mode
= TYPE_MODE (type
);
5355 if (TARGET_AAPCS_BASED
)
5356 return aapcs_allocate_return_reg (mode
, type
, func
);
5358 /* Promote integer types. */
5359 if (INTEGRAL_TYPE_P (type
))
5360 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
5362 /* Promotes small structs returned in a register to full-word size
5363 for big-endian AAPCS. */
5364 if (arm_return_in_msb (type
))
5366 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5367 if (size
% UNITS_PER_WORD
!= 0)
5369 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
5370 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
5374 return arm_libcall_value_1 (mode
);
5377 /* libcall hashtable helpers. */
5379 struct libcall_hasher
: nofree_ptr_hash
<const rtx_def
>
5381 static inline hashval_t
hash (const rtx_def
*);
5382 static inline bool equal (const rtx_def
*, const rtx_def
*);
5383 static inline void remove (rtx_def
*);
5387 libcall_hasher::equal (const rtx_def
*p1
, const rtx_def
*p2
)
5389 return rtx_equal_p (p1
, p2
);
5393 libcall_hasher::hash (const rtx_def
*p1
)
5395 return hash_rtx (p1
, VOIDmode
, NULL
, NULL
, FALSE
);
5398 typedef hash_table
<libcall_hasher
> libcall_table_type
;
5401 add_libcall (libcall_table_type
*htab
, rtx libcall
)
5403 *htab
->find_slot (libcall
, INSERT
) = libcall
;
5407 arm_libcall_uses_aapcs_base (const_rtx libcall
)
5409 static bool init_done
= false;
5410 static libcall_table_type
*libcall_htab
= NULL
;
5416 libcall_htab
= new libcall_table_type (31);
5417 add_libcall (libcall_htab
,
5418 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
5419 add_libcall (libcall_htab
,
5420 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
5421 add_libcall (libcall_htab
,
5422 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
5423 add_libcall (libcall_htab
,
5424 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
5426 add_libcall (libcall_htab
,
5427 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
5428 add_libcall (libcall_htab
,
5429 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
5430 add_libcall (libcall_htab
,
5431 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
5432 add_libcall (libcall_htab
,
5433 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
5435 add_libcall (libcall_htab
,
5436 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
5437 add_libcall (libcall_htab
,
5438 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
5439 add_libcall (libcall_htab
,
5440 convert_optab_libfunc (sfix_optab
, SImode
, DFmode
));
5441 add_libcall (libcall_htab
,
5442 convert_optab_libfunc (ufix_optab
, SImode
, DFmode
));
5443 add_libcall (libcall_htab
,
5444 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
5445 add_libcall (libcall_htab
,
5446 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
5447 add_libcall (libcall_htab
,
5448 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
5449 add_libcall (libcall_htab
,
5450 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
5452 /* Values from double-precision helper functions are returned in core
5453 registers if the selected core only supports single-precision
5454 arithmetic, even if we are using the hard-float ABI. The same is
5455 true for single-precision helpers, but we will never be using the
5456 hard-float ABI on a CPU which doesn't support single-precision
5457 operations in hardware. */
5458 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
5459 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
5460 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
5461 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
5462 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
5463 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
5464 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
5465 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
5466 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
5467 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
5468 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
5469 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
5471 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
5473 add_libcall (libcall_htab
,
5474 convert_optab_libfunc (trunc_optab
, HFmode
, DFmode
));
5477 return libcall
&& libcall_htab
->find (libcall
) != NULL
;
5481 arm_libcall_value_1 (machine_mode mode
)
5483 if (TARGET_AAPCS_BASED
)
5484 return aapcs_libcall_value (mode
);
5485 else if (TARGET_IWMMXT_ABI
5486 && arm_vector_mode_supported_p (mode
))
5487 return gen_rtx_REG (mode
, FIRST_IWMMXT_REGNUM
);
5489 return gen_rtx_REG (mode
, ARG_REGISTER (1));
5492 /* Define how to find the value returned by a library function
5493 assuming the value has mode MODE. */
5496 arm_libcall_value (machine_mode mode
, const_rtx libcall
)
5498 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
5499 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5501 /* The following libcalls return their result in integer registers,
5502 even though they return a floating point value. */
5503 if (arm_libcall_uses_aapcs_base (libcall
))
5504 return gen_rtx_REG (mode
, ARG_REGISTER(1));
5508 return arm_libcall_value_1 (mode
);
5511 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5514 arm_function_value_regno_p (const unsigned int regno
)
5516 if (regno
== ARG_REGISTER (1)
5518 && TARGET_AAPCS_BASED
5519 && TARGET_HARD_FLOAT
5520 && regno
== FIRST_VFP_REGNUM
)
5521 || (TARGET_IWMMXT_ABI
5522 && regno
== FIRST_IWMMXT_REGNUM
))
5528 /* Determine the amount of memory needed to store the possible return
5529 registers of an untyped call. */
5531 arm_apply_result_size (void)
5537 if (TARGET_HARD_FLOAT_ABI
)
5539 if (TARGET_IWMMXT_ABI
)
5546 /* Decide whether TYPE should be returned in memory (true)
5547 or in a register (false). FNTYPE is the type of the function making
5550 arm_return_in_memory (const_tree type
, const_tree fntype
)
5554 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
5556 if (TARGET_AAPCS_BASED
)
5558 /* Simple, non-aggregate types (ie not including vectors and
5559 complex) are always returned in a register (or registers).
5560 We don't care about which register here, so we can short-cut
5561 some of the detail. */
5562 if (!AGGREGATE_TYPE_P (type
)
5563 && TREE_CODE (type
) != VECTOR_TYPE
5564 && TREE_CODE (type
) != COMPLEX_TYPE
)
5567 /* Any return value that is no larger than one word can be
5569 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
5572 /* Check any available co-processors to see if they accept the
5573 type as a register candidate (VFP, for example, can return
5574 some aggregates in consecutive registers). These aren't
5575 available if the call is variadic. */
5576 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
5579 /* Vector values should be returned using ARM registers, not
5580 memory (unless they're over 16 bytes, which will break since
5581 we only have four call-clobbered registers to play with). */
5582 if (TREE_CODE (type
) == VECTOR_TYPE
)
5583 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5585 /* The rest go in memory. */
5589 if (TREE_CODE (type
) == VECTOR_TYPE
)
5590 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
5592 if (!AGGREGATE_TYPE_P (type
) &&
5593 (TREE_CODE (type
) != VECTOR_TYPE
))
5594 /* All simple types are returned in registers. */
5597 if (arm_abi
!= ARM_ABI_APCS
)
5599 /* ATPCS and later return aggregate types in memory only if they are
5600 larger than a word (or are variable size). */
5601 return (size
< 0 || size
> UNITS_PER_WORD
);
5604 /* For the arm-wince targets we choose to be compatible with Microsoft's
5605 ARM and Thumb compilers, which always return aggregates in memory. */
5607 /* All structures/unions bigger than one word are returned in memory.
5608 Also catch the case where int_size_in_bytes returns -1. In this case
5609 the aggregate is either huge or of variable size, and in either case
5610 we will want to return it via memory and not in a register. */
5611 if (size
< 0 || size
> UNITS_PER_WORD
)
5614 if (TREE_CODE (type
) == RECORD_TYPE
)
5618 /* For a struct the APCS says that we only return in a register
5619 if the type is 'integer like' and every addressable element
5620 has an offset of zero. For practical purposes this means
5621 that the structure can have at most one non bit-field element
5622 and that this element must be the first one in the structure. */
5624 /* Find the first field, ignoring non FIELD_DECL things which will
5625 have been created by C++. */
5626 for (field
= TYPE_FIELDS (type
);
5627 field
&& TREE_CODE (field
) != FIELD_DECL
;
5628 field
= DECL_CHAIN (field
))
5632 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5634 /* Check that the first field is valid for returning in a register. */
5636 /* ... Floats are not allowed */
5637 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5640 /* ... Aggregates that are not themselves valid for returning in
5641 a register are not allowed. */
5642 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5645 /* Now check the remaining fields, if any. Only bitfields are allowed,
5646 since they are not addressable. */
5647 for (field
= DECL_CHAIN (field
);
5649 field
= DECL_CHAIN (field
))
5651 if (TREE_CODE (field
) != FIELD_DECL
)
5654 if (!DECL_BIT_FIELD_TYPE (field
))
5661 if (TREE_CODE (type
) == UNION_TYPE
)
5665 /* Unions can be returned in registers if every element is
5666 integral, or can be returned in an integer register. */
5667 for (field
= TYPE_FIELDS (type
);
5669 field
= DECL_CHAIN (field
))
5671 if (TREE_CODE (field
) != FIELD_DECL
)
5674 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
5677 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
5683 #endif /* not ARM_WINCE */
5685 /* Return all other types in memory. */
5689 const struct pcs_attribute_arg
5693 } pcs_attribute_args
[] =
5695 {"aapcs", ARM_PCS_AAPCS
},
5696 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
5698 /* We could recognize these, but changes would be needed elsewhere
5699 * to implement them. */
5700 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
5701 {"atpcs", ARM_PCS_ATPCS
},
5702 {"apcs", ARM_PCS_APCS
},
5704 {NULL
, ARM_PCS_UNKNOWN
}
5708 arm_pcs_from_attribute (tree attr
)
5710 const struct pcs_attribute_arg
*ptr
;
5713 /* Get the value of the argument. */
5714 if (TREE_VALUE (attr
) == NULL_TREE
5715 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
5716 return ARM_PCS_UNKNOWN
;
5718 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
5720 /* Check it against the list of known arguments. */
5721 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
5722 if (streq (arg
, ptr
->arg
))
5725 /* An unrecognized interrupt type. */
5726 return ARM_PCS_UNKNOWN
;
5729 /* Get the PCS variant to use for this call. TYPE is the function's type
5730 specification, DECL is the specific declartion. DECL may be null if
5731 the call could be indirect or if this is a library call. */
5733 arm_get_pcs_model (const_tree type
, const_tree decl
)
5735 bool user_convention
= false;
5736 enum arm_pcs user_pcs
= arm_pcs_default
;
5741 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
5744 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
5745 user_convention
= true;
5748 if (TARGET_AAPCS_BASED
)
5750 /* Detect varargs functions. These always use the base rules
5751 (no argument is ever a candidate for a co-processor
5753 bool base_rules
= stdarg_p (type
);
5755 if (user_convention
)
5757 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
5758 sorry ("non-AAPCS derived PCS variant");
5759 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
5760 error ("variadic functions must use the base AAPCS variant");
5764 return ARM_PCS_AAPCS
;
5765 else if (user_convention
)
5767 else if (decl
&& flag_unit_at_a_time
)
5769 /* Local functions never leak outside this compilation unit,
5770 so we are free to use whatever conventions are
5772 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5773 cgraph_local_info
*i
= cgraph_node::local_info (CONST_CAST_TREE(decl
));
5775 return ARM_PCS_AAPCS_LOCAL
;
5778 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
5779 sorry ("PCS variant");
5781 /* For everything else we use the target's default. */
5782 return arm_pcs_default
;
5787 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
5788 const_tree fntype ATTRIBUTE_UNUSED
,
5789 rtx libcall ATTRIBUTE_UNUSED
,
5790 const_tree fndecl ATTRIBUTE_UNUSED
)
5792 /* Record the unallocated VFP registers. */
5793 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
5794 pcum
->aapcs_vfp_reg_alloc
= 0;
5797 /* Walk down the type tree of TYPE counting consecutive base elements.
5798 If *MODEP is VOIDmode, then set it to the first valid floating point
5799 type. If a non-floating point type is found, or if a floating point
5800 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5801 otherwise return the count in the sub-tree. */
5803 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
5808 switch (TREE_CODE (type
))
5811 mode
= TYPE_MODE (type
);
5812 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= HFmode
)
5815 if (*modep
== VOIDmode
)
5824 mode
= TYPE_MODE (TREE_TYPE (type
));
5825 if (mode
!= DFmode
&& mode
!= SFmode
)
5828 if (*modep
== VOIDmode
)
5837 /* Use V2SImode and V4SImode as representatives of all 64-bit
5838 and 128-bit vector types, whether or not those modes are
5839 supported with the present options. */
5840 size
= int_size_in_bytes (type
);
5853 if (*modep
== VOIDmode
)
5856 /* Vector modes are considered to be opaque: two vectors are
5857 equivalent for the purposes of being homogeneous aggregates
5858 if they are the same size. */
5867 tree index
= TYPE_DOMAIN (type
);
5869 /* Can't handle incomplete types nor sizes that are not
5871 if (!COMPLETE_TYPE_P (type
)
5872 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5875 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
5878 || !TYPE_MAX_VALUE (index
)
5879 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
5880 || !TYPE_MIN_VALUE (index
)
5881 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
5885 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
5886 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
5888 /* There must be no padding. */
5889 if (wi::to_wide (TYPE_SIZE (type
))
5890 != count
* GET_MODE_BITSIZE (*modep
))
5902 /* Can't handle incomplete types nor sizes that are not
5904 if (!COMPLETE_TYPE_P (type
)
5905 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5908 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5910 if (TREE_CODE (field
) != FIELD_DECL
)
5913 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5919 /* There must be no padding. */
5920 if (wi::to_wide (TYPE_SIZE (type
))
5921 != count
* GET_MODE_BITSIZE (*modep
))
5928 case QUAL_UNION_TYPE
:
5930 /* These aren't very interesting except in a degenerate case. */
5935 /* Can't handle incomplete types nor sizes that are not
5937 if (!COMPLETE_TYPE_P (type
)
5938 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
5941 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5943 if (TREE_CODE (field
) != FIELD_DECL
)
5946 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
5949 count
= count
> sub_count
? count
: sub_count
;
5952 /* There must be no padding. */
5953 if (wi::to_wide (TYPE_SIZE (type
))
5954 != count
* GET_MODE_BITSIZE (*modep
))
5967 /* Return true if PCS_VARIANT should use VFP registers. */
5969 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
5971 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
5973 static bool seen_thumb1_vfp
= false;
5975 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
5977 sorry ("Thumb-1 hard-float VFP ABI");
5978 /* sorry() is not immediately fatal, so only display this once. */
5979 seen_thumb1_vfp
= true;
5985 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
5988 return (TARGET_32BIT
&& TARGET_HARD_FLOAT
&&
5989 (TARGET_VFP_DOUBLE
|| !is_double
));
5992 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5993 suitable for passing or returning in VFP registers for the PCS
5994 variant selected. If it is, then *BASE_MODE is updated to contain
5995 a machine mode describing each element of the argument's type and
5996 *COUNT to hold the number of such elements. */
5998 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
5999 machine_mode mode
, const_tree type
,
6000 machine_mode
*base_mode
, int *count
)
6002 machine_mode new_mode
= VOIDmode
;
6004 /* If we have the type information, prefer that to working things
6005 out from the mode. */
6008 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
6010 if (ag_count
> 0 && ag_count
<= 4)
6015 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
6016 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
6017 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
6022 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
6025 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
6031 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
6034 *base_mode
= new_mode
;
6039 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
6040 machine_mode mode
, const_tree type
)
6042 int count ATTRIBUTE_UNUSED
;
6043 machine_mode ag_mode ATTRIBUTE_UNUSED
;
6045 if (!use_vfp_abi (pcs_variant
, false))
6047 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6052 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6055 if (!use_vfp_abi (pcum
->pcs_variant
, false))
6058 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
6059 &pcum
->aapcs_vfp_rmode
,
6060 &pcum
->aapcs_vfp_rcount
);
6063 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6064 for the behaviour of this function. */
6067 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6068 const_tree type ATTRIBUTE_UNUSED
)
6071 = MAX (GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
), GET_MODE_SIZE (SFmode
));
6072 int shift
= rmode_size
/ GET_MODE_SIZE (SFmode
);
6073 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
6076 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
6077 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
6079 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
6081 || (mode
== TImode
&& ! TARGET_NEON
)
6082 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM
+ regno
, mode
))
6085 int rcount
= pcum
->aapcs_vfp_rcount
;
6087 machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
6091 /* Avoid using unsupported vector modes. */
6092 if (rmode
== V2SImode
)
6094 else if (rmode
== V4SImode
)
6101 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
6102 for (i
= 0; i
< rcount
; i
++)
6104 rtx tmp
= gen_rtx_REG (rmode
,
6105 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
6106 tmp
= gen_rtx_EXPR_LIST
6108 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
6109 XVECEXP (par
, 0, i
) = tmp
;
6112 pcum
->aapcs_reg
= par
;
6115 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
6121 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6122 comment there for the behaviour of this function. */
6125 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
6127 const_tree type ATTRIBUTE_UNUSED
)
6129 if (!use_vfp_abi (pcs_variant
, false))
6133 || (GET_MODE_CLASS (mode
) == MODE_INT
6134 && GET_MODE_SIZE (mode
) >= GET_MODE_SIZE (TImode
)
6138 machine_mode ag_mode
;
6143 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
6148 if (ag_mode
== V2SImode
)
6150 else if (ag_mode
== V4SImode
)
6156 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
6157 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
6158 for (i
= 0; i
< count
; i
++)
6160 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
6161 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
6162 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
6163 XVECEXP (par
, 0, i
) = tmp
;
6169 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
6173 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
6174 machine_mode mode ATTRIBUTE_UNUSED
,
6175 const_tree type ATTRIBUTE_UNUSED
)
6177 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
6178 pcum
->aapcs_vfp_reg_alloc
= 0;
6182 #define AAPCS_CP(X) \
6184 aapcs_ ## X ## _cum_init, \
6185 aapcs_ ## X ## _is_call_candidate, \
6186 aapcs_ ## X ## _allocate, \
6187 aapcs_ ## X ## _is_return_candidate, \
6188 aapcs_ ## X ## _allocate_return_reg, \
6189 aapcs_ ## X ## _advance \
6192 /* Table of co-processors that can be used to pass arguments in
6193 registers. Idealy no arugment should be a candidate for more than
6194 one co-processor table entry, but the table is processed in order
6195 and stops after the first match. If that entry then fails to put
6196 the argument into a co-processor register, the argument will go on
6200 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6201 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
6203 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6204 BLKmode) is a candidate for this co-processor's registers; this
6205 function should ignore any position-dependent state in
6206 CUMULATIVE_ARGS and only use call-type dependent information. */
6207 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6209 /* Return true if the argument does get a co-processor register; it
6210 should set aapcs_reg to an RTX of the register allocated as is
6211 required for a return from FUNCTION_ARG. */
6212 bool (*allocate
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6214 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6215 be returned in this co-processor's registers. */
6216 bool (*is_return_candidate
) (enum arm_pcs
, machine_mode
, const_tree
);
6218 /* Allocate and return an RTX element to hold the return type of a call. This
6219 routine must not fail and will only be called if is_return_candidate
6220 returned true with the same parameters. */
6221 rtx (*allocate_return_reg
) (enum arm_pcs
, machine_mode
, const_tree
);
6223 /* Finish processing this argument and prepare to start processing
6225 void (*advance
) (CUMULATIVE_ARGS
*, machine_mode
, const_tree
);
6226 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
6234 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6239 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6240 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
6247 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
6249 /* We aren't passed a decl, so we can't check that a call is local.
6250 However, it isn't clear that that would be a win anyway, since it
6251 might limit some tail-calling opportunities. */
6252 enum arm_pcs pcs_variant
;
6256 const_tree fndecl
= NULL_TREE
;
6258 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6261 fntype
= TREE_TYPE (fntype
);
6264 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6267 pcs_variant
= arm_pcs_default
;
6269 if (pcs_variant
!= ARM_PCS_AAPCS
)
6273 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6274 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
6283 aapcs_allocate_return_reg (machine_mode mode
, const_tree type
,
6286 /* We aren't passed a decl, so we can't check that a call is local.
6287 However, it isn't clear that that would be a win anyway, since it
6288 might limit some tail-calling opportunities. */
6289 enum arm_pcs pcs_variant
;
6290 int unsignedp ATTRIBUTE_UNUSED
;
6294 const_tree fndecl
= NULL_TREE
;
6296 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
6299 fntype
= TREE_TYPE (fntype
);
6302 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6305 pcs_variant
= arm_pcs_default
;
6307 /* Promote integer types. */
6308 if (type
&& INTEGRAL_TYPE_P (type
))
6309 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
6311 if (pcs_variant
!= ARM_PCS_AAPCS
)
6315 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6316 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
6318 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
6322 /* Promotes small structs returned in a register to full-word size
6323 for big-endian AAPCS. */
6324 if (type
&& arm_return_in_msb (type
))
6326 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6327 if (size
% UNITS_PER_WORD
!= 0)
6329 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
6330 mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 0).require ();
6334 return gen_rtx_REG (mode
, R0_REGNUM
);
6338 aapcs_libcall_value (machine_mode mode
)
6340 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
6341 && GET_MODE_SIZE (mode
) <= 4)
6344 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
6347 /* Lay out a function argument using the AAPCS rules. The rule
6348 numbers referred to here are those in the AAPCS. */
6350 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, machine_mode mode
,
6351 const_tree type
, bool named
)
6356 /* We only need to do this once per argument. */
6357 if (pcum
->aapcs_arg_processed
)
6360 pcum
->aapcs_arg_processed
= true;
6362 /* Special case: if named is false then we are handling an incoming
6363 anonymous argument which is on the stack. */
6367 /* Is this a potential co-processor register candidate? */
6368 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6370 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
6371 pcum
->aapcs_cprc_slot
= slot
;
6373 /* We don't have to apply any of the rules from part B of the
6374 preparation phase, these are handled elsewhere in the
6379 /* A Co-processor register candidate goes either in its own
6380 class of registers or on the stack. */
6381 if (!pcum
->aapcs_cprc_failed
[slot
])
6383 /* C1.cp - Try to allocate the argument to co-processor
6385 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
6388 /* C2.cp - Put the argument on the stack and note that we
6389 can't assign any more candidates in this slot. We also
6390 need to note that we have allocated stack space, so that
6391 we won't later try to split a non-cprc candidate between
6392 core registers and the stack. */
6393 pcum
->aapcs_cprc_failed
[slot
] = true;
6394 pcum
->can_split
= false;
6397 /* We didn't get a register, so this argument goes on the
6399 gcc_assert (pcum
->can_split
== false);
6404 /* C3 - For double-word aligned arguments, round the NCRN up to the
6405 next even number. */
6406 ncrn
= pcum
->aapcs_ncrn
;
6409 int res
= arm_needs_doubleword_align (mode
, type
);
6410 /* Only warn during RTL expansion of call stmts, otherwise we would
6411 warn e.g. during gimplification even on functions that will be
6412 always inlined, and we'd warn multiple times. Don't warn when
6413 called in expand_function_start either, as we warn instead in
6414 arm_function_arg_boundary in that case. */
6415 if (res
< 0 && warn_psabi
&& currently_expanding_gimple_stmt
)
6416 inform (input_location
, "parameter passing for argument of type "
6417 "%qT changed in GCC 7.1", type
);
6422 nregs
= ARM_NUM_REGS2(mode
, type
);
6424 /* Sigh, this test should really assert that nregs > 0, but a GCC
6425 extension allows empty structs and then gives them empty size; it
6426 then allows such a structure to be passed by value. For some of
6427 the code below we have to pretend that such an argument has
6428 non-zero size so that we 'locate' it correctly either in
6429 registers or on the stack. */
6430 gcc_assert (nregs
>= 0);
6432 nregs2
= nregs
? nregs
: 1;
6434 /* C4 - Argument fits entirely in core registers. */
6435 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
6437 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6438 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
6442 /* C5 - Some core registers left and there are no arguments already
6443 on the stack: split this argument between the remaining core
6444 registers and the stack. */
6445 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
6447 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
6448 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6449 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
6453 /* C6 - NCRN is set to 4. */
6454 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
6456 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6460 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6461 for a call to a function whose data type is FNTYPE.
6462 For a library call, FNTYPE is NULL. */
6464 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
6466 tree fndecl ATTRIBUTE_UNUSED
)
6468 /* Long call handling. */
6470 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
6472 pcum
->pcs_variant
= arm_pcs_default
;
6474 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6476 if (arm_libcall_uses_aapcs_base (libname
))
6477 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
6479 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
6480 pcum
->aapcs_reg
= NULL_RTX
;
6481 pcum
->aapcs_partial
= 0;
6482 pcum
->aapcs_arg_processed
= false;
6483 pcum
->aapcs_cprc_slot
= -1;
6484 pcum
->can_split
= true;
6486 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
6490 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
6492 pcum
->aapcs_cprc_failed
[i
] = false;
6493 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
6501 /* On the ARM, the offset starts at 0. */
6503 pcum
->iwmmxt_nregs
= 0;
6504 pcum
->can_split
= true;
6506 /* Varargs vectors are treated the same as long long.
6507 named_count avoids having to change the way arm handles 'named' */
6508 pcum
->named_count
= 0;
6511 if (TARGET_REALLY_IWMMXT
&& fntype
)
6515 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
6517 fn_arg
= TREE_CHAIN (fn_arg
))
6518 pcum
->named_count
+= 1;
6520 if (! pcum
->named_count
)
6521 pcum
->named_count
= INT_MAX
;
6525 /* Return 1 if double word alignment is required for argument passing.
6526 Return -1 if double word alignment used to be required for argument
6527 passing before PR77728 ABI fix, but is not required anymore.
6528 Return 0 if double word alignment is not required and wasn't requried
6531 arm_needs_doubleword_align (machine_mode mode
, const_tree type
)
6534 return GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
;
6536 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6537 if (!AGGREGATE_TYPE_P (type
))
6538 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
)) > PARM_BOUNDARY
;
6540 /* Array types: Use member alignment of element type. */
6541 if (TREE_CODE (type
) == ARRAY_TYPE
)
6542 return TYPE_ALIGN (TREE_TYPE (type
)) > PARM_BOUNDARY
;
6545 /* Record/aggregate types: Use greatest member alignment of any member. */
6546 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6547 if (DECL_ALIGN (field
) > PARM_BOUNDARY
)
6549 if (TREE_CODE (field
) == FIELD_DECL
)
6552 /* Before PR77728 fix, we were incorrectly considering also
6553 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6554 Make sure we can warn about that with -Wpsabi. */
6562 /* Determine where to put an argument to a function.
6563 Value is zero to push the argument on the stack,
6564 or a hard register in which to store the argument.
6566 MODE is the argument's machine mode.
6567 TYPE is the data type of the argument (as a tree).
6568 This is null for libcalls where that information may
6570 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6571 the preceding args and about the function being called.
6572 NAMED is nonzero if this argument is a named parameter
6573 (otherwise it is an extra parameter matching an ellipsis).
6575 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6576 other arguments are passed on the stack. If (NAMED == 0) (which happens
6577 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6578 defined), say it is passed in the stack (function_prologue will
6579 indeed make it pass in the stack if necessary). */
6582 arm_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
6583 const_tree type
, bool named
)
6585 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6588 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6589 a call insn (op3 of a call_value insn). */
6590 if (mode
== VOIDmode
)
6593 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6595 aapcs_layout_arg (pcum
, mode
, type
, named
);
6596 return pcum
->aapcs_reg
;
6599 /* Varargs vectors are treated the same as long long.
6600 named_count avoids having to change the way arm handles 'named' */
6601 if (TARGET_IWMMXT_ABI
6602 && arm_vector_mode_supported_p (mode
)
6603 && pcum
->named_count
> pcum
->nargs
+ 1)
6605 if (pcum
->iwmmxt_nregs
<= 9)
6606 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
6609 pcum
->can_split
= false;
6614 /* Put doubleword aligned quantities in even register pairs. */
6615 if ((pcum
->nregs
& 1) && ARM_DOUBLEWORD_ALIGN
)
6617 int res
= arm_needs_doubleword_align (mode
, type
);
6618 if (res
< 0 && warn_psabi
)
6619 inform (input_location
, "parameter passing for argument of type "
6620 "%qT changed in GCC 7.1", type
);
6625 /* Only allow splitting an arg between regs and memory if all preceding
6626 args were allocated to regs. For args passed by reference we only count
6627 the reference pointer. */
6628 if (pcum
->can_split
)
6631 nregs
= ARM_NUM_REGS2 (mode
, type
);
6633 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
6636 return gen_rtx_REG (mode
, pcum
->nregs
);
6640 arm_function_arg_boundary (machine_mode mode
, const_tree type
)
6642 if (!ARM_DOUBLEWORD_ALIGN
)
6643 return PARM_BOUNDARY
;
6645 int res
= arm_needs_doubleword_align (mode
, type
);
6646 if (res
< 0 && warn_psabi
)
6647 inform (input_location
, "parameter passing for argument of type %qT "
6648 "changed in GCC 7.1", type
);
6650 return res
> 0 ? DOUBLEWORD_ALIGNMENT
: PARM_BOUNDARY
;
6654 arm_arg_partial_bytes (cumulative_args_t pcum_v
, machine_mode mode
,
6655 tree type
, bool named
)
6657 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6658 int nregs
= pcum
->nregs
;
6660 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6662 aapcs_layout_arg (pcum
, mode
, type
, named
);
6663 return pcum
->aapcs_partial
;
6666 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
6669 if (NUM_ARG_REGS
> nregs
6670 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
6672 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
6677 /* Update the data in PCUM to advance over an argument
6678 of mode MODE and data type TYPE.
6679 (TYPE is null for libcalls where that information may not be available.) */
6682 arm_function_arg_advance (cumulative_args_t pcum_v
, machine_mode mode
,
6683 const_tree type
, bool named
)
6685 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
6687 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
6689 aapcs_layout_arg (pcum
, mode
, type
, named
);
6691 if (pcum
->aapcs_cprc_slot
>= 0)
6693 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
6695 pcum
->aapcs_cprc_slot
= -1;
6698 /* Generic stuff. */
6699 pcum
->aapcs_arg_processed
= false;
6700 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
6701 pcum
->aapcs_reg
= NULL_RTX
;
6702 pcum
->aapcs_partial
= 0;
6707 if (arm_vector_mode_supported_p (mode
)
6708 && pcum
->named_count
> pcum
->nargs
6709 && TARGET_IWMMXT_ABI
)
6710 pcum
->iwmmxt_nregs
+= 1;
6712 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
6716 /* Variable sized types are passed by reference. This is a GCC
6717 extension to the ARM ABI. */
6720 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
6721 machine_mode mode ATTRIBUTE_UNUSED
,
6722 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6724 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
6727 /* Encode the current state of the #pragma [no_]long_calls. */
6730 OFF
, /* No #pragma [no_]long_calls is in effect. */
6731 LONG
, /* #pragma long_calls is in effect. */
6732 SHORT
/* #pragma no_long_calls is in effect. */
6735 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
6738 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6740 arm_pragma_long_calls
= LONG
;
6744 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6746 arm_pragma_long_calls
= SHORT
;
6750 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
6752 arm_pragma_long_calls
= OFF
;
6755 /* Handle an attribute requiring a FUNCTION_DECL;
6756 arguments as in struct attribute_spec.handler. */
6758 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
6759 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6761 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6763 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6765 *no_add_attrs
= true;
6771 /* Handle an "interrupt" or "isr" attribute;
6772 arguments as in struct attribute_spec.handler. */
6774 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
6779 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6781 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6783 *no_add_attrs
= true;
6785 /* FIXME: the argument if any is checked for type attributes;
6786 should it be checked for decl ones? */
6790 if (TREE_CODE (*node
) == FUNCTION_TYPE
6791 || TREE_CODE (*node
) == METHOD_TYPE
)
6793 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
6795 warning (OPT_Wattributes
, "%qE attribute ignored",
6797 *no_add_attrs
= true;
6800 else if (TREE_CODE (*node
) == POINTER_TYPE
6801 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
6802 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
6803 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
6805 *node
= build_variant_type_copy (*node
);
6806 TREE_TYPE (*node
) = build_type_attribute_variant
6808 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
6809 *no_add_attrs
= true;
6813 /* Possibly pass this attribute on from the type to a decl. */
6814 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
6815 | (int) ATTR_FLAG_FUNCTION_NEXT
6816 | (int) ATTR_FLAG_ARRAY_NEXT
))
6818 *no_add_attrs
= true;
6819 return tree_cons (name
, args
, NULL_TREE
);
6823 warning (OPT_Wattributes
, "%qE attribute ignored",
6832 /* Handle a "pcs" attribute; arguments as in struct
6833 attribute_spec.handler. */
6835 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
6836 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
6838 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
6840 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
6841 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */
static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
6871 /* This function returns true if a function with declaration FNDECL and type
6872 FNTYPE uses the stack to pass arguments or return variables and false
6873 otherwise. This is used for functions with the attributes
6874 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6875 diagnostic messages if the stack is used. NAME is the name of the attribute
6879 cmse_func_args_or_return_in_stack (tree fndecl
, tree name
, tree fntype
)
6881 function_args_iterator args_iter
;
6882 CUMULATIVE_ARGS args_so_far_v
;
6883 cumulative_args_t args_so_far
;
6884 bool first_param
= true;
6885 tree arg_type
, prev_arg_type
= NULL_TREE
, ret_type
;
6887 /* Error out if any argument is passed on the stack. */
6888 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
, fndecl
);
6889 args_so_far
= pack_cumulative_args (&args_so_far_v
);
6890 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
6893 machine_mode arg_mode
= TYPE_MODE (arg_type
);
6895 prev_arg_type
= arg_type
;
6896 if (VOID_TYPE_P (arg_type
))
6900 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
, true);
6901 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
, true);
6903 || arm_arg_partial_bytes (args_so_far
, arg_mode
, arg_type
, true))
6905 error ("%qE attribute not available to functions with arguments "
6906 "passed on the stack", name
);
6909 first_param
= false;
6912 /* Error out for variadic functions since we cannot control how many
6913 arguments will be passed and thus stack could be used. stdarg_p () is not
6914 used for the checking to avoid browsing arguments twice. */
6915 if (prev_arg_type
!= NULL_TREE
&& !VOID_TYPE_P (prev_arg_type
))
6917 error ("%qE attribute not available to functions with variable number "
6918 "of arguments", name
);
6922 /* Error out if return value is passed on the stack. */
6923 ret_type
= TREE_TYPE (fntype
);
6924 if (arm_return_in_memory (ret_type
, fntype
))
6926 error ("%qE attribute not available to functions that return value on "
6933 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6934 function will check whether the attribute is allowed here and will add the
6935 attribute to the function declaration tree or otherwise issue a warning. */
6938 arm_handle_cmse_nonsecure_entry (tree
*node
, tree name
,
6947 *no_add_attrs
= true;
6948 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
6953 /* Ignore attribute for function types. */
6954 if (TREE_CODE (*node
) != FUNCTION_DECL
)
6956 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
6958 *no_add_attrs
= true;
6964 /* Warn for static linkage functions. */
6965 if (!TREE_PUBLIC (fndecl
))
6967 warning (OPT_Wattributes
, "%qE attribute has no effect on functions "
6968 "with static linkage", name
);
6969 *no_add_attrs
= true;
6973 *no_add_attrs
|= cmse_func_args_or_return_in_stack (fndecl
, name
,
6974 TREE_TYPE (fndecl
));
6979 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6980 function will check whether the attribute is allowed here and will add the
6981 attribute to the function type tree or otherwise issue a diagnostic. The
6982 reason we check this at declaration time is to only allow the use of the
6983 attribute with declarations of function pointers and not function
6984 declarations. This function checks NODE is of the expected type and issues
6985 diagnostics otherwise using NAME. If it is not of the expected type
6986 *NO_ADD_ATTRS will be set to true. */
6989 arm_handle_cmse_nonsecure_call (tree
*node
, tree name
,
6994 tree decl
= NULL_TREE
, fntype
= NULL_TREE
;
6999 *no_add_attrs
= true;
7000 warning (OPT_Wattributes
, "%qE attribute ignored without -mcmse option.",
7005 if (TREE_CODE (*node
) == VAR_DECL
|| TREE_CODE (*node
) == TYPE_DECL
)
7008 fntype
= TREE_TYPE (decl
);
7011 while (fntype
!= NULL_TREE
&& TREE_CODE (fntype
) == POINTER_TYPE
)
7012 fntype
= TREE_TYPE (fntype
);
7014 if (!decl
|| TREE_CODE (fntype
) != FUNCTION_TYPE
)
7016 warning (OPT_Wattributes
, "%qE attribute only applies to base type of a "
7017 "function pointer", name
);
7018 *no_add_attrs
= true;
7022 *no_add_attrs
|= cmse_func_args_or_return_in_stack (NULL
, name
, fntype
);
7027 /* Prevent trees being shared among function types with and without
7028 cmse_nonsecure_call attribute. */
7029 type
= TREE_TYPE (decl
);
7031 type
= build_distinct_type_copy (type
);
7032 TREE_TYPE (decl
) = type
;
7035 while (TREE_CODE (fntype
) != FUNCTION_TYPE
)
7038 fntype
= TREE_TYPE (fntype
);
7039 fntype
= build_distinct_type_copy (fntype
);
7040 TREE_TYPE (type
) = fntype
;
7043 /* Construct a type attribute and add it to the function type. */
7044 tree attrs
= tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE
,
7045 TYPE_ATTRIBUTES (fntype
));
7046 TYPE_ATTRIBUTES (fntype
) = attrs
;
7050 /* Return 0 if the attributes for two types are incompatible, 1 if they
7051 are compatible, and 2 if they are nearly compatible (which causes a
7052 warning to be generated). */
7054 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
7058 /* Check for mismatch of non-default calling convention. */
7059 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
7062 /* Check for mismatched call attributes. */
7063 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7064 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7065 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
7066 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
7068 /* Only bother to check if an attribute is defined. */
7069 if (l1
| l2
| s1
| s2
)
7071 /* If one type has an attribute, the other must have the same attribute. */
7072 if ((l1
!= l2
) || (s1
!= s2
))
7075 /* Disallow mixed attributes. */
7076 if ((l1
& s2
) || (l2
& s1
))
7080 /* Check for mismatched ISR attribute. */
7081 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
7083 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
7084 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
7086 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
7090 l1
= lookup_attribute ("cmse_nonsecure_call",
7091 TYPE_ATTRIBUTES (type1
)) != NULL
;
7092 l2
= lookup_attribute ("cmse_nonsecure_call",
7093 TYPE_ATTRIBUTES (type2
)) != NULL
;
7101 /* Assigns default attributes to newly defined type. This is used to
7102 set short_call/long_call attributes for function types of
7103 functions defined inside corresponding #pragma scopes. */
7105 arm_set_default_type_attributes (tree type
)
7107 /* Add __attribute__ ((long_call)) to all functions, when
7108 inside #pragma long_calls or __attribute__ ((short_call)),
7109 when inside #pragma no_long_calls. */
7110 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
7112 tree type_attr_list
, attr_name
;
7113 type_attr_list
= TYPE_ATTRIBUTES (type
);
7115 if (arm_pragma_long_calls
== LONG
)
7116 attr_name
= get_identifier ("long_call");
7117 else if (arm_pragma_long_calls
== SHORT
)
7118 attr_name
= get_identifier ("short_call");
7122 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
7123 TYPE_ATTRIBUTES (type
) = type_attr_list
;
7127 /* Return true if DECL is known to be linked into section SECTION. */
7130 arm_function_in_section_p (tree decl
, section
*section
)
7132 /* We can only be certain about the prevailing symbol definition. */
7133 if (!decl_binds_to_current_def_p (decl
))
7136 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7137 if (!DECL_SECTION_NAME (decl
))
7139 /* Make sure that we will not create a unique section for DECL. */
7140 if (flag_function_sections
|| DECL_COMDAT_GROUP (decl
))
7144 return function_section (decl
) == section
;
7147 /* Return nonzero if a 32-bit "long_call" should be generated for
7148 a call from the current function to DECL. We generate a long_call
7151 a. has an __attribute__((long call))
7152 or b. is within the scope of a #pragma long_calls
7153 or c. the -mlong-calls command line switch has been specified
7155 However we do not generate a long call if the function:
7157 d. has an __attribute__ ((short_call))
7158 or e. is inside the scope of a #pragma no_long_calls
7159 or f. is defined in the same section as the current function. */
7162 arm_is_long_call_p (tree decl
)
7167 return TARGET_LONG_CALLS
;
7169 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
7170 if (lookup_attribute ("short_call", attrs
))
7173 /* For "f", be conservative, and only cater for cases in which the
7174 whole of the current function is placed in the same section. */
7175 if (!flag_reorder_blocks_and_partition
7176 && TREE_CODE (decl
) == FUNCTION_DECL
7177 && arm_function_in_section_p (decl
, current_function_section ()))
7180 if (lookup_attribute ("long_call", attrs
))
7183 return TARGET_LONG_CALLS
;
7186 /* Return nonzero if it is ok to make a tail-call to DECL. */
7188 arm_function_ok_for_sibcall (tree decl
, tree exp
)
7190 unsigned long func_type
;
7192 if (cfun
->machine
->sibcall_blocked
)
7195 /* Never tailcall something if we are generating code for Thumb-1. */
7199 /* The PIC register is live on entry to VxWorks PLT entries, so we
7200 must make the call before restoring the PIC register. */
7201 if (TARGET_VXWORKS_RTP
&& flag_pic
&& decl
&& !targetm
.binds_local_p (decl
))
7204 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7205 may be used both as target of the call and base register for restoring
7206 the VFP registers */
7207 if (TARGET_APCS_FRAME
&& TARGET_ARM
7208 && TARGET_HARD_FLOAT
7209 && decl
&& arm_is_long_call_p (decl
))
7212 /* If we are interworking and the function is not declared static
7213 then we can't tail-call it unless we know that it exists in this
7214 compilation unit (since it might be a Thumb routine). */
7215 if (TARGET_INTERWORK
&& decl
&& TREE_PUBLIC (decl
)
7216 && !TREE_ASM_WRITTEN (decl
))
7219 func_type
= arm_current_func_type ();
7220 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7221 if (IS_INTERRUPT (func_type
))
7224 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7225 generated for entry functions themselves. */
7226 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7229 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7230 this would complicate matters for later code generation. */
7231 if (TREE_CODE (exp
) == CALL_EXPR
)
7233 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7234 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype
)))
7238 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
7240 /* Check that the return value locations are the same. For
7241 example that we aren't returning a value from the sibling in
7242 a VFP register but then need to transfer it to a core
7245 tree decl_or_type
= decl
;
7247 /* If it is an indirect function pointer, get the function type. */
7249 decl_or_type
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7251 a
= arm_function_value (TREE_TYPE (exp
), decl_or_type
, false);
7252 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
7254 if (!rtx_equal_p (a
, b
))
7258 /* Never tailcall if function may be called with a misaligned SP. */
7259 if (IS_STACKALIGN (func_type
))
7262 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7263 references should become a NOP. Don't convert such calls into
7265 if (TARGET_AAPCS_BASED
7266 && arm_abi
== ARM_ABI_AAPCS
7268 && DECL_WEAK (decl
))
7271 /* We cannot do a tailcall for an indirect call by descriptor if all the
7272 argument registers are used because the only register left to load the
7273 address is IP and it will already contain the static chain. */
7274 if (!decl
&& CALL_EXPR_BY_DESCRIPTOR (exp
) && !flag_trampolines
)
7276 tree fntype
= TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp
)));
7277 CUMULATIVE_ARGS cum
;
7278 cumulative_args_t cum_v
;
7280 arm_init_cumulative_args (&cum
, fntype
, NULL_RTX
, NULL_TREE
);
7281 cum_v
= pack_cumulative_args (&cum
);
7283 for (tree t
= TYPE_ARG_TYPES (fntype
); t
; t
= TREE_CHAIN (t
))
7285 tree type
= TREE_VALUE (t
);
7286 if (!VOID_TYPE_P (type
))
7287 arm_function_arg_advance (cum_v
, TYPE_MODE (type
), type
, true);
7290 if (!arm_function_arg (cum_v
, SImode
, integer_type_node
, true))
7294 /* Everything else is ok. */
7299 /* Addressing mode support functions. */
7301 /* Return nonzero if X is a legitimate immediate operand when compiling
7302 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7304 legitimate_pic_operand_p (rtx x
)
7306 if (GET_CODE (x
) == SYMBOL_REF
7307 || (GET_CODE (x
) == CONST
7308 && GET_CODE (XEXP (x
, 0)) == PLUS
7309 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
7315 /* Record that the current function needs a PIC register. Initialize
7316 cfun->machine->pic_reg if we have not already done so. */
7319 require_pic_register (void)
7321 /* A lot of the logic here is made obscure by the fact that this
7322 routine gets called as part of the rtx cost estimation process.
7323 We don't want those calls to affect any assumptions about the real
7324 function; and further, we can't call entry_of_function() until we
7325 start the real expansion process. */
7326 if (!crtl
->uses_pic_offset_table
)
7328 gcc_assert (can_create_pseudo_p ());
7329 if (arm_pic_register
!= INVALID_REGNUM
7330 && !(TARGET_THUMB1
&& arm_pic_register
> LAST_LO_REGNUM
))
7332 if (!cfun
->machine
->pic_reg
)
7333 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
7335 /* Play games to avoid marking the function as needing pic
7336 if we are being called as part of the cost-estimation
7338 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7339 crtl
->uses_pic_offset_table
= 1;
7343 rtx_insn
*seq
, *insn
;
7345 if (!cfun
->machine
->pic_reg
)
7346 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
7348 /* Play games to avoid marking the function as needing pic
7349 if we are being called as part of the cost-estimation
7351 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
7353 crtl
->uses_pic_offset_table
= 1;
7356 if (TARGET_THUMB1
&& arm_pic_register
!= INVALID_REGNUM
7357 && arm_pic_register
> LAST_LO_REGNUM
)
7358 emit_move_insn (cfun
->machine
->pic_reg
,
7359 gen_rtx_REG (Pmode
, arm_pic_register
));
7361 arm_load_pic_register (0UL);
7366 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
7368 INSN_LOCATION (insn
) = prologue_location
;
7370 /* We can be called during expansion of PHI nodes, where
7371 we can't yet emit instructions directly in the final
7372 insn stream. Queue the insns on the entry edge, they will
7373 be committed after everything else is expanded. */
7374 insert_insn_on_edge (seq
,
7375 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
7382 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
7384 if (GET_CODE (orig
) == SYMBOL_REF
7385 || GET_CODE (orig
) == LABEL_REF
)
7389 gcc_assert (can_create_pseudo_p ());
7390 reg
= gen_reg_rtx (Pmode
);
7393 /* VxWorks does not impose a fixed gap between segments; the run-time
7394 gap can be different from the object-file gap. We therefore can't
7395 use GOTOFF unless we are absolutely sure that the symbol is in the
7396 same segment as the GOT. Unfortunately, the flexibility of linker
7397 scripts means that we can't be sure of that in general, so assume
7398 that GOTOFF is never valid on VxWorks. */
7399 /* References to weak symbols cannot be resolved locally: they
7400 may be overridden by a non-weak definition at link time. */
7402 if ((GET_CODE (orig
) == LABEL_REF
7403 || (GET_CODE (orig
) == SYMBOL_REF
7404 && SYMBOL_REF_LOCAL_P (orig
)
7405 && (SYMBOL_REF_DECL (orig
)
7406 ? !DECL_WEAK (SYMBOL_REF_DECL (orig
)) : 1)))
7408 && arm_pic_data_is_text_relative
)
7409 insn
= arm_pic_static_addr (orig
, reg
);
7415 /* If this function doesn't have a pic register, create one now. */
7416 require_pic_register ();
7418 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
7420 /* Make the MEM as close to a constant as possible. */
7421 mem
= SET_SRC (pat
);
7422 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
7423 MEM_READONLY_P (mem
) = 1;
7424 MEM_NOTRAP_P (mem
) = 1;
7426 insn
= emit_insn (pat
);
7429 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7431 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
7435 else if (GET_CODE (orig
) == CONST
)
7439 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7440 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
7443 /* Handle the case where we have: const (UNSPEC_TLS). */
7444 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
7445 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
7448 /* Handle the case where we have:
7449 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7451 if (GET_CODE (XEXP (orig
, 0)) == PLUS
7452 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
7453 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
7455 gcc_assert (CONST_INT_P (XEXP (XEXP (orig
, 0), 1)));
7461 gcc_assert (can_create_pseudo_p ());
7462 reg
= gen_reg_rtx (Pmode
);
7465 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
7467 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
7468 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
7469 base
== reg
? 0 : reg
);
7471 if (CONST_INT_P (offset
))
7473 /* The base register doesn't really matter, we only want to
7474 test the index for the appropriate mode. */
7475 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
7477 gcc_assert (can_create_pseudo_p ());
7478 offset
= force_reg (Pmode
, offset
);
7481 if (CONST_INT_P (offset
))
7482 return plus_constant (Pmode
, base
, INTVAL (offset
));
7485 if (GET_MODE_SIZE (mode
) > 4
7486 && (GET_MODE_CLASS (mode
) == MODE_INT
7487 || TARGET_SOFT_FLOAT
))
7489 emit_insn (gen_addsi3 (reg
, base
, offset
));
7493 return gen_rtx_PLUS (Pmode
, base
, offset
);
7500 /* Find a spare register to use during the prolog of a function. */
7503 thumb_find_work_register (unsigned long pushed_regs_mask
)
7507 /* Check the argument registers first as these are call-used. The
7508 register allocation order means that sometimes r3 might be used
7509 but earlier argument registers might not, so check them all. */
7510 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
7511 if (!df_regs_ever_live_p (reg
))
7514 /* Before going on to check the call-saved registers we can try a couple
7515 more ways of deducing that r3 is available. The first is when we are
7516 pushing anonymous arguments onto the stack and we have less than 4
7517 registers worth of fixed arguments(*). In this case r3 will be part of
7518 the variable argument list and so we can be sure that it will be
7519 pushed right at the start of the function. Hence it will be available
7520 for the rest of the prologue.
7521 (*): ie crtl->args.pretend_args_size is greater than 0. */
7522 if (cfun
->machine
->uses_anonymous_args
7523 && crtl
->args
.pretend_args_size
> 0)
7524 return LAST_ARG_REGNUM
;
7526 /* The other case is when we have fixed arguments but less than 4 registers
7527 worth. In this case r3 might be used in the body of the function, but
7528 it is not being used to convey an argument into the function. In theory
7529 we could just check crtl->args.size to see how many bytes are
7530 being passed in argument registers, but it seems that it is unreliable.
7531 Sometimes it will have the value 0 when in fact arguments are being
7532 passed. (See testcase execute/20021111-1.c for an example). So we also
7533 check the args_info.nregs field as well. The problem with this field is
7534 that it makes no allowances for arguments that are passed to the
7535 function but which are not used. Hence we could miss an opportunity
7536 when a function has an unused argument in r3. But it is better to be
7537 safe than to be sorry. */
7538 if (! cfun
->machine
->uses_anonymous_args
7539 && crtl
->args
.size
>= 0
7540 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
7541 && (TARGET_AAPCS_BASED
7542 ? crtl
->args
.info
.aapcs_ncrn
< 4
7543 : crtl
->args
.info
.nregs
< 4))
7544 return LAST_ARG_REGNUM
;
7546 /* Otherwise look for a call-saved register that is going to be pushed. */
7547 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
7548 if (pushed_regs_mask
& (1 << reg
))
7553 /* Thumb-2 can use high regs. */
7554 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
7555 if (pushed_regs_mask
& (1 << reg
))
7558 /* Something went wrong - thumb_compute_save_reg_mask()
7559 should have arranged for a suitable register to be pushed. */
7563 static GTY(()) int pic_labelno
;
7565 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7569 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
7571 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
7573 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
7576 gcc_assert (flag_pic
);
7578 pic_reg
= cfun
->machine
->pic_reg
;
7579 if (TARGET_VXWORKS_RTP
)
7581 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
7582 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7583 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
7585 emit_insn (gen_rtx_SET (pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
7587 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
7588 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
7592 /* We use an UNSPEC rather than a LABEL_REF because this label
7593 never appears in the code stream. */
7595 labelno
= GEN_INT (pic_labelno
++);
7596 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7597 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7599 /* On the ARM the PC register contains 'dot + 8' at the time of the
7600 addition, on the Thumb it is 'dot + 4'. */
7601 pic_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7602 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
7604 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
7608 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7610 else /* TARGET_THUMB1 */
7612 if (arm_pic_register
!= INVALID_REGNUM
7613 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
7615 /* We will have pushed the pic register, so we should always be
7616 able to find a work register. */
7617 pic_tmp
= gen_rtx_REG (SImode
,
7618 thumb_find_work_register (saved_regs
));
7619 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
7620 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
7621 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
7623 else if (arm_pic_register
!= INVALID_REGNUM
7624 && arm_pic_register
> LAST_LO_REGNUM
7625 && REGNO (pic_reg
) <= LAST_LO_REGNUM
)
7627 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7628 emit_move_insn (gen_rtx_REG (Pmode
, arm_pic_register
), pic_reg
);
7629 emit_use (gen_rtx_REG (Pmode
, arm_pic_register
));
7632 emit_insn (gen_pic_load_addr_unified (pic_reg
, pic_rtx
, labelno
));
7636 /* Need to emit this whether or not we obey regdecls,
7637 since setjmp/longjmp can cause life info to screw up. */
7641 /* Generate code to load the address of a static var when flag_pic is set. */
7643 arm_pic_static_addr (rtx orig
, rtx reg
)
7645 rtx l1
, labelno
, offset_rtx
;
7647 gcc_assert (flag_pic
);
7649 /* We use an UNSPEC rather than a LABEL_REF because this label
7650 never appears in the code stream. */
7651 labelno
= GEN_INT (pic_labelno
++);
7652 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
7653 l1
= gen_rtx_CONST (VOIDmode
, l1
);
7655 /* On the ARM the PC register contains 'dot + 8' at the time of the
7656 addition, on the Thumb it is 'dot + 4'. */
7657 offset_rtx
= plus_constant (Pmode
, l1
, TARGET_ARM
? 8 : 4);
7658 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
7659 UNSPEC_SYMBOL_OFFSET
);
7660 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
7662 return emit_insn (gen_pic_load_addr_unified (reg
, offset_rtx
, labelno
));
7665 /* Return nonzero if X is valid as an ARM state addressing register. */
7667 arm_address_register_rtx_p (rtx x
, int strict_p
)
7677 return ARM_REGNO_OK_FOR_BASE_P (regno
);
7679 return (regno
<= LAST_ARM_REGNUM
7680 || regno
>= FIRST_PSEUDO_REGISTER
7681 || regno
== FRAME_POINTER_REGNUM
7682 || regno
== ARG_POINTER_REGNUM
);
7685 /* Return TRUE if this rtx is the difference of a symbol and a label,
7686 and will reduce to a PC-relative relocation in the object file.
7687 Expressions like this can be left alone when generating PIC, rather
7688 than forced through the GOT. */
7690 pcrel_constant_p (rtx x
)
7692 if (GET_CODE (x
) == MINUS
)
7693 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
7698 /* Return true if X will surely end up in an index register after next
7701 will_be_in_index_register (const_rtx x
)
7703 /* arm.md: calculate_pic_address will split this into a register. */
7704 return GET_CODE (x
) == UNSPEC
&& (XINT (x
, 1) == UNSPEC_PIC_SYM
);
7707 /* Return nonzero if X is a valid ARM state address operand. */
7709 arm_legitimate_address_outer_p (machine_mode mode
, rtx x
, RTX_CODE outer
,
7713 enum rtx_code code
= GET_CODE (x
);
7715 if (arm_address_register_rtx_p (x
, strict_p
))
7718 use_ldrd
= (TARGET_LDRD
7719 && (mode
== DImode
|| mode
== DFmode
));
7721 if (code
== POST_INC
|| code
== PRE_DEC
7722 || ((code
== PRE_INC
|| code
== POST_DEC
)
7723 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7724 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7726 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7727 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7728 && GET_CODE (XEXP (x
, 1)) == PLUS
7729 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7731 rtx addend
= XEXP (XEXP (x
, 1), 1);
7733 /* Don't allow ldrd post increment by register because it's hard
7734 to fixup invalid register choices. */
7736 && GET_CODE (x
) == POST_MODIFY
7740 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
7741 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
7744 /* After reload constants split into minipools will have addresses
7745 from a LABEL_REF. */
7746 else if (reload_completed
7747 && (code
== LABEL_REF
7749 && GET_CODE (XEXP (x
, 0)) == PLUS
7750 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7751 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7754 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7757 else if (code
== PLUS
)
7759 rtx xop0
= XEXP (x
, 0);
7760 rtx xop1
= XEXP (x
, 1);
7762 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7763 && ((CONST_INT_P (xop1
)
7764 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
7765 || (!strict_p
&& will_be_in_index_register (xop1
))))
7766 || (arm_address_register_rtx_p (xop1
, strict_p
)
7767 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
7771 /* Reload currently can't handle MINUS, so disable this for now */
7772 else if (GET_CODE (x
) == MINUS
)
7774 rtx xop0
= XEXP (x
, 0);
7775 rtx xop1
= XEXP (x
, 1);
7777 return (arm_address_register_rtx_p (xop0
, strict_p
)
7778 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
7782 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7783 && code
== SYMBOL_REF
7784 && CONSTANT_POOL_ADDRESS_P (x
)
7786 && symbol_mentioned_p (get_pool_constant (x
))
7787 && ! pcrel_constant_p (get_pool_constant (x
))))
7793 /* Return true if we can avoid creating a constant pool entry for x. */
7795 can_avoid_literal_pool_for_label_p (rtx x
)
7797 /* Normally we can assign constant values to target registers without
7798 the help of constant pool. But there are cases we have to use constant
7800 1) assign a label to register.
7801 2) sign-extend a 8bit value to 32bit and then assign to register.
7803 Constant pool access in format:
7804 (set (reg r0) (mem (symbol_ref (".LC0"))))
7805 will cause the use of literal pool (later in function arm_reorg).
7806 So here we mark such format as an invalid format, then the compiler
7807 will adjust it into:
7808 (set (reg r0) (symbol_ref (".LC0")))
7809 (set (reg r0) (mem (reg r0))).
7810 No extra register is required, and (mem (reg r0)) won't cause the use
7811 of literal pools. */
7812 if (arm_disable_literal_pool
&& GET_CODE (x
) == SYMBOL_REF
7813 && CONSTANT_POOL_ADDRESS_P (x
))
7819 /* Return nonzero if X is a valid Thumb-2 address operand. */
7821 thumb2_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
7824 enum rtx_code code
= GET_CODE (x
);
7826 if (arm_address_register_rtx_p (x
, strict_p
))
7829 use_ldrd
= (TARGET_LDRD
7830 && (mode
== DImode
|| mode
== DFmode
));
7832 if (code
== POST_INC
|| code
== PRE_DEC
7833 || ((code
== PRE_INC
|| code
== POST_DEC
)
7834 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
7835 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
7837 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
7838 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
7839 && GET_CODE (XEXP (x
, 1)) == PLUS
7840 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
7842 /* Thumb-2 only has autoincrement by constant. */
7843 rtx addend
= XEXP (XEXP (x
, 1), 1);
7844 HOST_WIDE_INT offset
;
7846 if (!CONST_INT_P (addend
))
7849 offset
= INTVAL(addend
);
7850 if (GET_MODE_SIZE (mode
) <= 4)
7851 return (offset
> -256 && offset
< 256);
7853 return (use_ldrd
&& offset
> -1024 && offset
< 1024
7854 && (offset
& 3) == 0);
7857 /* After reload constants split into minipools will have addresses
7858 from a LABEL_REF. */
7859 else if (reload_completed
7860 && (code
== LABEL_REF
7862 && GET_CODE (XEXP (x
, 0)) == PLUS
7863 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
7864 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
7867 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
7870 else if (code
== PLUS
)
7872 rtx xop0
= XEXP (x
, 0);
7873 rtx xop1
= XEXP (x
, 1);
7875 return ((arm_address_register_rtx_p (xop0
, strict_p
)
7876 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
7877 || (!strict_p
&& will_be_in_index_register (xop1
))))
7878 || (arm_address_register_rtx_p (xop1
, strict_p
)
7879 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
7882 else if (can_avoid_literal_pool_for_label_p (x
))
7885 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
7886 && code
== SYMBOL_REF
7887 && CONSTANT_POOL_ADDRESS_P (x
)
7889 && symbol_mentioned_p (get_pool_constant (x
))
7890 && ! pcrel_constant_p (get_pool_constant (x
))))
7896 /* Return nonzero if INDEX is valid for an address index operand in
7899 arm_legitimate_index_p (machine_mode mode
, rtx index
, RTX_CODE outer
,
7902 HOST_WIDE_INT range
;
7903 enum rtx_code code
= GET_CODE (index
);
7905 /* Standard coprocessor addressing modes. */
7906 if (TARGET_HARD_FLOAT
7907 && (mode
== SFmode
|| mode
== DFmode
))
7908 return (code
== CONST_INT
&& INTVAL (index
) < 1024
7909 && INTVAL (index
) > -1024
7910 && (INTVAL (index
) & 3) == 0);
7912 /* For quad modes, we restrict the constant offset to be slightly less
7913 than what the instruction format permits. We do this because for
7914 quad mode moves, we will actually decompose them into two separate
7915 double-mode reads or writes. INDEX must therefore be a valid
7916 (double-mode) offset and so should INDEX+8. */
7917 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
7918 return (code
== CONST_INT
7919 && INTVAL (index
) < 1016
7920 && INTVAL (index
) > -1024
7921 && (INTVAL (index
) & 3) == 0);
7923 /* We have no such constraint on double mode offsets, so we permit the
7924 full range of the instruction format. */
7925 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
7926 return (code
== CONST_INT
7927 && INTVAL (index
) < 1024
7928 && INTVAL (index
) > -1024
7929 && (INTVAL (index
) & 3) == 0);
7931 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
7932 return (code
== CONST_INT
7933 && INTVAL (index
) < 1024
7934 && INTVAL (index
) > -1024
7935 && (INTVAL (index
) & 3) == 0);
7937 if (arm_address_register_rtx_p (index
, strict_p
)
7938 && (GET_MODE_SIZE (mode
) <= 4))
7941 if (mode
== DImode
|| mode
== DFmode
)
7943 if (code
== CONST_INT
)
7945 HOST_WIDE_INT val
= INTVAL (index
);
7947 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7948 If vldr is selected it uses arm_coproc_mem_operand. */
7950 return val
> -256 && val
< 256;
7952 return val
> -4096 && val
< 4092;
7955 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
7958 if (GET_MODE_SIZE (mode
) <= 4
7962 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
7966 rtx xiop0
= XEXP (index
, 0);
7967 rtx xiop1
= XEXP (index
, 1);
7969 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
7970 && power_of_two_operand (xiop1
, SImode
))
7971 || (arm_address_register_rtx_p (xiop1
, strict_p
)
7972 && power_of_two_operand (xiop0
, SImode
)));
7974 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
7975 || code
== ASHIFT
|| code
== ROTATERT
)
7977 rtx op
= XEXP (index
, 1);
7979 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
7982 && INTVAL (op
) <= 31);
7986 /* For ARM v4 we may be doing a sign-extend operation during the
7992 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
7998 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
8000 return (code
== CONST_INT
8001 && INTVAL (index
) < range
8002 && INTVAL (index
) > -range
);
8005 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8006 index operand. i.e. 1, 2, 4 or 8. */
8008 thumb2_index_mul_operand (rtx op
)
8012 if (!CONST_INT_P (op
))
8016 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
8019 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8021 thumb2_legitimate_index_p (machine_mode mode
, rtx index
, int strict_p
)
8023 enum rtx_code code
= GET_CODE (index
);
8025 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8026 /* Standard coprocessor addressing modes. */
8027 if (TARGET_HARD_FLOAT
8028 && (mode
== SFmode
|| mode
== DFmode
))
8029 return (code
== CONST_INT
&& INTVAL (index
) < 1024
8030 /* Thumb-2 allows only > -256 index range for it's core register
8031 load/stores. Since we allow SF/DF in core registers, we have
8032 to use the intersection between -256~4096 (core) and -1024~1024
8034 && INTVAL (index
) > -256
8035 && (INTVAL (index
) & 3) == 0);
8037 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
8039 /* For DImode assume values will usually live in core regs
8040 and only allow LDRD addressing modes. */
8041 if (!TARGET_LDRD
|| mode
!= DImode
)
8042 return (code
== CONST_INT
8043 && INTVAL (index
) < 1024
8044 && INTVAL (index
) > -1024
8045 && (INTVAL (index
) & 3) == 0);
8048 /* For quad modes, we restrict the constant offset to be slightly less
8049 than what the instruction format permits. We do this because for
8050 quad mode moves, we will actually decompose them into two separate
8051 double-mode reads or writes. INDEX must therefore be a valid
8052 (double-mode) offset and so should INDEX+8. */
8053 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
8054 return (code
== CONST_INT
8055 && INTVAL (index
) < 1016
8056 && INTVAL (index
) > -1024
8057 && (INTVAL (index
) & 3) == 0);
8059 /* We have no such constraint on double mode offsets, so we permit the
8060 full range of the instruction format. */
8061 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
8062 return (code
== CONST_INT
8063 && INTVAL (index
) < 1024
8064 && INTVAL (index
) > -1024
8065 && (INTVAL (index
) & 3) == 0);
8067 if (arm_address_register_rtx_p (index
, strict_p
)
8068 && (GET_MODE_SIZE (mode
) <= 4))
8071 if (mode
== DImode
|| mode
== DFmode
)
8073 if (code
== CONST_INT
)
8075 HOST_WIDE_INT val
= INTVAL (index
);
8076 /* Thumb-2 ldrd only has reg+const addressing modes.
8077 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8078 If vldr is selected it uses arm_coproc_mem_operand. */
8080 return IN_RANGE (val
, -1020, 1020) && (val
& 3) == 0;
8082 return IN_RANGE (val
, -255, 4095 - 4);
8090 rtx xiop0
= XEXP (index
, 0);
8091 rtx xiop1
= XEXP (index
, 1);
8093 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
8094 && thumb2_index_mul_operand (xiop1
))
8095 || (arm_address_register_rtx_p (xiop1
, strict_p
)
8096 && thumb2_index_mul_operand (xiop0
)));
8098 else if (code
== ASHIFT
)
8100 rtx op
= XEXP (index
, 1);
8102 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
8105 && INTVAL (op
) <= 3);
8108 return (code
== CONST_INT
8109 && INTVAL (index
) < 4096
8110 && INTVAL (index
) > -256);
8113 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8115 thumb1_base_register_rtx_p (rtx x
, machine_mode mode
, int strict_p
)
8125 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
8127 return (regno
<= LAST_LO_REGNUM
8128 || regno
> LAST_VIRTUAL_REGISTER
8129 || regno
== FRAME_POINTER_REGNUM
8130 || (GET_MODE_SIZE (mode
) >= 4
8131 && (regno
== STACK_POINTER_REGNUM
8132 || regno
>= FIRST_PSEUDO_REGISTER
8133 || x
== hard_frame_pointer_rtx
8134 || x
== arg_pointer_rtx
)));
8137 /* Return nonzero if x is a legitimate index register. This is the case
8138 for any base register that can access a QImode object. */
8140 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
8142 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
8145 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8147 The AP may be eliminated to either the SP or the FP, so we use the
8148 least common denominator, e.g. SImode, and offsets from 0 to 64.
8150 ??? Verify whether the above is the right approach.
8152 ??? Also, the FP may be eliminated to the SP, so perhaps that
8153 needs special handling also.
8155 ??? Look at how the mips16 port solves this problem. It probably uses
8156 better ways to solve some of these problems.
8158 Although it is not incorrect, we don't accept QImode and HImode
8159 addresses based on the frame pointer or arg pointer until the
8160 reload pass starts. This is so that eliminating such addresses
8161 into stack based ones won't produce impossible code. */
8163 thumb1_legitimate_address_p (machine_mode mode
, rtx x
, int strict_p
)
8165 if (TARGET_HAVE_MOVT
&& can_avoid_literal_pool_for_label_p (x
))
8168 /* ??? Not clear if this is right. Experiment. */
8169 if (GET_MODE_SIZE (mode
) < 4
8170 && !(reload_in_progress
|| reload_completed
)
8171 && (reg_mentioned_p (frame_pointer_rtx
, x
)
8172 || reg_mentioned_p (arg_pointer_rtx
, x
)
8173 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
8174 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
8175 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
8176 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
8179 /* Accept any base register. SP only in SImode or larger. */
8180 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
8183 /* This is PC relative data before arm_reorg runs. */
8184 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
8185 && GET_CODE (x
) == SYMBOL_REF
8186 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
8189 /* This is PC relative data after arm_reorg runs. */
8190 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
8192 && (GET_CODE (x
) == LABEL_REF
8193 || (GET_CODE (x
) == CONST
8194 && GET_CODE (XEXP (x
, 0)) == PLUS
8195 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
8196 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))))
8199 /* Post-inc indexing only supported for SImode and larger. */
8200 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
8201 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
8204 else if (GET_CODE (x
) == PLUS
)
8206 /* REG+REG address can be any two index registers. */
8207 /* We disallow FRAME+REG addressing since we know that FRAME
8208 will be replaced with STACK, and SP relative addressing only
8209 permits SP+OFFSET. */
8210 if (GET_MODE_SIZE (mode
) <= 4
8211 && XEXP (x
, 0) != frame_pointer_rtx
8212 && XEXP (x
, 1) != frame_pointer_rtx
8213 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8214 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
8215 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
8218 /* REG+const has 5-7 bit offset for non-SP registers. */
8219 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
8220 || XEXP (x
, 0) == arg_pointer_rtx
)
8221 && CONST_INT_P (XEXP (x
, 1))
8222 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
8225 /* REG+const has 10-bit offset for SP, but only SImode and
8226 larger is supported. */
8227 /* ??? Should probably check for DI/DFmode overflow here
8228 just like GO_IF_LEGITIMATE_OFFSET does. */
8229 else if (REG_P (XEXP (x
, 0))
8230 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
8231 && GET_MODE_SIZE (mode
) >= 4
8232 && CONST_INT_P (XEXP (x
, 1))
8233 && INTVAL (XEXP (x
, 1)) >= 0
8234 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
8235 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8238 else if (REG_P (XEXP (x
, 0))
8239 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
8240 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
8241 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
8242 && REGNO (XEXP (x
, 0))
8243 <= LAST_VIRTUAL_POINTER_REGISTER
))
8244 && GET_MODE_SIZE (mode
) >= 4
8245 && CONST_INT_P (XEXP (x
, 1))
8246 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
8250 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
8251 && GET_MODE_SIZE (mode
) == 4
8252 && GET_CODE (x
) == SYMBOL_REF
8253 && CONSTANT_POOL_ADDRESS_P (x
)
8255 && symbol_mentioned_p (get_pool_constant (x
))
8256 && ! pcrel_constant_p (get_pool_constant (x
))))
8262 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8263 instruction of mode MODE. */
8265 thumb_legitimate_offset_p (machine_mode mode
, HOST_WIDE_INT val
)
8267 switch (GET_MODE_SIZE (mode
))
8270 return val
>= 0 && val
< 32;
8273 return val
>= 0 && val
< 64 && (val
& 1) == 0;
8277 && (val
+ GET_MODE_SIZE (mode
)) <= 128
8283 arm_legitimate_address_p (machine_mode mode
, rtx x
, bool strict_p
)
8286 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
8287 else if (TARGET_THUMB2
)
8288 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
8289 else /* if (TARGET_THUMB1) */
8290 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
8293 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8295 Given an rtx X being reloaded into a reg required to be
8296 in class CLASS, return the class of reg to actually use.
8297 In general this is just CLASS, but for the Thumb core registers and
8298 immediate constants we prefer a LO_REGS class or a subset. */
8301 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED
, reg_class_t rclass
)
8307 if (rclass
== GENERAL_REGS
)
8314 /* Build the SYMBOL_REF for __tls_get_addr. */
8316 static GTY(()) rtx tls_get_addr_libfunc
;
8319 get_tls_get_addr (void)
8321 if (!tls_get_addr_libfunc
)
8322 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
8323 return tls_get_addr_libfunc
;
8327 arm_load_tp (rtx target
)
8330 target
= gen_reg_rtx (SImode
);
8334 /* Can return in any reg. */
8335 emit_insn (gen_load_tp_hard (target
));
8339 /* Always returned in r0. Immediately copy the result into a pseudo,
8340 otherwise other uses of r0 (e.g. setting up function arguments) may
8341 clobber the value. */
8345 emit_insn (gen_load_tp_soft ());
8347 tmp
= gen_rtx_REG (SImode
, R0_REGNUM
);
8348 emit_move_insn (target
, tmp
);
8354 load_tls_operand (rtx x
, rtx reg
)
8358 if (reg
== NULL_RTX
)
8359 reg
= gen_reg_rtx (SImode
);
8361 tmp
= gen_rtx_CONST (SImode
, x
);
8363 emit_move_insn (reg
, tmp
);
8369 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
8371 rtx label
, labelno
, sum
;
8373 gcc_assert (reloc
!= TLS_DESCSEQ
);
8376 labelno
= GEN_INT (pic_labelno
++);
8377 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8378 label
= gen_rtx_CONST (VOIDmode
, label
);
8380 sum
= gen_rtx_UNSPEC (Pmode
,
8381 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
8382 GEN_INT (TARGET_ARM
? 8 : 4)),
8384 reg
= load_tls_operand (sum
, reg
);
8387 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
8389 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8391 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
8392 LCT_PURE
, /* LCT_CONST? */
8395 rtx_insn
*insns
= get_insns ();
8402 arm_tls_descseq_addr (rtx x
, rtx reg
)
8404 rtx labelno
= GEN_INT (pic_labelno
++);
8405 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8406 rtx sum
= gen_rtx_UNSPEC (Pmode
,
8407 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
8408 gen_rtx_CONST (VOIDmode
, label
),
8409 GEN_INT (!TARGET_ARM
)),
8411 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, R0_REGNUM
));
8413 emit_insn (gen_tlscall (x
, labelno
));
8415 reg
= gen_reg_rtx (SImode
);
8417 gcc_assert (REGNO (reg
) != R0_REGNUM
);
8419 emit_move_insn (reg
, reg0
);
8425 legitimize_tls_address (rtx x
, rtx reg
)
8427 rtx dest
, tp
, label
, labelno
, sum
, ret
, eqv
, addend
;
8429 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
8433 case TLS_MODEL_GLOBAL_DYNAMIC
:
8434 if (TARGET_GNU2_TLS
)
8436 reg
= arm_tls_descseq_addr (x
, reg
);
8438 tp
= arm_load_tp (NULL_RTX
);
8440 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8444 /* Original scheme */
8445 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
8446 dest
= gen_reg_rtx (Pmode
);
8447 emit_libcall_block (insns
, dest
, ret
, x
);
8451 case TLS_MODEL_LOCAL_DYNAMIC
:
8452 if (TARGET_GNU2_TLS
)
8454 reg
= arm_tls_descseq_addr (x
, reg
);
8456 tp
= arm_load_tp (NULL_RTX
);
8458 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
8462 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
8464 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8465 share the LDM result with other LD model accesses. */
8466 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
8468 dest
= gen_reg_rtx (Pmode
);
8469 emit_libcall_block (insns
, dest
, ret
, eqv
);
8471 /* Load the addend. */
8472 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
8473 GEN_INT (TLS_LDO32
)),
8475 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
8476 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
8480 case TLS_MODEL_INITIAL_EXEC
:
8481 labelno
= GEN_INT (pic_labelno
++);
8482 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
8483 label
= gen_rtx_CONST (VOIDmode
, label
);
8484 sum
= gen_rtx_UNSPEC (Pmode
,
8485 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
8486 GEN_INT (TARGET_ARM
? 8 : 4)),
8488 reg
= load_tls_operand (sum
, reg
);
8491 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
8492 else if (TARGET_THUMB2
)
8493 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
8496 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
8497 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
8500 tp
= arm_load_tp (NULL_RTX
);
8502 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8504 case TLS_MODEL_LOCAL_EXEC
:
8505 tp
= arm_load_tp (NULL_RTX
);
8507 reg
= gen_rtx_UNSPEC (Pmode
,
8508 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
8510 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
8512 return gen_rtx_PLUS (Pmode
, tp
, reg
);
8519 /* Try machine-dependent ways of modifying an illegitimate address
8520 to be legitimate. If we find one, return the new, valid address. */
8522 arm_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8524 if (arm_tls_referenced_p (x
))
8528 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
8530 addend
= XEXP (XEXP (x
, 0), 1);
8531 x
= XEXP (XEXP (x
, 0), 0);
8534 if (GET_CODE (x
) != SYMBOL_REF
)
8537 gcc_assert (SYMBOL_REF_TLS_MODEL (x
) != 0);
8539 x
= legitimize_tls_address (x
, NULL_RTX
);
8543 x
= gen_rtx_PLUS (SImode
, x
, addend
);
8552 /* TODO: legitimize_address for Thumb2. */
8555 return thumb_legitimize_address (x
, orig_x
, mode
);
8558 if (GET_CODE (x
) == PLUS
)
8560 rtx xop0
= XEXP (x
, 0);
8561 rtx xop1
= XEXP (x
, 1);
8563 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
8564 xop0
= force_reg (SImode
, xop0
);
8566 if (CONSTANT_P (xop1
) && !CONST_INT_P (xop1
)
8567 && !symbol_mentioned_p (xop1
))
8568 xop1
= force_reg (SImode
, xop1
);
8570 if (ARM_BASE_REGISTER_RTX_P (xop0
)
8571 && CONST_INT_P (xop1
))
8573 HOST_WIDE_INT n
, low_n
;
8577 /* VFP addressing modes actually allow greater offsets, but for
8578 now we just stick with the lowest common denominator. */
8579 if (mode
== DImode
|| mode
== DFmode
)
8591 low_n
= ((mode
) == TImode
? 0
8592 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
8596 base_reg
= gen_reg_rtx (SImode
);
8597 val
= force_operand (plus_constant (Pmode
, xop0
, n
), NULL_RTX
);
8598 emit_move_insn (base_reg
, val
);
8599 x
= plus_constant (Pmode
, base_reg
, low_n
);
8601 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8602 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8605 /* XXX We don't allow MINUS any more -- see comment in
8606 arm_legitimate_address_outer_p (). */
8607 else if (GET_CODE (x
) == MINUS
)
8609 rtx xop0
= XEXP (x
, 0);
8610 rtx xop1
= XEXP (x
, 1);
8612 if (CONSTANT_P (xop0
))
8613 xop0
= force_reg (SImode
, xop0
);
8615 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
8616 xop1
= force_reg (SImode
, xop1
);
8618 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
8619 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
8622 /* Make sure to take full advantage of the pre-indexed addressing mode
8623 with absolute addresses which often allows for the base register to
8624 be factorized for multiple adjacent memory references, and it might
8625 even allows for the mini pool to be avoided entirely. */
8626 else if (CONST_INT_P (x
) && optimize
> 0)
8629 HOST_WIDE_INT mask
, base
, index
;
8632 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8633 use a 8-bit index. So let's use a 12-bit index for SImode only and
8634 hope that arm_gen_constant will enable ldrb to use more bits. */
8635 bits
= (mode
== SImode
) ? 12 : 8;
8636 mask
= (1 << bits
) - 1;
8637 base
= INTVAL (x
) & ~mask
;
8638 index
= INTVAL (x
) & mask
;
8639 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
8641 /* It'll most probably be more efficient to generate the base
8642 with more bits set and use a negative index instead. */
8646 base_reg
= force_reg (SImode
, GEN_INT (base
));
8647 x
= plus_constant (Pmode
, base_reg
, index
);
8652 /* We need to find and carefully transform any SYMBOL and LABEL
8653 references; so go back to the original address expression. */
8654 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8656 if (new_x
!= orig_x
)
8664 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8665 to be legitimate. If we find one, return the new, valid address. */
8667 thumb_legitimize_address (rtx x
, rtx orig_x
, machine_mode mode
)
8669 if (GET_CODE (x
) == PLUS
8670 && CONST_INT_P (XEXP (x
, 1))
8671 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
8672 || INTVAL (XEXP (x
, 1)) < 0))
8674 rtx xop0
= XEXP (x
, 0);
8675 rtx xop1
= XEXP (x
, 1);
8676 HOST_WIDE_INT offset
= INTVAL (xop1
);
8678 /* Try and fold the offset into a biasing of the base register and
8679 then offsetting that. Don't do this when optimizing for space
8680 since it can cause too many CSEs. */
8681 if (optimize_size
&& offset
>= 0
8682 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
8684 HOST_WIDE_INT delta
;
8687 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
8688 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
8689 delta
= 31 * GET_MODE_SIZE (mode
);
8691 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
8693 xop0
= force_operand (plus_constant (Pmode
, xop0
, offset
- delta
),
8695 x
= plus_constant (Pmode
, xop0
, delta
);
8697 else if (offset
< 0 && offset
> -256)
8698 /* Small negative offsets are best done with a subtract before the
8699 dereference, forcing these into a register normally takes two
8701 x
= force_operand (x
, NULL_RTX
);
8704 /* For the remaining cases, force the constant into a register. */
8705 xop1
= force_reg (SImode
, xop1
);
8706 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
8709 else if (GET_CODE (x
) == PLUS
8710 && s_register_operand (XEXP (x
, 1), SImode
)
8711 && !s_register_operand (XEXP (x
, 0), SImode
))
8713 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
8715 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
8720 /* We need to find and carefully transform any SYMBOL and LABEL
8721 references; so go back to the original address expression. */
8722 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
8724 if (new_x
!= orig_x
)
8731 /* Return TRUE if X contains any TLS symbol references. */
8734 arm_tls_referenced_p (rtx x
)
8736 if (! TARGET_HAVE_TLS
)
8739 subrtx_iterator::array_type array
;
8740 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
8742 const_rtx x
= *iter
;
8743 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
8745 /* ARM currently does not provide relocations to encode TLS variables
8746 into AArch32 instructions, only data, so there is no way to
8747 currently implement these if a literal pool is disabled. */
8748 if (arm_disable_literal_pool
)
8749 sorry ("accessing thread-local storage is not currently supported "
8750 "with -mpure-code or -mslow-flash-data");
8755 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8756 TLS offsets, not real symbol references. */
8757 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
8758 iter
.skip_subrtxes ();
8763 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8765 On the ARM, allow any integer (invalid ones are removed later by insn
8766 patterns), nice doubles and symbol_refs which refer to the function's
8769 When generating pic allow anything. */
8772 arm_legitimate_constant_p_1 (machine_mode
, rtx x
)
8774 return flag_pic
|| !label_mentioned_p (x
);
8778 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8780 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8781 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8782 for ARMv8-M Baseline or later the result is valid. */
8783 if (TARGET_HAVE_MOVT
&& GET_CODE (x
) == HIGH
)
8786 return (CONST_INT_P (x
)
8787 || CONST_DOUBLE_P (x
)
8788 || CONSTANT_ADDRESS_P (x
)
8789 || (TARGET_HAVE_MOVT
&& GET_CODE (x
) == SYMBOL_REF
)
8794 arm_legitimate_constant_p (machine_mode mode
, rtx x
)
8796 return (!arm_cannot_force_const_mem (mode
, x
)
8798 ? arm_legitimate_constant_p_1 (mode
, x
)
8799 : thumb_legitimate_constant_p (mode
, x
)));
8802 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8805 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
8809 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
8811 split_const (x
, &base
, &offset
);
8812 if (GET_CODE (base
) == SYMBOL_REF
8813 && !offset_within_block_p (base
, INTVAL (offset
)))
8816 return arm_tls_referenced_p (x
);
8819 #define REG_OR_SUBREG_REG(X) \
8821 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8823 #define REG_OR_SUBREG_RTX(X) \
8824 (REG_P (X) ? (X) : SUBREG_REG (X))
8827 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8829 machine_mode mode
= GET_MODE (x
);
8838 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8845 return COSTS_N_INSNS (1);
8848 if (arm_arch6m
&& arm_m_profile_small_mul
)
8849 return COSTS_N_INSNS (32);
8851 if (CONST_INT_P (XEXP (x
, 1)))
8854 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
8861 return COSTS_N_INSNS (2) + cycles
;
8863 return COSTS_N_INSNS (1) + 16;
8866 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8868 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
8869 return (COSTS_N_INSNS (words
)
8870 + 4 * ((MEM_P (SET_SRC (x
)))
8871 + MEM_P (SET_DEST (x
))));
8876 if (UINTVAL (x
) < 256
8877 /* 16-bit constant. */
8878 || (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000)))
8880 if (thumb_shiftable_const (INTVAL (x
)))
8881 return COSTS_N_INSNS (2);
8882 return COSTS_N_INSNS (3);
8884 else if ((outer
== PLUS
|| outer
== COMPARE
)
8885 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
8887 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
8888 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
8889 return COSTS_N_INSNS (1);
8890 else if (outer
== AND
)
8893 /* This duplicates the tests in the andsi3 expander. */
8894 for (i
= 9; i
<= 31; i
++)
8895 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
8896 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
8897 return COSTS_N_INSNS (2);
8899 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
8900 || outer
== LSHIFTRT
)
8902 return COSTS_N_INSNS (2);
8908 return COSTS_N_INSNS (3);
8926 /* XXX another guess. */
8927 /* Memory costs quite a lot for the first word, but subsequent words
8928 load at the equivalent of a single insn each. */
8929 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
8930 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
8935 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
8941 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
8942 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
8948 return total
+ COSTS_N_INSNS (1);
8950 /* Assume a two-shift sequence. Increase the cost slightly so
8951 we prefer actual shifts over an extend operation. */
8952 return total
+ 1 + COSTS_N_INSNS (2);
8959 /* Estimates the size cost of thumb1 instructions.
8960 For now most of the code is copied from thumb1_rtx_costs. We need more
8961 fine grain tuning when we have more related test cases. */
8963 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
8965 machine_mode mode
= GET_MODE (x
);
8974 return (mode
== SImode
) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8978 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8979 defined by RTL expansion, especially for the expansion of
8981 if ((GET_CODE (XEXP (x
, 0)) == MULT
8982 && power_of_two_operand (XEXP (XEXP (x
,0),1), SImode
))
8983 || (GET_CODE (XEXP (x
, 1)) == MULT
8984 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
)))
8985 return COSTS_N_INSNS (2);
8990 return COSTS_N_INSNS (1);
8993 if (CONST_INT_P (XEXP (x
, 1)))
8995 /* Thumb1 mul instruction can't operate on const. We must Load it
8996 into a register first. */
8997 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
8998 /* For the targets which have a very small and high-latency multiply
8999 unit, we prefer to synthesize the mult with up to 5 instructions,
9000 giving a good balance between size and performance. */
9001 if (arm_arch6m
&& arm_m_profile_small_mul
)
9002 return COSTS_N_INSNS (5);
9004 return COSTS_N_INSNS (1) + const_size
;
9006 return COSTS_N_INSNS (1);
9009 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9011 words
= ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x
))));
9012 cost
= COSTS_N_INSNS (words
);
9013 if (satisfies_constraint_J (SET_SRC (x
))
9014 || satisfies_constraint_K (SET_SRC (x
))
9015 /* Too big an immediate for a 2-byte mov, using MOVT. */
9016 || (CONST_INT_P (SET_SRC (x
))
9017 && UINTVAL (SET_SRC (x
)) >= 256
9019 && satisfies_constraint_j (SET_SRC (x
)))
9020 /* thumb1_movdi_insn. */
9021 || ((words
> 1) && MEM_P (SET_SRC (x
))))
9022 cost
+= COSTS_N_INSNS (1);
9028 if (UINTVAL (x
) < 256)
9029 return COSTS_N_INSNS (1);
9030 /* movw is 4byte long. */
9031 if (TARGET_HAVE_MOVT
&& !(INTVAL (x
) & 0xffff0000))
9032 return COSTS_N_INSNS (2);
9033 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9034 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
9035 return COSTS_N_INSNS (2);
9036 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9037 if (thumb_shiftable_const (INTVAL (x
)))
9038 return COSTS_N_INSNS (2);
9039 return COSTS_N_INSNS (3);
9041 else if ((outer
== PLUS
|| outer
== COMPARE
)
9042 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
9044 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
9045 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
9046 return COSTS_N_INSNS (1);
9047 else if (outer
== AND
)
9050 /* This duplicates the tests in the andsi3 expander. */
9051 for (i
= 9; i
<= 31; i
++)
9052 if ((HOST_WIDE_INT_1
<< i
) - 1 == INTVAL (x
)
9053 || (HOST_WIDE_INT_1
<< i
) - 1 == ~INTVAL (x
))
9054 return COSTS_N_INSNS (2);
9056 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
9057 || outer
== LSHIFTRT
)
9059 return COSTS_N_INSNS (2);
9065 return COSTS_N_INSNS (3);
9079 return COSTS_N_INSNS (1);
9082 return (COSTS_N_INSNS (1)
9084 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
9085 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
9086 ? COSTS_N_INSNS (1) : 0));
9090 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
9095 /* XXX still guessing. */
9096 switch (GET_MODE (XEXP (x
, 0)))
9099 return (1 + (mode
== DImode
? 4 : 0)
9100 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9103 return (4 + (mode
== DImode
? 4 : 0)
9104 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9107 return (1 + (MEM_P (XEXP (x
, 0)) ? 10 : 0));
9118 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9119 operand, then return the operand that is being shifted. If the shift
9120 is not by a constant, then set SHIFT_REG to point to the operand.
9121 Return NULL if OP is not a shifter operand. */
9123 shifter_op_p (rtx op
, rtx
*shift_reg
)
9125 enum rtx_code code
= GET_CODE (op
);
9127 if (code
== MULT
&& CONST_INT_P (XEXP (op
, 1))
9128 && exact_log2 (INTVAL (XEXP (op
, 1))) > 0)
9129 return XEXP (op
, 0);
9130 else if (code
== ROTATE
&& CONST_INT_P (XEXP (op
, 1)))
9131 return XEXP (op
, 0);
9132 else if (code
== ROTATERT
|| code
== ASHIFT
|| code
== LSHIFTRT
9133 || code
== ASHIFTRT
)
9135 if (!CONST_INT_P (XEXP (op
, 1)))
9136 *shift_reg
= XEXP (op
, 1);
9137 return XEXP (op
, 0);
9144 arm_unspec_cost (rtx x
, enum rtx_code
/* outer_code */, bool speed_p
, int *cost
)
9146 const struct cpu_cost_table
*extra_cost
= current_tune
->insn_extra_cost
;
9147 rtx_code code
= GET_CODE (x
);
9148 gcc_assert (code
== UNSPEC
|| code
== UNSPEC_VOLATILE
);
9150 switch (XINT (x
, 1))
9152 case UNSPEC_UNALIGNED_LOAD
:
9153 /* We can only do unaligned loads into the integer unit, and we can't
9155 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9157 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.load
9158 + extra_cost
->ldst
.load_unaligned
);
9161 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9162 ADDR_SPACE_GENERIC
, speed_p
);
9166 case UNSPEC_UNALIGNED_STORE
:
9167 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x
)));
9169 *cost
+= (ARM_NUM_REGS (GET_MODE (x
)) * extra_cost
->ldst
.store
9170 + extra_cost
->ldst
.store_unaligned
);
9172 *cost
+= rtx_cost (XVECEXP (x
, 0, 0), VOIDmode
, UNSPEC
, 0, speed_p
);
9174 *cost
+= arm_address_cost (XEXP (XVECEXP (x
, 0, 0), 0), GET_MODE (x
),
9175 ADDR_SPACE_GENERIC
, speed_p
);
9186 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].roundint
;
9190 *cost
= COSTS_N_INSNS (2);
/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))

/* Account for a narrow-mode shift-plus-arithmetic operand at index IDX
   of the binary operation OP currently being costed in
   arm_rtx_costs_internal.  On a match, adds the shift+arith cost and
   the operand costs to *cost and returns from the enclosing function.
   Relies on x, shift_op, shift_reg, extra_cost, speed_p and cost being
   in scope at the expansion site.  */
#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
		&& arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
		if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
		else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
				      OP, 1, speed_p));			\
		return true;						\
	      }								\
	  }								\
	while (0);
9227 /* RTX costs. Make an estimate of the cost of executing the operation
9228 X, which is contained with an operation with code OUTER_CODE.
9229 SPEED_P indicates whether the cost desired is the performance cost,
9230 or the size cost. The estimate is stored in COST and the return
9231 value is TRUE if the cost calculation is final, or FALSE if the
9232 caller should recurse through the operands of X to add additional
9235 We currently make no attempt to model the size savings of Thumb-2
9236 16-bit instructions. At the normal points in compilation where
9237 this code is called we have no measure of whether the condition
9238 flags are live or not, and thus no realistic way to determine what
9239 the size will eventually be. */
9241 arm_rtx_costs_internal (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
9242 const struct cpu_cost_table
*extra_cost
,
9243 int *cost
, bool speed_p
)
9245 machine_mode mode
= GET_MODE (x
);
9247 *cost
= COSTS_N_INSNS (1);
9252 *cost
= thumb1_rtx_costs (x
, code
, outer_code
);
9254 *cost
= thumb1_size_rtx_costs (x
, code
, outer_code
);
9262 /* SET RTXs don't have a mode so we get it from the destination. */
9263 mode
= GET_MODE (SET_DEST (x
));
9265 if (REG_P (SET_SRC (x
))
9266 && REG_P (SET_DEST (x
)))
9268 /* Assume that most copies can be done with a single insn,
9269 unless we don't have HW FP, in which case everything
9270 larger than word mode will require two insns. */
9271 *cost
= COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9272 && GET_MODE_SIZE (mode
) > 4)
9275 /* Conditional register moves can be encoded
9276 in 16 bits in Thumb mode. */
9277 if (!speed_p
&& TARGET_THUMB
&& outer_code
== COND_EXEC
)
9283 if (CONST_INT_P (SET_SRC (x
)))
9285 /* Handle CONST_INT here, since the value doesn't have a mode
9286 and we would otherwise be unable to work out the true cost. */
9287 *cost
= rtx_cost (SET_DEST (x
), GET_MODE (SET_DEST (x
)), SET
,
9290 /* Slightly lower the cost of setting a core reg to a constant.
9291 This helps break up chains and allows for better scheduling. */
9292 if (REG_P (SET_DEST (x
))
9293 && REGNO (SET_DEST (x
)) <= LR_REGNUM
)
9296 /* Immediate moves with an immediate in the range [0, 255] can be
9297 encoded in 16 bits in Thumb mode. */
9298 if (!speed_p
&& TARGET_THUMB
&& GET_MODE (x
) == SImode
9299 && INTVAL (x
) >= 0 && INTVAL (x
) <=255)
9301 goto const_int_cost
;
9307 /* A memory access costs 1 insn if the mode is small, or the address is
9308 a single register, otherwise it costs one insn per word. */
9309 if (REG_P (XEXP (x
, 0)))
9310 *cost
= COSTS_N_INSNS (1);
9312 && GET_CODE (XEXP (x
, 0)) == PLUS
9313 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
9314 /* This will be split into two instructions.
9315 See arm.md:calculate_pic_address. */
9316 *cost
= COSTS_N_INSNS (2);
9318 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
9320 /* For speed optimizations, add the costs of the address and
9321 accessing memory. */
9324 *cost
+= (extra_cost
->ldst
.load
9325 + arm_address_cost (XEXP (x
, 0), mode
,
9326 ADDR_SPACE_GENERIC
, speed_p
));
9328 *cost
+= extra_cost
->ldst
.load
;
9334 /* Calculations of LDM costs are complex. We assume an initial cost
9335 (ldm_1st) which will load the number of registers mentioned in
9336 ldm_regs_per_insn_1st registers; then each additional
9337 ldm_regs_per_insn_subsequent registers cost one more insn. The
9338 formula for N regs is thus:
9340 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9341 + ldm_regs_per_insn_subsequent - 1)
9342 / ldm_regs_per_insn_subsequent).
9344 Additional costs may also be added for addressing. A similar
9345 formula is used for STM. */
9347 bool is_ldm
= load_multiple_operation (x
, SImode
);
9348 bool is_stm
= store_multiple_operation (x
, SImode
);
9350 if (is_ldm
|| is_stm
)
9354 HOST_WIDE_INT nregs
= XVECLEN (x
, 0);
9355 HOST_WIDE_INT regs_per_insn_1st
= is_ldm
9356 ? extra_cost
->ldst
.ldm_regs_per_insn_1st
9357 : extra_cost
->ldst
.stm_regs_per_insn_1st
;
9358 HOST_WIDE_INT regs_per_insn_sub
= is_ldm
9359 ? extra_cost
->ldst
.ldm_regs_per_insn_subsequent
9360 : extra_cost
->ldst
.stm_regs_per_insn_subsequent
;
9362 *cost
+= regs_per_insn_1st
9363 + COSTS_N_INSNS (((MAX (nregs
- regs_per_insn_1st
, 0))
9364 + regs_per_insn_sub
- 1)
9365 / regs_per_insn_sub
);
9374 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9375 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9376 *cost
+= COSTS_N_INSNS (speed_p
9377 ? extra_cost
->fp
[mode
!= SFmode
].div
: 0);
9378 else if (mode
== SImode
&& TARGET_IDIV
)
9379 *cost
+= COSTS_N_INSNS (speed_p
? extra_cost
->mult
[0].idiv
: 0);
9381 *cost
= LIBCALL_COST (2);
9383 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9384 possible udiv is prefered. */
9385 *cost
+= (code
== DIV
? COSTS_N_INSNS (1) : 0);
9386 return false; /* All arguments must be in registers. */
9389 /* MOD by a power of 2 can be expanded as:
9391 and r0, r0, #(n - 1)
9392 and r1, r1, #(n - 1)
9393 rsbpl r0, r1, #0. */
9394 if (CONST_INT_P (XEXP (x
, 1))
9395 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
9398 *cost
+= COSTS_N_INSNS (3);
9401 *cost
+= 2 * extra_cost
->alu
.logical
9402 + extra_cost
->alu
.arith
;
9408 /* Make the cost of sdiv more expensive so when both sdiv and udiv are
9409 possible udiv is prefered. */
9410 *cost
= LIBCALL_COST (2) + (code
== MOD
? COSTS_N_INSNS (1) : 0);
9411 return false; /* All arguments must be in registers. */
9414 if (mode
== SImode
&& REG_P (XEXP (x
, 1)))
9416 *cost
+= (COSTS_N_INSNS (1)
9417 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9419 *cost
+= extra_cost
->alu
.shift_reg
;
9427 if (mode
== DImode
&& CONST_INT_P (XEXP (x
, 1)))
9429 *cost
+= (COSTS_N_INSNS (2)
9430 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
));
9432 *cost
+= 2 * extra_cost
->alu
.shift
;
9433 /* Slightly disparage left shift by 1 at so we prefer adddi3. */
9434 if (code
== ASHIFT
&& XEXP (x
, 1) == CONST1_RTX (SImode
))
9438 else if (mode
== SImode
)
9440 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9441 /* Slightly disparage register shifts at -Os, but not by much. */
9442 if (!CONST_INT_P (XEXP (x
, 1)))
9443 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9444 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9447 else if (GET_MODE_CLASS (mode
) == MODE_INT
9448 && GET_MODE_SIZE (mode
) < 4)
9452 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9453 /* Slightly disparage register shifts at -Os, but not by
9455 if (!CONST_INT_P (XEXP (x
, 1)))
9456 *cost
+= (speed_p
? extra_cost
->alu
.shift_reg
: 1
9457 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9459 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
)
9461 if (arm_arch_thumb2
&& CONST_INT_P (XEXP (x
, 1)))
9463 /* Can use SBFX/UBFX. */
9465 *cost
+= extra_cost
->alu
.bfx
;
9466 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9470 *cost
+= COSTS_N_INSNS (1);
9471 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9474 if (CONST_INT_P (XEXP (x
, 1)))
9475 *cost
+= 2 * extra_cost
->alu
.shift
;
9477 *cost
+= (extra_cost
->alu
.shift
9478 + extra_cost
->alu
.shift_reg
);
9481 /* Slightly disparage register shifts. */
9482 *cost
+= !CONST_INT_P (XEXP (x
, 1));
9487 *cost
= COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x
, 1)));
9488 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
9491 if (CONST_INT_P (XEXP (x
, 1)))
9492 *cost
+= (2 * extra_cost
->alu
.shift
9493 + extra_cost
->alu
.log_shift
);
9495 *cost
+= (extra_cost
->alu
.shift
9496 + extra_cost
->alu
.shift_reg
9497 + extra_cost
->alu
.log_shift_reg
);
9503 *cost
= LIBCALL_COST (2);
9512 *cost
+= extra_cost
->alu
.rev
;
9519 /* No rev instruction available. Look at arm_legacy_rev
9520 and thumb_legacy_rev for the form of RTL used then. */
9523 *cost
+= COSTS_N_INSNS (9);
9527 *cost
+= 6 * extra_cost
->alu
.shift
;
9528 *cost
+= 3 * extra_cost
->alu
.logical
;
9533 *cost
+= COSTS_N_INSNS (4);
9537 *cost
+= 2 * extra_cost
->alu
.shift
;
9538 *cost
+= extra_cost
->alu
.arith_shift
;
9539 *cost
+= 2 * extra_cost
->alu
.logical
;
9547 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9548 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9550 if (GET_CODE (XEXP (x
, 0)) == MULT
9551 || GET_CODE (XEXP (x
, 1)) == MULT
)
9553 rtx mul_op0
, mul_op1
, sub_op
;
9556 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9558 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9560 mul_op0
= XEXP (XEXP (x
, 0), 0);
9561 mul_op1
= XEXP (XEXP (x
, 0), 1);
9562 sub_op
= XEXP (x
, 1);
9566 mul_op0
= XEXP (XEXP (x
, 1), 0);
9567 mul_op1
= XEXP (XEXP (x
, 1), 1);
9568 sub_op
= XEXP (x
, 0);
9571 /* The first operand of the multiply may be optionally
9573 if (GET_CODE (mul_op0
) == NEG
)
9574 mul_op0
= XEXP (mul_op0
, 0);
9576 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9577 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9578 + rtx_cost (sub_op
, mode
, code
, 0, speed_p
));
9584 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9590 rtx shift_by_reg
= NULL
;
9594 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_by_reg
);
9595 if (shift_op
== NULL
)
9597 shift_op
= shifter_op_p (XEXP (x
, 1), &shift_by_reg
);
9598 non_shift_op
= XEXP (x
, 0);
9601 non_shift_op
= XEXP (x
, 1);
9603 if (shift_op
!= NULL
)
9605 if (shift_by_reg
!= NULL
)
9608 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9609 *cost
+= rtx_cost (shift_by_reg
, mode
, code
, 0, speed_p
);
9612 *cost
+= extra_cost
->alu
.arith_shift
;
9614 *cost
+= rtx_cost (shift_op
, mode
, code
, 0, speed_p
);
9615 *cost
+= rtx_cost (non_shift_op
, mode
, code
, 0, speed_p
);
9620 && GET_CODE (XEXP (x
, 1)) == MULT
)
9624 *cost
+= extra_cost
->mult
[0].add
;
9625 *cost
+= rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
);
9626 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
, MULT
, 0, speed_p
);
9627 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 1), mode
, MULT
, 1, speed_p
);
9631 if (CONST_INT_P (XEXP (x
, 0)))
9633 int insns
= arm_gen_constant (MINUS
, SImode
, NULL_RTX
,
9634 INTVAL (XEXP (x
, 0)), NULL_RTX
,
9636 *cost
= COSTS_N_INSNS (insns
);
9638 *cost
+= insns
* extra_cost
->alu
.arith
;
9639 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9643 *cost
+= extra_cost
->alu
.arith
;
9648 if (GET_MODE_CLASS (mode
) == MODE_INT
9649 && GET_MODE_SIZE (mode
) < 4)
9651 rtx shift_op
, shift_reg
;
9654 /* We check both sides of the MINUS for shifter operands since,
9655 unlike PLUS, it's not commutative. */
9657 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 0);
9658 HANDLE_NARROW_SHIFT_ARITH (MINUS
, 1);
9660 /* Slightly disparage, as we might need to widen the result. */
9663 *cost
+= extra_cost
->alu
.arith
;
9665 if (CONST_INT_P (XEXP (x
, 0)))
9667 *cost
+= rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
);
9676 *cost
+= COSTS_N_INSNS (1);
9678 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
9680 rtx op1
= XEXP (x
, 1);
9683 *cost
+= 2 * extra_cost
->alu
.arith
;
9685 if (GET_CODE (op1
) == ZERO_EXTEND
)
9686 *cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, ZERO_EXTEND
,
9689 *cost
+= rtx_cost (op1
, mode
, MINUS
, 1, speed_p
);
9690 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9694 else if (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9697 *cost
+= extra_cost
->alu
.arith
+ extra_cost
->alu
.arith_shift
;
9698 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, SIGN_EXTEND
,
9700 + rtx_cost (XEXP (x
, 1), mode
, MINUS
, 1, speed_p
));
9703 else if (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9704 || GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)
9707 *cost
+= (extra_cost
->alu
.arith
9708 + (GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
9709 ? extra_cost
->alu
.arith
9710 : extra_cost
->alu
.arith_shift
));
9711 *cost
+= (rtx_cost (XEXP (x
, 0), mode
, MINUS
, 0, speed_p
)
9712 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
9713 GET_CODE (XEXP (x
, 1)), 0, speed_p
));
9718 *cost
+= 2 * extra_cost
->alu
.arith
;
9724 *cost
= LIBCALL_COST (2);
9728 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
9729 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
9731 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9733 rtx mul_op0
, mul_op1
, add_op
;
9736 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult_addsub
;
9738 mul_op0
= XEXP (XEXP (x
, 0), 0);
9739 mul_op1
= XEXP (XEXP (x
, 0), 1);
9740 add_op
= XEXP (x
, 1);
9742 *cost
+= (rtx_cost (mul_op0
, mode
, code
, 0, speed_p
)
9743 + rtx_cost (mul_op1
, mode
, code
, 0, speed_p
)
9744 + rtx_cost (add_op
, mode
, code
, 0, speed_p
));
9750 *cost
+= extra_cost
->fp
[mode
!= SFmode
].addsub
;
9753 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
9755 *cost
= LIBCALL_COST (2);
9759 /* Narrow modes can be synthesized in SImode, but the range
9760 of useful sub-operations is limited. Check for shift operations
9761 on one of the operands. Only left shifts can be used in the
9763 if (GET_MODE_CLASS (mode
) == MODE_INT
9764 && GET_MODE_SIZE (mode
) < 4)
9766 rtx shift_op
, shift_reg
;
9769 HANDLE_NARROW_SHIFT_ARITH (PLUS
, 0);
9771 if (CONST_INT_P (XEXP (x
, 1)))
9773 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9774 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9776 *cost
= COSTS_N_INSNS (insns
);
9778 *cost
+= insns
* extra_cost
->alu
.arith
;
9779 /* Slightly penalize a narrow operation as the result may
9781 *cost
+= 1 + rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9785 /* Slightly penalize a narrow operation as the result may
9789 *cost
+= extra_cost
->alu
.arith
;
9796 rtx shift_op
, shift_reg
;
9799 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9800 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
9802 /* UXTA[BH] or SXTA[BH]. */
9804 *cost
+= extra_cost
->alu
.extend_arith
;
9805 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9807 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 0, speed_p
));
9812 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
9813 if (shift_op
!= NULL
)
9818 *cost
+= extra_cost
->alu
.arith_shift_reg
;
9819 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9822 *cost
+= extra_cost
->alu
.arith_shift
;
9824 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9825 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9828 if (GET_CODE (XEXP (x
, 0)) == MULT
)
9830 rtx mul_op
= XEXP (x
, 0);
9832 if (TARGET_DSP_MULTIPLY
9833 && ((GET_CODE (XEXP (mul_op
, 0)) == SIGN_EXTEND
9834 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9835 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9836 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9837 && INTVAL (XEXP (XEXP (mul_op
, 1), 1)) == 16)))
9838 || (GET_CODE (XEXP (mul_op
, 0)) == ASHIFTRT
9839 && CONST_INT_P (XEXP (XEXP (mul_op
, 0), 1))
9840 && INTVAL (XEXP (XEXP (mul_op
, 0), 1)) == 16
9841 && (GET_CODE (XEXP (mul_op
, 1)) == SIGN_EXTEND
9842 || (GET_CODE (XEXP (mul_op
, 1)) == ASHIFTRT
9843 && CONST_INT_P (XEXP (XEXP (mul_op
, 1), 1))
9844 && (INTVAL (XEXP (XEXP (mul_op
, 1), 1))
9849 *cost
+= extra_cost
->mult
[0].extend_add
;
9850 *cost
+= (rtx_cost (XEXP (XEXP (mul_op
, 0), 0), mode
,
9851 SIGN_EXTEND
, 0, speed_p
)
9852 + rtx_cost (XEXP (XEXP (mul_op
, 1), 0), mode
,
9853 SIGN_EXTEND
, 0, speed_p
)
9854 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9859 *cost
+= extra_cost
->mult
[0].add
;
9860 *cost
+= (rtx_cost (XEXP (mul_op
, 0), mode
, MULT
, 0, speed_p
)
9861 + rtx_cost (XEXP (mul_op
, 1), mode
, MULT
, 1, speed_p
)
9862 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9865 if (CONST_INT_P (XEXP (x
, 1)))
9867 int insns
= arm_gen_constant (PLUS
, SImode
, NULL_RTX
,
9868 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9870 *cost
= COSTS_N_INSNS (insns
);
9872 *cost
+= insns
* extra_cost
->alu
.arith
;
9873 *cost
+= rtx_cost (XEXP (x
, 0), mode
, PLUS
, 0, speed_p
);
9877 *cost
+= extra_cost
->alu
.arith
;
9885 && GET_CODE (XEXP (x
, 0)) == MULT
9886 && ((GET_CODE (XEXP (XEXP (x
, 0), 0)) == ZERO_EXTEND
9887 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == ZERO_EXTEND
)
9888 || (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SIGN_EXTEND
9889 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == SIGN_EXTEND
)))
9892 *cost
+= extra_cost
->mult
[1].extend_add
;
9893 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
9894 ZERO_EXTEND
, 0, speed_p
)
9895 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 1), 0), mode
,
9896 ZERO_EXTEND
, 0, speed_p
)
9897 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9901 *cost
+= COSTS_N_INSNS (1);
9903 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9904 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
9907 *cost
+= (extra_cost
->alu
.arith
9908 + (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
9909 ? extra_cost
->alu
.arith
9910 : extra_cost
->alu
.arith_shift
));
9912 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
, ZERO_EXTEND
,
9914 + rtx_cost (XEXP (x
, 1), mode
, PLUS
, 1, speed_p
));
9919 *cost
+= 2 * extra_cost
->alu
.arith
;
9924 *cost
= LIBCALL_COST (2);
9927 if (mode
== SImode
&& arm_arch6
&& aarch_rev16_p (x
))
9930 *cost
+= extra_cost
->alu
.rev
;
9938 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
9939 rtx op0
= XEXP (x
, 0);
9940 rtx shift_op
, shift_reg
;
9944 || (code
== IOR
&& TARGET_THUMB2
)))
9945 op0
= XEXP (op0
, 0);
9948 shift_op
= shifter_op_p (op0
, &shift_reg
);
9949 if (shift_op
!= NULL
)
9954 *cost
+= extra_cost
->alu
.log_shift_reg
;
9955 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
9958 *cost
+= extra_cost
->alu
.log_shift
;
9960 *cost
+= (rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
)
9961 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9965 if (CONST_INT_P (XEXP (x
, 1)))
9967 int insns
= arm_gen_constant (code
, SImode
, NULL_RTX
,
9968 INTVAL (XEXP (x
, 1)), NULL_RTX
,
9971 *cost
= COSTS_N_INSNS (insns
);
9973 *cost
+= insns
* extra_cost
->alu
.logical
;
9974 *cost
+= rtx_cost (op0
, mode
, code
, 0, speed_p
);
9979 *cost
+= extra_cost
->alu
.logical
;
9980 *cost
+= (rtx_cost (op0
, mode
, code
, 0, speed_p
)
9981 + rtx_cost (XEXP (x
, 1), mode
, code
, 1, speed_p
));
9987 rtx op0
= XEXP (x
, 0);
9988 enum rtx_code subcode
= GET_CODE (op0
);
9990 *cost
+= COSTS_N_INSNS (1);
9994 || (code
== IOR
&& TARGET_THUMB2
)))
9995 op0
= XEXP (op0
, 0);
9997 if (GET_CODE (op0
) == ZERO_EXTEND
)
10000 *cost
+= 2 * extra_cost
->alu
.logical
;
10002 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, ZERO_EXTEND
,
10004 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10007 else if (GET_CODE (op0
) == SIGN_EXTEND
)
10010 *cost
+= extra_cost
->alu
.logical
+ extra_cost
->alu
.log_shift
;
10012 *cost
+= (rtx_cost (XEXP (op0
, 0), VOIDmode
, SIGN_EXTEND
,
10014 + rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed_p
));
10019 *cost
+= 2 * extra_cost
->alu
.logical
;
10025 *cost
= LIBCALL_COST (2);
10029 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10030 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10032 rtx op0
= XEXP (x
, 0);
10034 if (GET_CODE (op0
) == NEG
&& !flag_rounding_math
)
10035 op0
= XEXP (op0
, 0);
10038 *cost
+= extra_cost
->fp
[mode
!= SFmode
].mult
;
10040 *cost
+= (rtx_cost (op0
, mode
, MULT
, 0, speed_p
)
10041 + rtx_cost (XEXP (x
, 1), mode
, MULT
, 1, speed_p
));
10044 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10046 *cost
= LIBCALL_COST (2);
10050 if (mode
== SImode
)
10052 if (TARGET_DSP_MULTIPLY
10053 && ((GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10054 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10055 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10056 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10057 && INTVAL (XEXP (XEXP (x
, 1), 1)) == 16)))
10058 || (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10059 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10060 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 16
10061 && (GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
10062 || (GET_CODE (XEXP (x
, 1)) == ASHIFTRT
10063 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
10064 && (INTVAL (XEXP (XEXP (x
, 1), 1))
10067 /* SMUL[TB][TB]. */
10069 *cost
+= extra_cost
->mult
[0].extend
;
10070 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
10071 SIGN_EXTEND
, 0, speed_p
);
10072 *cost
+= rtx_cost (XEXP (XEXP (x
, 1), 0), mode
,
10073 SIGN_EXTEND
, 1, speed_p
);
10077 *cost
+= extra_cost
->mult
[0].simple
;
10081 if (mode
== DImode
)
10084 && ((GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
10085 && GET_CODE (XEXP (x
, 1)) == ZERO_EXTEND
)
10086 || (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
10087 && GET_CODE (XEXP (x
, 1)) == SIGN_EXTEND
)))
10090 *cost
+= extra_cost
->mult
[1].extend
;
10091 *cost
+= (rtx_cost (XEXP (XEXP (x
, 0), 0), VOIDmode
,
10092 ZERO_EXTEND
, 0, speed_p
)
10093 + rtx_cost (XEXP (XEXP (x
, 1), 0), VOIDmode
,
10094 ZERO_EXTEND
, 0, speed_p
));
10098 *cost
= LIBCALL_COST (2);
10103 *cost
= LIBCALL_COST (2);
10107 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10108 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10110 if (GET_CODE (XEXP (x
, 0)) == MULT
)
10113 *cost
= rtx_cost (XEXP (x
, 0), mode
, NEG
, 0, speed_p
);
10118 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10122 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10124 *cost
= LIBCALL_COST (1);
10128 if (mode
== SImode
)
10130 if (GET_CODE (XEXP (x
, 0)) == ABS
)
10132 *cost
+= COSTS_N_INSNS (1);
10133 /* Assume the non-flag-changing variant. */
10135 *cost
+= (extra_cost
->alu
.log_shift
10136 + extra_cost
->alu
.arith_shift
);
10137 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, ABS
, 0, speed_p
);
10141 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
10142 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
10144 *cost
+= COSTS_N_INSNS (1);
10145 /* No extra cost for MOV imm and MVN imm. */
10146 /* If the comparison op is using the flags, there's no further
10147 cost, otherwise we need to add the cost of the comparison. */
10148 if (!(REG_P (XEXP (XEXP (x
, 0), 0))
10149 && REGNO (XEXP (XEXP (x
, 0), 0)) == CC_REGNUM
10150 && XEXP (XEXP (x
, 0), 1) == const0_rtx
))
10152 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
10153 *cost
+= (COSTS_N_INSNS (1)
10154 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, COMPARE
,
10156 + rtx_cost (XEXP (XEXP (x
, 0), 1), mode
, COMPARE
,
10159 *cost
+= extra_cost
->alu
.arith
;
10165 *cost
+= extra_cost
->alu
.arith
;
10169 if (GET_MODE_CLASS (mode
) == MODE_INT
10170 && GET_MODE_SIZE (mode
) < 4)
10172 /* Slightly disparage, as we might need an extend operation. */
10175 *cost
+= extra_cost
->alu
.arith
;
10179 if (mode
== DImode
)
10181 *cost
+= COSTS_N_INSNS (1);
10183 *cost
+= 2 * extra_cost
->alu
.arith
;
10188 *cost
= LIBCALL_COST (1);
10192 if (mode
== SImode
)
10195 rtx shift_reg
= NULL
;
10197 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10201 if (shift_reg
!= NULL
)
10204 *cost
+= extra_cost
->alu
.log_shift_reg
;
10205 *cost
+= rtx_cost (shift_reg
, mode
, ASHIFT
, 1, speed_p
);
10208 *cost
+= extra_cost
->alu
.log_shift
;
10209 *cost
+= rtx_cost (shift_op
, mode
, ASHIFT
, 0, speed_p
);
10214 *cost
+= extra_cost
->alu
.logical
;
10217 if (mode
== DImode
)
10219 *cost
+= COSTS_N_INSNS (1);
10225 *cost
+= LIBCALL_COST (1);
10230 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
10232 *cost
+= COSTS_N_INSNS (3);
10235 int op1cost
= rtx_cost (XEXP (x
, 1), mode
, SET
, 1, speed_p
);
10236 int op2cost
= rtx_cost (XEXP (x
, 2), mode
, SET
, 1, speed_p
);
10238 *cost
= rtx_cost (XEXP (x
, 0), mode
, IF_THEN_ELSE
, 0, speed_p
);
10239 /* Assume that if one arm of the if_then_else is a register,
10240 that it will be tied with the result and eliminate the
10241 conditional insn. */
10242 if (REG_P (XEXP (x
, 1)))
10244 else if (REG_P (XEXP (x
, 2)))
10250 if (extra_cost
->alu
.non_exec_costs_exec
)
10251 *cost
+= op1cost
+ op2cost
+ extra_cost
->alu
.non_exec
;
10253 *cost
+= MAX (op1cost
, op2cost
) + extra_cost
->alu
.non_exec
;
10256 *cost
+= op1cost
+ op2cost
;
10262 if (cc_register (XEXP (x
, 0), VOIDmode
) && XEXP (x
, 1) == const0_rtx
)
10266 machine_mode op0mode
;
10267 /* We'll mostly assume that the cost of a compare is the cost of the
10268 LHS. However, there are some notable exceptions. */
10270 /* Floating point compares are never done as side-effects. */
10271 op0mode
= GET_MODE (XEXP (x
, 0));
10272 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (op0mode
) == MODE_FLOAT
10273 && (op0mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10276 *cost
+= extra_cost
->fp
[op0mode
!= SFmode
].compare
;
10278 if (XEXP (x
, 1) == CONST0_RTX (op0mode
))
10280 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, code
, 0, speed_p
);
10286 else if (GET_MODE_CLASS (op0mode
) == MODE_FLOAT
)
10288 *cost
= LIBCALL_COST (2);
10292 /* DImode compares normally take two insns. */
10293 if (op0mode
== DImode
)
10295 *cost
+= COSTS_N_INSNS (1);
10297 *cost
+= 2 * extra_cost
->alu
.arith
;
10301 if (op0mode
== SImode
)
10306 if (XEXP (x
, 1) == const0_rtx
10307 && !(REG_P (XEXP (x
, 0))
10308 || (GET_CODE (XEXP (x
, 0)) == SUBREG
10309 && REG_P (SUBREG_REG (XEXP (x
, 0))))))
10311 *cost
= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10313 /* Multiply operations that set the flags are often
10314 significantly more expensive. */
10316 && GET_CODE (XEXP (x
, 0)) == MULT
10317 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), mode
))
10318 *cost
+= extra_cost
->mult
[0].flag_setting
;
10321 && GET_CODE (XEXP (x
, 0)) == PLUS
10322 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10323 && !power_of_two_operand (XEXP (XEXP (XEXP (x
, 0),
10325 *cost
+= extra_cost
->mult
[0].flag_setting
;
10330 shift_op
= shifter_op_p (XEXP (x
, 0), &shift_reg
);
10331 if (shift_op
!= NULL
)
10333 if (shift_reg
!= NULL
)
10335 *cost
+= rtx_cost (shift_reg
, op0mode
, ASHIFT
,
10338 *cost
+= extra_cost
->alu
.arith_shift_reg
;
10341 *cost
+= extra_cost
->alu
.arith_shift
;
10342 *cost
+= rtx_cost (shift_op
, op0mode
, ASHIFT
, 0, speed_p
);
10343 *cost
+= rtx_cost (XEXP (x
, 1), op0mode
, COMPARE
, 1, speed_p
);
10348 *cost
+= extra_cost
->alu
.arith
;
10349 if (CONST_INT_P (XEXP (x
, 1))
10350 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10352 *cost
+= rtx_cost (XEXP (x
, 0), op0mode
, COMPARE
, 0, speed_p
);
10360 *cost
= LIBCALL_COST (2);
10383 if (outer_code
== SET
)
10385 /* Is it a store-flag operation? */
10386 if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10387 && XEXP (x
, 1) == const0_rtx
)
10389 /* Thumb also needs an IT insn. */
10390 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 2 : 1);
10393 if (XEXP (x
, 1) == const0_rtx
)
10398 /* LSR Rd, Rn, #31. */
10400 *cost
+= extra_cost
->alu
.shift
;
10410 *cost
+= COSTS_N_INSNS (1);
10414 /* RSBS T1, Rn, Rn, LSR #31
10416 *cost
+= COSTS_N_INSNS (1);
10418 *cost
+= extra_cost
->alu
.arith_shift
;
10422 /* RSB Rd, Rn, Rn, ASR #1
10423 LSR Rd, Rd, #31. */
10424 *cost
+= COSTS_N_INSNS (1);
10426 *cost
+= (extra_cost
->alu
.arith_shift
10427 + extra_cost
->alu
.shift
);
10433 *cost
+= COSTS_N_INSNS (1);
10435 *cost
+= extra_cost
->alu
.shift
;
10439 /* Remaining cases are either meaningless or would take
10440 three insns anyway. */
10441 *cost
= COSTS_N_INSNS (3);
10444 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10449 *cost
+= COSTS_N_INSNS (TARGET_THUMB
? 3 : 2);
10450 if (CONST_INT_P (XEXP (x
, 1))
10451 && const_ok_for_op (INTVAL (XEXP (x
, 1)), COMPARE
))
10453 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10460 /* Not directly inside a set. If it involves the condition code
10461 register it must be the condition for a branch, cond_exec or
10462 I_T_E operation. Since the comparison is performed elsewhere
10463 this is just the control part which has no additional
10465 else if (REG_P (XEXP (x
, 0)) && REGNO (XEXP (x
, 0)) == CC_REGNUM
10466 && XEXP (x
, 1) == const0_rtx
)
10474 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10475 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10478 *cost
+= extra_cost
->fp
[mode
!= SFmode
].neg
;
10482 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10484 *cost
= LIBCALL_COST (1);
10488 if (mode
== SImode
)
10491 *cost
+= extra_cost
->alu
.log_shift
+ extra_cost
->alu
.arith_shift
;
10495 *cost
= LIBCALL_COST (1);
10499 if ((arm_arch4
|| GET_MODE (XEXP (x
, 0)) == SImode
)
10500 && MEM_P (XEXP (x
, 0)))
10502 if (mode
== DImode
)
10503 *cost
+= COSTS_N_INSNS (1);
10508 if (GET_MODE (XEXP (x
, 0)) == SImode
)
10509 *cost
+= extra_cost
->ldst
.load
;
10511 *cost
+= extra_cost
->ldst
.load_sign_extend
;
10513 if (mode
== DImode
)
10514 *cost
+= extra_cost
->alu
.shift
;
10519 /* Widening from less than 32-bits requires an extend operation. */
10520 if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10522 /* We have SXTB/SXTH. */
10523 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10525 *cost
+= extra_cost
->alu
.extend
;
10527 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10529 /* Needs two shifts. */
10530 *cost
+= COSTS_N_INSNS (1);
10531 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10533 *cost
+= 2 * extra_cost
->alu
.shift
;
10536 /* Widening beyond 32-bits requires one more insn. */
10537 if (mode
== DImode
)
10539 *cost
+= COSTS_N_INSNS (1);
10541 *cost
+= extra_cost
->alu
.shift
;
10548 || GET_MODE (XEXP (x
, 0)) == SImode
10549 || GET_MODE (XEXP (x
, 0)) == QImode
)
10550 && MEM_P (XEXP (x
, 0)))
10552 *cost
= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10554 if (mode
== DImode
)
10555 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10560 /* Widening from less than 32-bits requires an extend operation. */
10561 if (GET_MODE (XEXP (x
, 0)) == QImode
)
10563 /* UXTB can be a shorter instruction in Thumb2, but it might
10564 be slower than the AND Rd, Rn, #255 alternative. When
10565 optimizing for speed it should never be slower to use
10566 AND, and we don't really model 16-bit vs 32-bit insns
10569 *cost
+= extra_cost
->alu
.logical
;
10571 else if (GET_MODE (XEXP (x
, 0)) != SImode
&& arm_arch6
)
10573 /* We have UXTB/UXTH. */
10574 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10576 *cost
+= extra_cost
->alu
.extend
;
10578 else if (GET_MODE (XEXP (x
, 0)) != SImode
)
10580 /* Needs two shifts. It's marginally preferable to use
10581 shifts rather than two BIC instructions as the second
10582 shift may merge with a subsequent insn as a shifter
10584 *cost
= COSTS_N_INSNS (2);
10585 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10587 *cost
+= 2 * extra_cost
->alu
.shift
;
10590 /* Widening beyond 32-bits requires one more insn. */
10591 if (mode
== DImode
)
10593 *cost
+= COSTS_N_INSNS (1); /* No speed penalty. */
10600 /* CONST_INT has no mode, so we cannot tell for sure how many
10601 insns are really going to be needed. The best we can do is
10602 look at the value passed. If it fits in SImode, then assume
10603 that's the mode it will be used for. Otherwise assume it
10604 will be used in DImode. */
10605 if (INTVAL (x
) == trunc_int_for_mode (INTVAL (x
), SImode
))
10610 /* Avoid blowing up in arm_gen_constant (). */
10611 if (!(outer_code
== PLUS
10612 || outer_code
== AND
10613 || outer_code
== IOR
10614 || outer_code
== XOR
10615 || outer_code
== MINUS
))
10619 if (mode
== SImode
)
10621 *cost
+= COSTS_N_INSNS (arm_gen_constant (outer_code
, SImode
, NULL
,
10622 INTVAL (x
), NULL
, NULL
,
10628 *cost
+= COSTS_N_INSNS (arm_gen_constant
10629 (outer_code
, SImode
, NULL
,
10630 trunc_int_for_mode (INTVAL (x
), SImode
),
10632 + arm_gen_constant (outer_code
, SImode
, NULL
,
10633 INTVAL (x
) >> 32, NULL
,
10645 if (arm_arch_thumb2
&& !flag_pic
)
10646 *cost
+= COSTS_N_INSNS (1);
10648 *cost
+= extra_cost
->ldst
.load
;
10651 *cost
+= COSTS_N_INSNS (1);
10655 *cost
+= COSTS_N_INSNS (1);
10657 *cost
+= extra_cost
->alu
.arith
;
10663 *cost
= COSTS_N_INSNS (4);
10668 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
10669 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
10671 if (vfp3_const_double_rtx (x
))
10674 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
10680 if (mode
== DFmode
)
10681 *cost
+= extra_cost
->ldst
.loadd
;
10683 *cost
+= extra_cost
->ldst
.loadf
;
10686 *cost
+= COSTS_N_INSNS (1 + (mode
== DFmode
));
10690 *cost
= COSTS_N_INSNS (4);
10696 && TARGET_HARD_FLOAT
10697 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
10698 && neon_immediate_valid_for_move (x
, mode
, NULL
, NULL
))
10699 *cost
= COSTS_N_INSNS (1);
10701 *cost
= COSTS_N_INSNS (4);
10706 /* When optimizing for size, we prefer constant pool entries to
10707 MOVW/MOVT pairs, so bump the cost of these slightly. */
10714 *cost
+= extra_cost
->alu
.clz
;
10718 if (XEXP (x
, 1) == const0_rtx
)
10721 *cost
+= extra_cost
->alu
.log_shift
;
10722 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10725 /* Fall through. */
10729 *cost
+= COSTS_N_INSNS (1);
10733 if (GET_CODE (XEXP (x
, 0)) == ASHIFTRT
10734 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
10735 && INTVAL (XEXP (XEXP (x
, 0), 1)) == 32
10736 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10737 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
10738 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
)
10739 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
10740 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10744 *cost
+= extra_cost
->mult
[1].extend
;
10745 *cost
+= (rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), VOIDmode
,
10746 ZERO_EXTEND
, 0, speed_p
)
10747 + rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 1), VOIDmode
,
10748 ZERO_EXTEND
, 0, speed_p
));
10751 *cost
= LIBCALL_COST (1);
10754 case UNSPEC_VOLATILE
:
10756 return arm_unspec_cost (x
, outer_code
, speed_p
, cost
);
10759 /* Reading the PC is like reading any other register. Writing it
10760 is more expensive, but we take that into account elsewhere. */
10765 /* TODO: Simple zero_extract of bottom bits using AND. */
10766 /* Fall through. */
10770 && CONST_INT_P (XEXP (x
, 1))
10771 && CONST_INT_P (XEXP (x
, 2)))
10774 *cost
+= extra_cost
->alu
.bfx
;
10775 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10778 /* Without UBFX/SBFX, need to resort to shift operations. */
10779 *cost
+= COSTS_N_INSNS (1);
10781 *cost
+= 2 * extra_cost
->alu
.shift
;
10782 *cost
+= rtx_cost (XEXP (x
, 0), mode
, ASHIFT
, 0, speed_p
);
10786 if (TARGET_HARD_FLOAT
)
10789 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
10791 && GET_MODE (XEXP (x
, 0)) == HFmode
)
10793 /* Pre v8, widening HF->DF is a two-step process, first
10794 widening to SFmode. */
10795 *cost
+= COSTS_N_INSNS (1);
10797 *cost
+= extra_cost
->fp
[0].widen
;
10799 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10803 *cost
= LIBCALL_COST (1);
10806 case FLOAT_TRUNCATE
:
10807 if (TARGET_HARD_FLOAT
)
10810 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
10811 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, code
, 0, speed_p
);
10813 /* Vector modes? */
10815 *cost
= LIBCALL_COST (1);
10819 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
&& TARGET_FMA
)
10821 rtx op0
= XEXP (x
, 0);
10822 rtx op1
= XEXP (x
, 1);
10823 rtx op2
= XEXP (x
, 2);
10826 /* vfms or vfnma. */
10827 if (GET_CODE (op0
) == NEG
)
10828 op0
= XEXP (op0
, 0);
10830 /* vfnms or vfnma. */
10831 if (GET_CODE (op2
) == NEG
)
10832 op2
= XEXP (op2
, 0);
10834 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed_p
);
10835 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed_p
);
10836 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed_p
);
10839 *cost
+= extra_cost
->fp
[mode
==DFmode
].fma
;
10844 *cost
= LIBCALL_COST (3);
10849 if (TARGET_HARD_FLOAT
)
10851 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10852 a vcvt fixed-point conversion. */
10853 if (code
== FIX
&& mode
== SImode
10854 && GET_CODE (XEXP (x
, 0)) == FIX
10855 && GET_MODE (XEXP (x
, 0)) == SFmode
10856 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
10857 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x
, 0), 0), 1))
10861 *cost
+= extra_cost
->fp
[0].toint
;
10863 *cost
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
10868 if (GET_MODE_CLASS (mode
) == MODE_INT
)
10870 mode
= GET_MODE (XEXP (x
, 0));
10872 *cost
+= extra_cost
->fp
[mode
== DFmode
].toint
;
10873 /* Strip of the 'cost' of rounding towards zero. */
10874 if (GET_CODE (XEXP (x
, 0)) == FIX
)
10875 *cost
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, code
,
10878 *cost
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed_p
);
10879 /* ??? Increase the cost to deal with transferring from
10880 FP -> CORE registers? */
10883 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
10887 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
10890 /* Vector costs? */
10892 *cost
= LIBCALL_COST (1);
10896 case UNSIGNED_FLOAT
:
10897 if (TARGET_HARD_FLOAT
)
10899 /* ??? Increase the cost to deal with transferring from CORE
10900 -> FP registers? */
10902 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
10905 *cost
= LIBCALL_COST (1);
10913 /* Just a guess. Guess number of instructions in the asm
10914 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10915 though (see PR60663). */
10916 int asm_length
= MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x
)));
10917 int num_operands
= ASM_OPERANDS_INPUT_LENGTH (x
);
10919 *cost
= COSTS_N_INSNS (asm_length
+ num_operands
);
10923 if (mode
!= VOIDmode
)
10924 *cost
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
10926 *cost
= COSTS_N_INSNS (4); /* Who knows? */
10931 #undef HANDLE_NARROW_SHIFT_ARITH
10933 /* RTX costs entry point. */
10936 arm_rtx_costs (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
, int outer_code
,
10937 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
10940 int code
= GET_CODE (x
);
10941 gcc_assert (current_tune
->insn_extra_cost
);
10943 result
= arm_rtx_costs_internal (x
, (enum rtx_code
) code
,
10944 (enum rtx_code
) outer_code
,
10945 current_tune
->insn_extra_cost
,
10948 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
10950 print_rtl_single (dump_file
, x
);
10951 fprintf (dump_file
, "\n%s cost: %d (%s)\n", speed
? "Hot" : "Cold",
10952 *total
, result
? "final" : "partial");
10957 /* All address computations that can be done are free, but rtx cost returns
10958 the same for practically all of them. So we weight the different types
10959 of address here in the order (most pref first):
10960 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10962 arm_arm_address_cost (rtx x
)
10964 enum rtx_code c
= GET_CODE (x
);
10966 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
10968 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
10973 if (CONST_INT_P (XEXP (x
, 1)))
10976 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
10986 arm_thumb_address_cost (rtx x
)
10988 enum rtx_code c
= GET_CODE (x
);
10993 && REG_P (XEXP (x
, 0))
10994 && CONST_INT_P (XEXP (x
, 1)))
11001 arm_address_cost (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
,
11002 addr_space_t as ATTRIBUTE_UNUSED
, bool speed ATTRIBUTE_UNUSED
)
11004 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
11007 /* Adjust cost hook for XScale. */
11009 xscale_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11012 /* Some true dependencies can have a higher cost depending
11013 on precisely how certain input operands are used. */
11015 && recog_memoized (insn
) >= 0
11016 && recog_memoized (dep
) >= 0)
11018 int shift_opnum
= get_attr_shift (insn
);
11019 enum attr_type attr_type
= get_attr_type (dep
);
11021 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11022 operand for INSN. If we have a shifted input operand and the
11023 instruction we depend on is another ALU instruction, then we may
11024 have to account for an additional stall. */
11025 if (shift_opnum
!= 0
11026 && (attr_type
== TYPE_ALU_SHIFT_IMM
11027 || attr_type
== TYPE_ALUS_SHIFT_IMM
11028 || attr_type
== TYPE_LOGIC_SHIFT_IMM
11029 || attr_type
== TYPE_LOGICS_SHIFT_IMM
11030 || attr_type
== TYPE_ALU_SHIFT_REG
11031 || attr_type
== TYPE_ALUS_SHIFT_REG
11032 || attr_type
== TYPE_LOGIC_SHIFT_REG
11033 || attr_type
== TYPE_LOGICS_SHIFT_REG
11034 || attr_type
== TYPE_MOV_SHIFT
11035 || attr_type
== TYPE_MVN_SHIFT
11036 || attr_type
== TYPE_MOV_SHIFT_REG
11037 || attr_type
== TYPE_MVN_SHIFT_REG
))
11039 rtx shifted_operand
;
11042 /* Get the shifted operand. */
11043 extract_insn (insn
);
11044 shifted_operand
= recog_data
.operand
[shift_opnum
];
11046 /* Iterate over all the operands in DEP. If we write an operand
11047 that overlaps with SHIFTED_OPERAND, then we have increase the
11048 cost of this dependency. */
11049 extract_insn (dep
);
11050 preprocess_constraints (dep
);
11051 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
11053 /* We can ignore strict inputs. */
11054 if (recog_data
.operand_type
[opno
] == OP_IN
)
11057 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
11069 /* Adjust cost hook for Cortex A9. */
11071 cortex_a9_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11081 case REG_DEP_OUTPUT
:
11082 if (recog_memoized (insn
) >= 0
11083 && recog_memoized (dep
) >= 0)
11085 if (GET_CODE (PATTERN (insn
)) == SET
)
11088 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
11090 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
11092 enum attr_type attr_type_insn
= get_attr_type (insn
);
11093 enum attr_type attr_type_dep
= get_attr_type (dep
);
11095 /* By default all dependencies of the form
11098 have an extra latency of 1 cycle because
11099 of the input and output dependency in this
11100 case. However this gets modeled as an true
11101 dependency and hence all these checks. */
11102 if (REG_P (SET_DEST (PATTERN (insn
)))
11103 && reg_set_p (SET_DEST (PATTERN (insn
)), dep
))
11105 /* FMACS is a special case where the dependent
11106 instruction can be issued 3 cycles before
11107 the normal latency in case of an output
11109 if ((attr_type_insn
== TYPE_FMACS
11110 || attr_type_insn
== TYPE_FMACD
)
11111 && (attr_type_dep
== TYPE_FMACS
11112 || attr_type_dep
== TYPE_FMACD
))
11114 if (dep_type
== REG_DEP_OUTPUT
)
11115 *cost
= insn_default_latency (dep
) - 3;
11117 *cost
= insn_default_latency (dep
);
11122 if (dep_type
== REG_DEP_OUTPUT
)
11123 *cost
= insn_default_latency (dep
) + 1;
11125 *cost
= insn_default_latency (dep
);
11135 gcc_unreachable ();
11141 /* Adjust cost hook for FA726TE. */
11143 fa726te_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
,
11146 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11147 have penalty of 3. */
11148 if (dep_type
== REG_DEP_TRUE
11149 && recog_memoized (insn
) >= 0
11150 && recog_memoized (dep
) >= 0
11151 && get_attr_conds (dep
) == CONDS_SET
)
11153 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11154 if (get_attr_conds (insn
) == CONDS_USE
11155 && get_attr_type (insn
) != TYPE_BRANCH
)
11161 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
11162 || get_attr_conds (insn
) == CONDS_USE
)
11172 /* Implement TARGET_REGISTER_MOVE_COST.
11174 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11175 it is typically more expensive than a single memory access. We set
11176 the cost to less than two memory accesses so that floating
11177 point to integer conversion does not go through memory. */
11180 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
11181 reg_class_t from
, reg_class_t to
)
11185 if ((IS_VFP_CLASS (from
) && !IS_VFP_CLASS (to
))
11186 || (!IS_VFP_CLASS (from
) && IS_VFP_CLASS (to
)))
11188 else if ((from
== IWMMXT_REGS
&& to
!= IWMMXT_REGS
)
11189 || (from
!= IWMMXT_REGS
&& to
== IWMMXT_REGS
))
11191 else if (from
== IWMMXT_GR_REGS
|| to
== IWMMXT_GR_REGS
)
11198 if (from
== HI_REGS
|| to
== HI_REGS
)
11205 /* Implement TARGET_MEMORY_MOVE_COST. */
11208 arm_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
11209 bool in ATTRIBUTE_UNUSED
)
11215 if (GET_MODE_SIZE (mode
) < 4)
11218 return ((2 * GET_MODE_SIZE (mode
)) * (rclass
== LO_REGS
? 1 : 2));
11222 /* Vectorizer cost model implementation. */
11224 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11226 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
11228 int misalign ATTRIBUTE_UNUSED
)
11232 switch (type_of_cost
)
11235 return current_tune
->vec_costs
->scalar_stmt_cost
;
11238 return current_tune
->vec_costs
->scalar_load_cost
;
11241 return current_tune
->vec_costs
->scalar_store_cost
;
11244 return current_tune
->vec_costs
->vec_stmt_cost
;
11247 return current_tune
->vec_costs
->vec_align_load_cost
;
11250 return current_tune
->vec_costs
->vec_store_cost
;
11252 case vec_to_scalar
:
11253 return current_tune
->vec_costs
->vec_to_scalar_cost
;
11255 case scalar_to_vec
:
11256 return current_tune
->vec_costs
->scalar_to_vec_cost
;
11258 case unaligned_load
:
11259 case vector_gather_load
:
11260 return current_tune
->vec_costs
->vec_unalign_load_cost
;
11262 case unaligned_store
:
11263 case vector_scatter_store
:
11264 return current_tune
->vec_costs
->vec_unalign_store_cost
;
11266 case cond_branch_taken
:
11267 return current_tune
->vec_costs
->cond_taken_branch_cost
;
11269 case cond_branch_not_taken
:
11270 return current_tune
->vec_costs
->cond_not_taken_branch_cost
;
11273 case vec_promote_demote
:
11274 return current_tune
->vec_costs
->vec_stmt_cost
;
11276 case vec_construct
:
11277 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
11278 return elements
/ 2 + 1;
11281 gcc_unreachable ();
11285 /* Implement targetm.vectorize.add_stmt_cost. */
11288 arm_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
11289 struct _stmt_vec_info
*stmt_info
, int misalign
,
11290 enum vect_cost_model_location where
)
11292 unsigned *cost
= (unsigned *) data
;
11293 unsigned retval
= 0;
11295 if (flag_vect_cost_model
)
11297 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
11298 int stmt_cost
= arm_builtin_vectorization_cost (kind
, vectype
, misalign
);
11300 /* Statements in an inner loop relative to the loop being
11301 vectorized are weighted more heavily. The value here is
11302 arbitrary and could potentially be improved with analysis. */
11303 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
11304 count
*= 50; /* FIXME. */
11306 retval
= (unsigned) (count
* stmt_cost
);
11307 cost
[where
] += retval
;
11313 /* Return true if and only if this insn can dual-issue only as older. */
11315 cortexa7_older_only (rtx_insn
*insn
)
11317 if (recog_memoized (insn
) < 0)
11320 switch (get_attr_type (insn
))
11322 case TYPE_ALU_DSP_REG
:
11323 case TYPE_ALU_SREG
:
11324 case TYPE_ALUS_SREG
:
11325 case TYPE_LOGIC_REG
:
11326 case TYPE_LOGICS_REG
:
11328 case TYPE_ADCS_REG
:
11333 case TYPE_SHIFT_IMM
:
11334 case TYPE_SHIFT_REG
:
11335 case TYPE_LOAD_BYTE
:
11338 case TYPE_FFARITHS
:
11340 case TYPE_FFARITHD
:
11358 case TYPE_F_STORES
:
11365 /* Return true if and only if this insn can dual-issue as younger. */
11367 cortexa7_younger (FILE *file
, int verbose
, rtx_insn
*insn
)
11369 if (recog_memoized (insn
) < 0)
11372 fprintf (file
, ";; not cortexa7_younger %d\n", INSN_UID (insn
));
11376 switch (get_attr_type (insn
))
11379 case TYPE_ALUS_IMM
:
11380 case TYPE_LOGIC_IMM
:
11381 case TYPE_LOGICS_IMM
:
11386 case TYPE_MOV_SHIFT
:
11387 case TYPE_MOV_SHIFT_REG
:
11397 /* Look for an instruction that can dual issue only as an older
11398 instruction, and move it in front of any instructions that can
11399 dual-issue as younger, while preserving the relative order of all
11400 other instructions in the ready list. This is a hueuristic to help
11401 dual-issue in later cycles, by postponing issue of more flexible
11402 instructions. This heuristic may affect dual issue opportunities
11403 in the current cycle. */
11405 cortexa7_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
,
11406 int *n_readyp
, int clock
)
11409 int first_older_only
= -1, first_younger
= -1;
11413 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11417 /* Traverse the ready list from the head (the instruction to issue
11418 first), and looking for the first instruction that can issue as
11419 younger and the first instruction that can dual-issue only as
11421 for (i
= *n_readyp
- 1; i
>= 0; i
--)
11423 rtx_insn
*insn
= ready
[i
];
11424 if (cortexa7_older_only (insn
))
11426 first_older_only
= i
;
11428 fprintf (file
, ";; reorder older found %d\n", INSN_UID (insn
));
11431 else if (cortexa7_younger (file
, verbose
, insn
) && first_younger
== -1)
11435 /* Nothing to reorder because either no younger insn found or insn
11436 that can dual-issue only as older appears before any insn that
11437 can dual-issue as younger. */
11438 if (first_younger
== -1)
11441 fprintf (file
, ";; sched_reorder nothing to reorder as no younger\n");
11445 /* Nothing to reorder because no older-only insn in the ready list. */
11446 if (first_older_only
== -1)
11449 fprintf (file
, ";; sched_reorder nothing to reorder as no older_only\n");
11453 /* Move first_older_only insn before first_younger. */
11455 fprintf (file
, ";; cortexa7_sched_reorder insn %d before %d\n",
11456 INSN_UID(ready
[first_older_only
]),
11457 INSN_UID(ready
[first_younger
]));
11458 rtx_insn
*first_older_only_insn
= ready
[first_older_only
];
11459 for (i
= first_older_only
; i
< first_younger
; i
++)
11461 ready
[i
] = ready
[i
+1];
11464 ready
[i
] = first_older_only_insn
;
11468 /* Implement TARGET_SCHED_REORDER. */
11470 arm_sched_reorder (FILE *file
, int verbose
, rtx_insn
**ready
, int *n_readyp
,
11475 case TARGET_CPU_cortexa7
:
11476 cortexa7_sched_reorder (file
, verbose
, ready
, n_readyp
, clock
);
11479 /* Do nothing for other cores. */
11483 return arm_issue_rate ();
11486 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11487 It corrects the value of COST based on the relationship between
11488 INSN and DEP through the dependence LINK. It returns the new
11489 value. There is a per-core adjust_cost hook to adjust scheduler costs
11490 and the per-core hook can choose to completely override the generic
11491 adjust_cost function. Only put bits of code into arm_adjust_cost that
11492 are common across all cores. */
11494 arm_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep
, int cost
,
11499 /* When generating Thumb-1 code, we want to place flag-setting operations
11500 close to a conditional branch which depends on them, so that we can
11501 omit the comparison. */
11504 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
11505 && recog_memoized (dep
) >= 0
11506 && get_attr_conds (dep
) == CONDS_SET
)
11509 if (current_tune
->sched_adjust_cost
!= NULL
)
11511 if (!current_tune
->sched_adjust_cost (insn
, dep_type
, dep
, &cost
))
11515 /* XXX Is this strictly true? */
11516 if (dep_type
== REG_DEP_ANTI
11517 || dep_type
== REG_DEP_OUTPUT
)
11520 /* Call insns don't incur a stall, even if they follow a load. */
11525 if ((i_pat
= single_set (insn
)) != NULL
11526 && MEM_P (SET_SRC (i_pat
))
11527 && (d_pat
= single_set (dep
)) != NULL
11528 && MEM_P (SET_DEST (d_pat
)))
11530 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
11531 /* This is a load after a store, there is no conflict if the load reads
11532 from a cached area. Assume that loads from the stack, and from the
11533 constant pool are cached, and that others will miss. This is a
11536 if ((GET_CODE (src_mem
) == SYMBOL_REF
11537 && CONSTANT_POOL_ADDRESS_P (src_mem
))
11538 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
11539 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
11540 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
11548 arm_max_conditional_execute (void)
11550 return max_insns_skipped
;
11554 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
11557 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
11559 return (optimize
> 0) ? 2 : 0;
11563 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
11565 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11568 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11569 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11570 sequences of non-executed instructions in IT blocks probably take the same
11571 amount of time as executed instructions (and the IT instruction itself takes
11572 space in icache). This function was experimentally determined to give good
11573 results on a popular embedded benchmark. */
11576 arm_cortex_m_branch_cost (bool speed_p
, bool predictable_p
)
11578 return (TARGET_32BIT
&& speed_p
) ? 1
11579 : arm_default_branch_cost (speed_p
, predictable_p
);
11583 arm_cortex_m7_branch_cost (bool speed_p
, bool predictable_p
)
11585 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
11588 static bool fp_consts_inited
= false;
11590 static REAL_VALUE_TYPE value_fp0
;
11593 init_fp_table (void)
11597 r
= REAL_VALUE_ATOF ("0", DFmode
);
11599 fp_consts_inited
= true;
11602 /* Return TRUE if rtx X is a valid immediate FP constant. */
11604 arm_const_double_rtx (rtx x
)
11606 const REAL_VALUE_TYPE
*r
;
11608 if (!fp_consts_inited
)
11611 r
= CONST_DOUBLE_REAL_VALUE (x
);
11612 if (REAL_VALUE_MINUS_ZERO (*r
))
11615 if (real_equal (r
, &value_fp0
))
11621 /* VFPv3 has a fairly wide range of representable immediates, formed from
11622 "quarter-precision" floating-point values. These can be evaluated using this
11623 formula (with ^ for exponentiation):
11627 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11628 16 <= n <= 31 and 0 <= r <= 7.
11630 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11632 - A (most-significant) is the sign bit.
11633 - BCD are the exponent (encoded as r XOR 3).
11634 - EFGH are the mantissa (encoded as n - 16).
11637 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11638 fconst[sd] instruction, or -1 if X isn't suitable. */
11640 vfp3_const_double_index (rtx x
)
11642 REAL_VALUE_TYPE r
, m
;
11643 int sign
, exponent
;
11644 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
11645 unsigned HOST_WIDE_INT mask
;
11646 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
11649 if (!TARGET_VFP3
|| !CONST_DOUBLE_P (x
))
11652 r
= *CONST_DOUBLE_REAL_VALUE (x
);
11654 /* We can't represent these things, so detect them first. */
11655 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
11658 /* Extract sign, exponent and mantissa. */
11659 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
11660 r
= real_value_abs (&r
);
11661 exponent
= REAL_EXP (&r
);
11662 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11663 highest (sign) bit, with a fixed binary point at bit point_pos.
11664 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11665 bits for the mantissa, this may fail (low bits would be lost). */
11666 real_ldexp (&m
, &r
, point_pos
- exponent
);
11667 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
11668 mantissa
= w
.elt (0);
11669 mant_hi
= w
.elt (1);
11671 /* If there are bits set in the low part of the mantissa, we can't
11672 represent this value. */
11676 /* Now make it so that mantissa contains the most-significant bits, and move
11677 the point_pos to indicate that the least-significant bits have been
11679 point_pos
-= HOST_BITS_PER_WIDE_INT
;
11680 mantissa
= mant_hi
;
11682 /* We can permit four significant bits of mantissa only, plus a high bit
11683 which is always 1. */
11684 mask
= (HOST_WIDE_INT_1U
<< (point_pos
- 5)) - 1;
11685 if ((mantissa
& mask
) != 0)
11688 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11689 mantissa
>>= point_pos
- 5;
11691 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11692 floating-point immediate zero with Neon using an integer-zero load, but
11693 that case is handled elsewhere.) */
11697 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
11699 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11700 normalized significands are in the range [1, 2). (Our mantissa is shifted
11701 left 4 places at this point relative to normalized IEEE754 values). GCC
11702 internally uses [0.5, 1) (see real.c), so the exponent returned from
11703 REAL_EXP must be altered. */
11704 exponent
= 5 - exponent
;
11706 if (exponent
< 0 || exponent
> 7)
11709 /* Sign, mantissa and exponent are now in the correct form to plug into the
11710 formula described in the comment above. */
11711 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
11714 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11716 vfp3_const_double_rtx (rtx x
)
11721 return vfp3_const_double_index (x
) != -1;
11724 /* Recognize immediates which can be used in various Neon instructions. Legal
11725 immediates are described by the following table (for VMVN variants, the
11726 bitwise inverse of the constant shown is recognized. In either case, VMOV
11727 is output and the correct instruction to use for a given constant is chosen
11728 by the assembler). The constant shown is replicated across all elements of
11729 the destination vector.
11731 insn elems variant constant (binary)
11732 ---- ----- ------- -----------------
11733 vmov i32 0 00000000 00000000 00000000 abcdefgh
11734 vmov i32 1 00000000 00000000 abcdefgh 00000000
11735 vmov i32 2 00000000 abcdefgh 00000000 00000000
11736 vmov i32 3 abcdefgh 00000000 00000000 00000000
11737 vmov i16 4 00000000 abcdefgh
11738 vmov i16 5 abcdefgh 00000000
11739 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11740 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11741 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11742 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11743 vmvn i16 10 00000000 abcdefgh
11744 vmvn i16 11 abcdefgh 00000000
11745 vmov i32 12 00000000 00000000 abcdefgh 11111111
11746 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11747 vmov i32 14 00000000 abcdefgh 11111111 11111111
11748 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11749 vmov i8 16 abcdefgh
11750 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11751 eeeeeeee ffffffff gggggggg hhhhhhhh
11752 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11753 vmov f32 19 00000000 00000000 00000000 00000000
11755 For case 18, B = !b. Representable values are exactly those accepted by
11756 vfp3_const_double_index, but are output as floating-point numbers rather
11759 For case 19, we will change it to vmov.i32 when assembling.
11761 Variants 0-5 (inclusive) may also be used as immediates for the second
11762 operand of VORR/VBIC instructions.
11764 The INVERSE argument causes the bitwise inverse of the given operand to be
11765 recognized instead (used for recognizing legal immediates for the VAND/VORN
11766 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11767 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11768 output, rather than the real insns vbic/vorr).
11770 INVERSE makes no difference to the recognition of float vectors.
11772 The return value is the variant of immediate as shown in the above table, or
11773 -1 if the given value doesn't match any of the listed patterns.
11776 neon_valid_immediate (rtx op
, machine_mode mode
, int inverse
,
11777 rtx
*modconst
, int *elementwidth
)
11779 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11781 for (i = 0; i < idx; i += (STRIDE)) \
11786 immtype = (CLASS); \
11787 elsize = (ELSIZE); \
11791 unsigned int i
, elsize
= 0, idx
= 0, n_elts
;
11792 unsigned int innersize
;
11793 unsigned char bytes
[16];
11794 int immtype
= -1, matches
;
11795 unsigned int invmask
= inverse
? 0xff : 0;
11796 bool vector
= GET_CODE (op
) == CONST_VECTOR
;
11799 n_elts
= CONST_VECTOR_NUNITS (op
);
11803 if (mode
== VOIDmode
)
11807 innersize
= GET_MODE_UNIT_SIZE (mode
);
11809 /* Vectors of float constants. */
11810 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11812 rtx el0
= CONST_VECTOR_ELT (op
, 0);
11814 if (!vfp3_const_double_rtx (el0
) && el0
!= CONST0_RTX (GET_MODE (el0
)))
11817 /* FP16 vectors cannot be represented. */
11818 if (GET_MODE_INNER (mode
) == HFmode
)
11821 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11822 are distinct in this context. */
11823 if (!const_vec_duplicate_p (op
))
11827 *modconst
= CONST_VECTOR_ELT (op
, 0);
11832 if (el0
== CONST0_RTX (GET_MODE (el0
)))
11838 /* The tricks done in the code below apply for little-endian vector layout.
11839 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11840 FIXME: Implement logic for big-endian vectors. */
11841 if (BYTES_BIG_ENDIAN
&& vector
&& !const_vec_duplicate_p (op
))
11844 /* Splat vector constant out into a byte vector. */
11845 for (i
= 0; i
< n_elts
; i
++)
11847 rtx el
= vector
? CONST_VECTOR_ELT (op
, i
) : op
;
11848 unsigned HOST_WIDE_INT elpart
;
11850 gcc_assert (CONST_INT_P (el
));
11851 elpart
= INTVAL (el
);
11853 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
11855 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11856 elpart
>>= BITS_PER_UNIT
;
11860 /* Sanity check. */
11861 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11865 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11866 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11868 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11869 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11871 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11872 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11874 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11875 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
11877 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
11879 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
11881 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11882 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11884 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11885 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11887 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11888 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11890 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11891 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
11893 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
11895 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
11897 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11898 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
11900 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11901 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
11903 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11904 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
11906 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11907 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
11909 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
11911 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11912 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
11920 *elementwidth
= elsize
;
11924 unsigned HOST_WIDE_INT imm
= 0;
11926 /* Un-invert bytes of recognized vector, if necessary. */
11928 for (i
= 0; i
< idx
; i
++)
11929 bytes
[i
] ^= invmask
;
11933 /* FIXME: Broken on 32-bit H_W_I hosts. */
11934 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
11936 for (i
= 0; i
< 8; i
++)
11937 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
11938 << (i
* BITS_PER_UNIT
);
11940 *modconst
= GEN_INT (imm
);
11944 unsigned HOST_WIDE_INT imm
= 0;
11946 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
11947 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
11949 *modconst
= GEN_INT (imm
);
11957 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11958 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11959 float elements), and a modified constant (whatever should be output for a
11960 VMOV) in *MODCONST. */
11963 neon_immediate_valid_for_move (rtx op
, machine_mode mode
,
11964 rtx
*modconst
, int *elementwidth
)
11968 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
11974 *modconst
= tmpconst
;
11977 *elementwidth
= tmpwidth
;
11982 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11983 the immediate is valid, write a constant suitable for using as an operand
11984 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11985 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11988 neon_immediate_valid_for_logic (rtx op
, machine_mode mode
, int inverse
,
11989 rtx
*modconst
, int *elementwidth
)
11993 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
11995 if (retval
< 0 || retval
> 5)
11999 *modconst
= tmpconst
;
12002 *elementwidth
= tmpwidth
;
12007 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12008 the immediate is valid, write a constant suitable for using as an operand
12009 to VSHR/VSHL to *MODCONST and the corresponding element width to
12010 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12011 because they have different limitations. */
12014 neon_immediate_valid_for_shift (rtx op
, machine_mode mode
,
12015 rtx
*modconst
, int *elementwidth
,
12018 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
12019 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
12020 unsigned HOST_WIDE_INT last_elt
= 0;
12021 unsigned HOST_WIDE_INT maxshift
;
12023 /* Split vector constant out into a byte vector. */
12024 for (i
= 0; i
< n_elts
; i
++)
12026 rtx el
= CONST_VECTOR_ELT (op
, i
);
12027 unsigned HOST_WIDE_INT elpart
;
12029 if (CONST_INT_P (el
))
12030 elpart
= INTVAL (el
);
12031 else if (CONST_DOUBLE_P (el
))
12034 gcc_unreachable ();
12036 if (i
!= 0 && elpart
!= last_elt
)
12042 /* Shift less than element size. */
12043 maxshift
= innersize
* 8;
12047 /* Left shift immediate value can be from 0 to <size>-1. */
12048 if (last_elt
>= maxshift
)
12053 /* Right shift immediate value can be from 1 to <size>. */
12054 if (last_elt
== 0 || last_elt
> maxshift
)
12059 *elementwidth
= innersize
* 8;
12062 *modconst
= CONST_VECTOR_ELT (op
, 0);
12067 /* Return a string suitable for output of Neon immediate logic operation
12071 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, machine_mode mode
,
12072 int inverse
, int quad
)
12074 int width
, is_valid
;
12075 static char templ
[40];
12077 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
12079 gcc_assert (is_valid
!= 0);
12082 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
12084 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
12089 /* Return a string suitable for output of Neon immediate shift operation
12090 (VSHR or VSHL) MNEM. */
12093 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
12094 machine_mode mode
, int quad
,
12097 int width
, is_valid
;
12098 static char templ
[40];
12100 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
12101 gcc_assert (is_valid
!= 0);
12104 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
12106 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
12111 /* Output a sequence of pairwise operations to implement a reduction.
12112 NOTE: We do "too much work" here, because pairwise operations work on two
12113 registers-worth of operands in one go. Unfortunately we can't exploit those
12114 extra calculations to do the full operation in fewer steps, I don't think.
12115 Although all vector elements of the result but the first are ignored, we
12116 actually calculate the same result in each of the elements. An alternative
12117 such as initially loading a vector with zero to use as each of the second
12118 operands would use up an additional register and take an extra instruction,
12119 for no particular gain. */
12122 neon_pairwise_reduce (rtx op0
, rtx op1
, machine_mode mode
,
12123 rtx (*reduc
) (rtx
, rtx
, rtx
))
12125 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_UNIT_SIZE (mode
);
12128 for (i
= parts
/ 2; i
>= 1; i
/= 2)
12130 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
12131 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
12136 /* If VALS is a vector constant that can be loaded into a register
12137 using VDUP, generate instructions to do so and return an RTX to
12138 assign to the register. Otherwise return NULL_RTX. */
12141 neon_vdup_constant (rtx vals
)
12143 machine_mode mode
= GET_MODE (vals
);
12144 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12147 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
12150 if (!const_vec_duplicate_p (vals
, &x
))
12151 /* The elements are not all the same. We could handle repeating
12152 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12153 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12157 /* We can load this constant by using VDUP and a constant in a
12158 single ARM register. This will be cheaper than a vector
12161 x
= copy_to_mode_reg (inner_mode
, x
);
12162 return gen_vec_duplicate (mode
, x
);
12165 /* Generate code to load VALS, which is a PARALLEL containing only
12166 constants (for vec_init) or CONST_VECTOR, efficiently into a
12167 register. Returns an RTX to copy into the register, or NULL_RTX
12168 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12171 neon_make_constant (rtx vals
)
12173 machine_mode mode
= GET_MODE (vals
);
12175 rtx const_vec
= NULL_RTX
;
12176 int n_elts
= GET_MODE_NUNITS (mode
);
12180 if (GET_CODE (vals
) == CONST_VECTOR
)
12182 else if (GET_CODE (vals
) == PARALLEL
)
12184 /* A CONST_VECTOR must contain only CONST_INTs and
12185 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12186 Only store valid constants in a CONST_VECTOR. */
12187 for (i
= 0; i
< n_elts
; ++i
)
12189 rtx x
= XVECEXP (vals
, 0, i
);
12190 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
12193 if (n_const
== n_elts
)
12194 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
12197 gcc_unreachable ();
12199 if (const_vec
!= NULL
12200 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
12201 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12203 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
12204 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12205 pipeline cycle; creating the constant takes one or two ARM
12206 pipeline cycles. */
12208 else if (const_vec
!= NULL_RTX
)
12209 /* Load from constant pool. On Cortex-A8 this takes two cycles
12210 (for either double or quad vectors). We can not take advantage
12211 of single-cycle VLD1 because we need a PC-relative addressing
12215 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12216 We can not construct an initializer. */
12220 /* Initialize vector TARGET to VALS. */
12223 neon_expand_vector_init (rtx target
, rtx vals
)
12225 machine_mode mode
= GET_MODE (target
);
12226 machine_mode inner_mode
= GET_MODE_INNER (mode
);
12227 int n_elts
= GET_MODE_NUNITS (mode
);
12228 int n_var
= 0, one_var
= -1;
12229 bool all_same
= true;
12233 for (i
= 0; i
< n_elts
; ++i
)
12235 x
= XVECEXP (vals
, 0, i
);
12236 if (!CONSTANT_P (x
))
12237 ++n_var
, one_var
= i
;
12239 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
12245 rtx constant
= neon_make_constant (vals
);
12246 if (constant
!= NULL_RTX
)
12248 emit_move_insn (target
, constant
);
12253 /* Splat a single non-constant element if we can. */
12254 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
12256 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
12257 emit_insn (gen_rtx_SET (target
, gen_vec_duplicate (mode
, x
)));
12261 /* One field is non-constant. Load constant then overwrite varying
12262 field. This is more efficient than using the stack. */
12265 rtx copy
= copy_rtx (vals
);
12266 rtx index
= GEN_INT (one_var
);
12268 /* Load constant part of vector, substitute neighboring value for
12269 varying element. */
12270 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
12271 neon_expand_vector_init (target
, copy
);
12273 /* Insert variable. */
12274 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
12278 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
12281 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
12284 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
12287 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
12290 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
12293 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
12296 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
12299 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
12302 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
12305 gcc_unreachable ();
12310 /* Construct the vector in memory one field at a time
12311 and load the whole vector. */
12312 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
12313 for (i
= 0; i
< n_elts
; i
++)
12314 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
12315 i
* GET_MODE_SIZE (inner_mode
)),
12316 XVECEXP (vals
, 0, i
));
12317 emit_move_insn (target
, mem
);
12320 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12321 ERR if it doesn't. EXP indicates the source location, which includes the
12322 inlining history for intrinsics. */
12325 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12326 const_tree exp
, const char *desc
)
12328 HOST_WIDE_INT lane
;
12330 gcc_assert (CONST_INT_P (operand
));
12332 lane
= INTVAL (operand
);
12334 if (lane
< low
|| lane
>= high
)
12337 error ("%K%s %wd out of range %wd - %wd",
12338 exp
, desc
, lane
, low
, high
- 1);
12340 error ("%s %wd out of range %wd - %wd", desc
, lane
, low
, high
- 1);
12344 /* Bounds-check lanes. */
12347 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
12350 bounds_check (operand
, low
, high
, exp
, "lane");
12353 /* Bounds-check constants. */
12356 arm_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
12358 bounds_check (operand
, low
, high
, NULL_TREE
, "constant");
12362 neon_element_bits (machine_mode mode
)
12364 return GET_MODE_UNIT_BITSIZE (mode
);
12368 /* Predicates for `match_operand' and `match_operator'. */
12370 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12371 WB is true if full writeback address modes are allowed and is false
12372 if limited writeback address modes (POST_INC and PRE_DEC) are
12376 arm_coproc_mem_operand (rtx op
, bool wb
)
12380 /* Reject eliminable registers. */
12381 if (! (reload_in_progress
|| reload_completed
|| lra_in_progress
)
12382 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12383 || reg_mentioned_p (arg_pointer_rtx
, op
)
12384 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12385 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12386 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12387 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12390 /* Constants are converted into offsets from labels. */
12394 ind
= XEXP (op
, 0);
12396 if (reload_completed
12397 && (GET_CODE (ind
) == LABEL_REF
12398 || (GET_CODE (ind
) == CONST
12399 && GET_CODE (XEXP (ind
, 0)) == PLUS
12400 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12401 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12404 /* Match: (mem (reg)). */
12406 return arm_address_register_rtx_p (ind
, 0);
12408 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
12409 acceptable in any case (subject to verification by
12410 arm_address_register_rtx_p). We need WB to be true to accept
12411 PRE_INC and POST_DEC. */
12412 if (GET_CODE (ind
) == POST_INC
12413 || GET_CODE (ind
) == PRE_DEC
12415 && (GET_CODE (ind
) == PRE_INC
12416 || GET_CODE (ind
) == POST_DEC
)))
12417 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12420 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
12421 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
12422 && GET_CODE (XEXP (ind
, 1)) == PLUS
12423 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
12424 ind
= XEXP (ind
, 1);
12429 if (GET_CODE (ind
) == PLUS
12430 && REG_P (XEXP (ind
, 0))
12431 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12432 && CONST_INT_P (XEXP (ind
, 1))
12433 && INTVAL (XEXP (ind
, 1)) > -1024
12434 && INTVAL (XEXP (ind
, 1)) < 1024
12435 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12441 /* Return TRUE if OP is a memory operand which we can load or store a vector
12442 to/from. TYPE is one of the following values:
12443 0 - Vector load/stor (vldr)
12444 1 - Core registers (ldm)
12445 2 - Element/structure loads (vld1)
12448 neon_vector_mem_operand (rtx op
, int type
, bool strict
)
12452 /* Reject eliminable registers. */
12453 if (strict
&& ! (reload_in_progress
|| reload_completed
)
12454 && (reg_mentioned_p (frame_pointer_rtx
, op
)
12455 || reg_mentioned_p (arg_pointer_rtx
, op
)
12456 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12457 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12458 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12459 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12462 /* Constants are converted into offsets from labels. */
12466 ind
= XEXP (op
, 0);
12468 if (reload_completed
12469 && (GET_CODE (ind
) == LABEL_REF
12470 || (GET_CODE (ind
) == CONST
12471 && GET_CODE (XEXP (ind
, 0)) == PLUS
12472 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12473 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12476 /* Match: (mem (reg)). */
12478 return arm_address_register_rtx_p (ind
, 0);
12480 /* Allow post-increment with Neon registers. */
12481 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
12482 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
12483 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12485 /* Allow post-increment by register for VLDn */
12486 if (type
== 2 && GET_CODE (ind
) == POST_MODIFY
12487 && GET_CODE (XEXP (ind
, 1)) == PLUS
12488 && REG_P (XEXP (XEXP (ind
, 1), 1)))
12495 && GET_CODE (ind
) == PLUS
12496 && REG_P (XEXP (ind
, 0))
12497 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
12498 && CONST_INT_P (XEXP (ind
, 1))
12499 && INTVAL (XEXP (ind
, 1)) > -1024
12500 /* For quad modes, we restrict the constant offset to be slightly less
12501 than what the instruction format permits. We have no such constraint
12502 on double mode offsets. (This must match arm_legitimate_index_p.) */
12503 && (INTVAL (XEXP (ind
, 1))
12504 < (VALID_NEON_QREG_MODE (GET_MODE (op
))? 1016 : 1024))
12505 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
12511 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12514 neon_struct_mem_operand (rtx op
)
12518 /* Reject eliminable registers. */
12519 if (! (reload_in_progress
|| reload_completed
)
12520 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
12521 || reg_mentioned_p (arg_pointer_rtx
, op
)
12522 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
12523 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
12524 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
12525 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
12528 /* Constants are converted into offsets from labels. */
12532 ind
= XEXP (op
, 0);
12534 if (reload_completed
12535 && (GET_CODE (ind
) == LABEL_REF
12536 || (GET_CODE (ind
) == CONST
12537 && GET_CODE (XEXP (ind
, 0)) == PLUS
12538 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
12539 && CONST_INT_P (XEXP (XEXP (ind
, 0), 1)))))
12542 /* Match: (mem (reg)). */
12544 return arm_address_register_rtx_p (ind
, 0);
12546 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12547 if (GET_CODE (ind
) == POST_INC
12548 || GET_CODE (ind
) == PRE_DEC
)
12549 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
12554 /* Return true if X is a register that will be eliminated later on. */
12556 arm_eliminable_register (rtx x
)
12558 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
12559 || REGNO (x
) == ARG_POINTER_REGNUM
12560 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
12561 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
12564 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12565 coprocessor registers. Otherwise return NO_REGS. */
12568 coproc_secondary_reload_class (machine_mode mode
, rtx x
, bool wb
)
12570 if (mode
== HFmode
)
12572 if (!TARGET_NEON_FP16
&& !TARGET_VFP_FP16INST
)
12573 return GENERAL_REGS
;
12574 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2, true))
12576 return GENERAL_REGS
;
12579 /* The neon move patterns handle all legitimate vector and struct
12582 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
12583 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
12584 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
12585 || VALID_NEON_STRUCT_MODE (mode
)))
12588 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
12591 return GENERAL_REGS
;
12594 /* Values which must be returned in the most-significant end of the return
12598 arm_return_in_msb (const_tree valtype
)
12600 return (TARGET_AAPCS_BASED
12601 && BYTES_BIG_ENDIAN
12602 && (AGGREGATE_TYPE_P (valtype
)
12603 || TREE_CODE (valtype
) == COMPLEX_TYPE
12604 || FIXED_POINT_TYPE_P (valtype
)));
12607 /* Return TRUE if X references a SYMBOL_REF. */
12609 symbol_mentioned_p (rtx x
)
12614 if (GET_CODE (x
) == SYMBOL_REF
)
12617 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12618 are constant offsets, not symbols. */
12619 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12622 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12624 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12630 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12631 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
12634 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
12641 /* Return TRUE if X references a LABEL_REF. */
12643 label_mentioned_p (rtx x
)
12648 if (GET_CODE (x
) == LABEL_REF
)
12651 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12652 instruction, but they are constant offsets, not symbols. */
12653 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
12656 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
12657 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
12663 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
12664 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
12667 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
12675 tls_mentioned_p (rtx x
)
12677 switch (GET_CODE (x
))
12680 return tls_mentioned_p (XEXP (x
, 0));
12683 if (XINT (x
, 1) == UNSPEC_TLS
)
12686 /* Fall through. */
12692 /* Must not copy any rtx that uses a pc-relative address.
12693 Also, disallow copying of load-exclusive instructions that
12694 may appear after splitting of compare-and-swap-style operations
12695 so as to prevent those loops from being transformed away from their
12696 canonical forms (see PR 69904). */
12699 arm_cannot_copy_insn_p (rtx_insn
*insn
)
12701 /* The tls call insn cannot be copied, as it is paired with a data
12703 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
12706 subrtx_iterator::array_type array
;
12707 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
12709 const_rtx x
= *iter
;
12710 if (GET_CODE (x
) == UNSPEC
12711 && (XINT (x
, 1) == UNSPEC_PIC_BASE
12712 || XINT (x
, 1) == UNSPEC_PIC_UNIFIED
))
12716 rtx set
= single_set (insn
);
12719 rtx src
= SET_SRC (set
);
12720 if (GET_CODE (src
) == ZERO_EXTEND
)
12721 src
= XEXP (src
, 0);
12723 /* Catch the load-exclusive and load-acquire operations. */
12724 if (GET_CODE (src
) == UNSPEC_VOLATILE
12725 && (XINT (src
, 1) == VUNSPEC_LL
12726 || XINT (src
, 1) == VUNSPEC_LAX
))
12733 minmax_code (rtx x
)
12735 enum rtx_code code
= GET_CODE (x
);
12748 gcc_unreachable ();
12752 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12755 arm_sat_operator_match (rtx lo_bound
, rtx hi_bound
,
12756 int *mask
, bool *signed_sat
)
12758 /* The high bound must be a power of two minus one. */
12759 int log
= exact_log2 (INTVAL (hi_bound
) + 1);
12763 /* The low bound is either zero (for usat) or one less than the
12764 negation of the high bound (for ssat). */
12765 if (INTVAL (lo_bound
) == 0)
12770 *signed_sat
= false;
12775 if (INTVAL (lo_bound
) == -INTVAL (hi_bound
) - 1)
12780 *signed_sat
= true;
12788 /* Return 1 if memory locations are adjacent. */
12790 adjacent_mem_locations (rtx a
, rtx b
)
12792 /* We don't guarantee to preserve the order of these memory refs. */
12793 if (volatile_refs_p (a
) || volatile_refs_p (b
))
12796 if ((REG_P (XEXP (a
, 0))
12797 || (GET_CODE (XEXP (a
, 0)) == PLUS
12798 && CONST_INT_P (XEXP (XEXP (a
, 0), 1))))
12799 && (REG_P (XEXP (b
, 0))
12800 || (GET_CODE (XEXP (b
, 0)) == PLUS
12801 && CONST_INT_P (XEXP (XEXP (b
, 0), 1)))))
12803 HOST_WIDE_INT val0
= 0, val1
= 0;
12807 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
12809 reg0
= XEXP (XEXP (a
, 0), 0);
12810 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
12813 reg0
= XEXP (a
, 0);
12815 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
12817 reg1
= XEXP (XEXP (b
, 0), 0);
12818 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
12821 reg1
= XEXP (b
, 0);
12823 /* Don't accept any offset that will require multiple
12824 instructions to handle, since this would cause the
12825 arith_adjacentmem pattern to output an overlong sequence. */
12826 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
12829 /* Don't allow an eliminable register: register elimination can make
12830 the offset too large. */
12831 if (arm_eliminable_register (reg0
))
12834 val_diff
= val1
- val0
;
12838 /* If the target has load delay slots, then there's no benefit
12839 to using an ldm instruction unless the offset is zero and
12840 we are optimizing for size. */
12841 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
12842 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
12843 && (val_diff
== 4 || val_diff
== -4));
12846 return ((REGNO (reg0
) == REGNO (reg1
))
12847 && (val_diff
== 4 || val_diff
== -4));
12853 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12854 for load operations, false for store operations. CONSECUTIVE is true
12855 if the register numbers in the operation must be consecutive in the register
12856 bank. RETURN_PC is true if value is to be loaded in PC.
12857 The pattern we are trying to match for load is:
12858 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12859 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12862 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12865 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12866 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12867 3. If consecutive is TRUE, then for kth register being loaded,
12868 REGNO (R_dk) = REGNO (R_d0) + k.
12869 The pattern for store is similar. */
12871 ldm_stm_operation_p (rtx op
, bool load
, machine_mode mode
,
12872 bool consecutive
, bool return_pc
)
12874 HOST_WIDE_INT count
= XVECLEN (op
, 0);
12875 rtx reg
, mem
, addr
;
12877 unsigned first_regno
;
12878 HOST_WIDE_INT i
= 1, base
= 0, offset
= 0;
12880 bool addr_reg_in_reglist
= false;
12881 bool update
= false;
12886 /* If not in SImode, then registers must be consecutive
12887 (e.g., VLDM instructions for DFmode). */
12888 gcc_assert ((mode
== SImode
) || consecutive
);
12889 /* Setting return_pc for stores is illegal. */
12890 gcc_assert (!return_pc
|| load
);
12892 /* Set up the increments and the regs per val based on the mode. */
12893 reg_increment
= GET_MODE_SIZE (mode
);
12894 regs_per_val
= reg_increment
/ 4;
12895 offset_adj
= return_pc
? 1 : 0;
12898 || GET_CODE (XVECEXP (op
, 0, offset_adj
)) != SET
12899 || (load
&& !REG_P (SET_DEST (XVECEXP (op
, 0, offset_adj
)))))
12902 /* Check if this is a write-back. */
12903 elt
= XVECEXP (op
, 0, offset_adj
);
12904 if (GET_CODE (SET_SRC (elt
)) == PLUS
)
12910 /* The offset adjustment must be the number of registers being
12911 popped times the size of a single register. */
12912 if (!REG_P (SET_DEST (elt
))
12913 || !REG_P (XEXP (SET_SRC (elt
), 0))
12914 || (REGNO (SET_DEST (elt
)) != REGNO (XEXP (SET_SRC (elt
), 0)))
12915 || !CONST_INT_P (XEXP (SET_SRC (elt
), 1))
12916 || INTVAL (XEXP (SET_SRC (elt
), 1)) !=
12917 ((count
- 1 - offset_adj
) * reg_increment
))
12921 i
= i
+ offset_adj
;
12922 base
= base
+ offset_adj
;
12923 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12924 success depends on the type: VLDM can do just one reg,
12925 LDM must do at least two. */
12926 if ((count
<= i
) && (mode
== SImode
))
12929 elt
= XVECEXP (op
, 0, i
- 1);
12930 if (GET_CODE (elt
) != SET
)
12935 reg
= SET_DEST (elt
);
12936 mem
= SET_SRC (elt
);
12940 reg
= SET_SRC (elt
);
12941 mem
= SET_DEST (elt
);
12944 if (!REG_P (reg
) || !MEM_P (mem
))
12947 regno
= REGNO (reg
);
12948 first_regno
= regno
;
12949 addr
= XEXP (mem
, 0);
12950 if (GET_CODE (addr
) == PLUS
)
12952 if (!CONST_INT_P (XEXP (addr
, 1)))
12955 offset
= INTVAL (XEXP (addr
, 1));
12956 addr
= XEXP (addr
, 0);
12962 /* Don't allow SP to be loaded unless it is also the base register. It
12963 guarantees that SP is reset correctly when an LDM instruction
12964 is interrupted. Otherwise, we might end up with a corrupt stack. */
12965 if (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12968 for (; i
< count
; i
++)
12970 elt
= XVECEXP (op
, 0, i
);
12971 if (GET_CODE (elt
) != SET
)
12976 reg
= SET_DEST (elt
);
12977 mem
= SET_SRC (elt
);
12981 reg
= SET_SRC (elt
);
12982 mem
= SET_DEST (elt
);
12986 || GET_MODE (reg
) != mode
12987 || REGNO (reg
) <= regno
12990 (unsigned int) (first_regno
+ regs_per_val
* (i
- base
))))
12991 /* Don't allow SP to be loaded unless it is also the base register. It
12992 guarantees that SP is reset correctly when an LDM instruction
12993 is interrupted. Otherwise, we might end up with a corrupt stack. */
12994 || (load
&& (REGNO (reg
) == SP_REGNUM
) && (REGNO (addr
) != SP_REGNUM
))
12996 || GET_MODE (mem
) != mode
12997 || ((GET_CODE (XEXP (mem
, 0)) != PLUS
12998 || !rtx_equal_p (XEXP (XEXP (mem
, 0), 0), addr
)
12999 || !CONST_INT_P (XEXP (XEXP (mem
, 0), 1))
13000 || (INTVAL (XEXP (XEXP (mem
, 0), 1)) !=
13001 offset
+ (i
- base
) * reg_increment
))
13002 && (!REG_P (XEXP (mem
, 0))
13003 || offset
+ (i
- base
) * reg_increment
!= 0)))
13006 regno
= REGNO (reg
);
13007 if (regno
== REGNO (addr
))
13008 addr_reg_in_reglist
= true;
13013 if (update
&& addr_reg_in_reglist
)
13016 /* For Thumb-1, address register is always modified - either by write-back
13017 or by explicit load. If the pattern does not describe an update,
13018 then the address register must be in the list of loaded registers. */
13020 return update
|| addr_reg_in_reglist
;
13026 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13027 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13028 instruction. ADD_OFFSET is nonzero if the base address register needs
13029 to be modified with an add instruction before we can use it. */
13032 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
13033 int nops
, HOST_WIDE_INT add_offset
)
13035 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13036 if the offset isn't small enough. The reason 2 ldrs are faster
13037 is because these ARMs are able to do more than one cache access
13038 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13039 whilst the ARM8 has a double bandwidth cache. This means that
13040 these cores can do both an instruction fetch and a data fetch in
13041 a single cycle, so the trick of calculating the address into a
13042 scratch register (one of the result regs) and then doing a load
13043 multiple actually becomes slower (and no smaller in code size).
13044 That is the transformation
13046 ldr rd1, [rbase + offset]
13047 ldr rd2, [rbase + offset + 4]
13051 add rd1, rbase, offset
13052 ldmia rd1, {rd1, rd2}
13054 produces worse code -- '3 cycles + any stalls on rd2' instead of
13055 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13056 access per cycle, the first sequence could never complete in less
13057 than 6 cycles, whereas the ldm sequence would only take 5 and
13058 would make better use of sequential accesses if not hitting the
13061 We cheat here and test 'arm_ld_sched' which we currently know to
13062 only be true for the ARM8, ARM9 and StrongARM. If this ever
13063 changes, then the test below needs to be reworked. */
13064 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
13067 /* XScale has load-store double instructions, but they have stricter
13068 alignment requirements than load-store multiple, so we cannot
13071 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13072 the pipeline until completion.
13080 An ldr instruction takes 1-3 cycles, but does not block the
13089 Best case ldr will always win. However, the more ldr instructions
13090 we issue, the less likely we are to be able to schedule them well.
13091 Using ldr instructions also increases code size.
13093 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13094 for counts of 3 or 4 regs. */
13095 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
13100 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13101 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13102 an array ORDER which describes the sequence to use when accessing the
13103 offsets that produces an ascending order. In this sequence, each
13104 offset must be larger by exactly 4 than the previous one. ORDER[0]
13105 must have been filled in with the lowest offset by the caller.
13106 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13107 we use to verify that ORDER produces an ascending order of registers.
13108 Return true if it was possible to construct such an order, false if
13112 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
13113 int *unsorted_regs
)
13116 for (i
= 1; i
< nops
; i
++)
13120 order
[i
] = order
[i
- 1];
13121 for (j
= 0; j
< nops
; j
++)
13122 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
13124 /* We must find exactly one offset that is higher than the
13125 previous one by 4. */
13126 if (order
[i
] != order
[i
- 1])
13130 if (order
[i
] == order
[i
- 1])
13132 /* The register numbers must be ascending. */
13133 if (unsorted_regs
!= NULL
13134 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
13140 /* Used to determine in a peephole whether a sequence of load
13141 instructions can be changed into a load-multiple instruction.
13142 NOPS is the number of separate load instructions we are examining. The
13143 first NOPS entries in OPERANDS are the destination registers, the
13144 next NOPS entries are memory operands. If this function is
13145 successful, *BASE is set to the common base register of the memory
13146 accesses; *LOAD_OFFSET is set to the first memory location's offset
13147 from that base register.
13148 REGS is an array filled in with the destination register numbers.
13149 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13150 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13151 the sequence of registers in REGS matches the loads from ascending memory
13152 locations, and the function verifies that the register numbers are
13153 themselves ascending. If CHECK_REGS is false, the register numbers
13154 are stored in the order they are found in the operands. */
13156 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
13157 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
13159 int unsorted_regs
[MAX_LDM_STM_OPS
];
13160 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13161 int order
[MAX_LDM_STM_OPS
];
13162 rtx base_reg_rtx
= NULL
;
13166 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13167 easily extended if required. */
13168 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13170 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13172 /* Loop over the operands and check that the memory references are
13173 suitable (i.e. immediate offsets from the same base register). At
13174 the same time, extract the target register, and the memory
13176 for (i
= 0; i
< nops
; i
++)
13181 /* Convert a subreg of a mem into the mem itself. */
13182 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13183 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13185 gcc_assert (MEM_P (operands
[nops
+ i
]));
13187 /* Don't reorder volatile memory references; it doesn't seem worth
13188 looking for the case where the order is ok anyway. */
13189 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13192 offset
= const0_rtx
;
13194 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13195 || (GET_CODE (reg
) == SUBREG
13196 && REG_P (reg
= SUBREG_REG (reg
))))
13197 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13198 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13199 || (GET_CODE (reg
) == SUBREG
13200 && REG_P (reg
= SUBREG_REG (reg
))))
13201 && (CONST_INT_P (offset
13202 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13206 base_reg
= REGNO (reg
);
13207 base_reg_rtx
= reg
;
13208 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13211 else if (base_reg
!= (int) REGNO (reg
))
13212 /* Not addressed from the same base register. */
13215 unsorted_regs
[i
] = (REG_P (operands
[i
])
13216 ? REGNO (operands
[i
])
13217 : REGNO (SUBREG_REG (operands
[i
])));
13219 /* If it isn't an integer register, or if it overwrites the
13220 base register but isn't the last insn in the list, then
13221 we can't do this. */
13222 if (unsorted_regs
[i
] < 0
13223 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13224 || unsorted_regs
[i
] > 14
13225 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
13228 /* Don't allow SP to be loaded unless it is also the base
13229 register. It guarantees that SP is reset correctly when
13230 an LDM instruction is interrupted. Otherwise, we might
13231 end up with a corrupt stack. */
13232 if (unsorted_regs
[i
] == SP_REGNUM
&& base_reg
!= SP_REGNUM
)
13235 unsorted_offsets
[i
] = INTVAL (offset
);
13236 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13240 /* Not a suitable memory address. */
13244 /* All the useful information has now been extracted from the
13245 operands into unsorted_regs and unsorted_offsets; additionally,
13246 order[0] has been set to the lowest offset in the list. Sort
13247 the offsets into order, verifying that they are adjacent, and
13248 check that the register numbers are ascending. */
13249 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13250 check_regs
? unsorted_regs
: NULL
))
13254 memcpy (saved_order
, order
, sizeof order
);
13260 for (i
= 0; i
< nops
; i
++)
13261 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13263 *load_offset
= unsorted_offsets
[order
[0]];
13267 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
13270 if (unsorted_offsets
[order
[0]] == 0)
13271 ldm_case
= 1; /* ldmia */
13272 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13273 ldm_case
= 2; /* ldmib */
13274 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13275 ldm_case
= 3; /* ldmda */
13276 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13277 ldm_case
= 4; /* ldmdb */
13278 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
13279 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
13284 if (!multiple_operation_profitable_p (false, nops
,
13286 ? unsorted_offsets
[order
[0]] : 0))
13292 /* Used to determine in a peephole whether a sequence of store instructions can
13293 be changed into a store-multiple instruction.
13294 NOPS is the number of separate store instructions we are examining.
13295 NOPS_TOTAL is the total number of instructions recognized by the peephole
13297 The first NOPS entries in OPERANDS are the source registers, the next
13298 NOPS entries are memory operands. If this function is successful, *BASE is
13299 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13300 to the first memory location's offset from that base register. REGS is an
13301 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13302 likewise filled with the corresponding rtx's.
13303 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13304 numbers to an ascending order of stores.
13305 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13306 from ascending memory locations, and the function verifies that the register
13307 numbers are themselves ascending. If CHECK_REGS is false, the register
13308 numbers are stored in the order they are found in the operands. */
13310 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
13311 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
13312 HOST_WIDE_INT
*load_offset
, bool check_regs
)
13314 int unsorted_regs
[MAX_LDM_STM_OPS
];
13315 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
13316 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
13317 int order
[MAX_LDM_STM_OPS
];
13319 rtx base_reg_rtx
= NULL
;
13322 /* Write back of base register is currently only supported for Thumb 1. */
13323 int base_writeback
= TARGET_THUMB1
;
13325 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13326 easily extended if required. */
13327 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
13329 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
13331 /* Loop over the operands and check that the memory references are
13332 suitable (i.e. immediate offsets from the same base register). At
13333 the same time, extract the target register, and the memory
13335 for (i
= 0; i
< nops
; i
++)
13340 /* Convert a subreg of a mem into the mem itself. */
13341 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
13342 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
), true);
13344 gcc_assert (MEM_P (operands
[nops
+ i
]));
13346 /* Don't reorder volatile memory references; it doesn't seem worth
13347 looking for the case where the order is ok anyway. */
13348 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
13351 offset
= const0_rtx
;
13353 if ((REG_P (reg
= XEXP (operands
[nops
+ i
], 0))
13354 || (GET_CODE (reg
) == SUBREG
13355 && REG_P (reg
= SUBREG_REG (reg
))))
13356 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
13357 && ((REG_P (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0)))
13358 || (GET_CODE (reg
) == SUBREG
13359 && REG_P (reg
= SUBREG_REG (reg
))))
13360 && (CONST_INT_P (offset
13361 = XEXP (XEXP (operands
[nops
+ i
], 0), 1)))))
13363 unsorted_reg_rtxs
[i
] = (REG_P (operands
[i
])
13364 ? operands
[i
] : SUBREG_REG (operands
[i
]));
13365 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
13369 base_reg
= REGNO (reg
);
13370 base_reg_rtx
= reg
;
13371 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
13374 else if (base_reg
!= (int) REGNO (reg
))
13375 /* Not addressed from the same base register. */
13378 /* If it isn't an integer register, then we can't do this. */
13379 if (unsorted_regs
[i
] < 0
13380 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
13381 /* The effects are unpredictable if the base register is
13382 both updated and stored. */
13383 || (base_writeback
&& unsorted_regs
[i
] == base_reg
)
13384 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
13385 || unsorted_regs
[i
] > 14)
13388 unsorted_offsets
[i
] = INTVAL (offset
);
13389 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
13393 /* Not a suitable memory address. */
13397 /* All the useful information has now been extracted from the
13398 operands into unsorted_regs and unsorted_offsets; additionally,
13399 order[0] has been set to the lowest offset in the list. Sort
13400 the offsets into order, verifying that they are adjacent, and
13401 check that the register numbers are ascending. */
13402 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
13403 check_regs
? unsorted_regs
: NULL
))
13407 memcpy (saved_order
, order
, sizeof order
);
13413 for (i
= 0; i
< nops
; i
++)
13415 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
13417 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
13420 *load_offset
= unsorted_offsets
[order
[0]];
13424 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
13427 if (unsorted_offsets
[order
[0]] == 0)
13428 stm_case
= 1; /* stmia */
13429 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
13430 stm_case
= 2; /* stmib */
13431 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
13432 stm_case
= 3; /* stmda */
13433 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
13434 stm_case
= 4; /* stmdb */
13438 if (!multiple_operation_profitable_p (false, nops
, 0))
13444 /* Routines for use in generating RTL. */
13446 /* Generate a load-multiple instruction. COUNT is the number of loads in
13447 the instruction; REGS and MEMS are arrays containing the operands.
13448 BASEREG is the base register to be used in addressing the memory operands.
13449 WBACK_OFFSET is nonzero if the instruction should update the base
13453 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13454 HOST_WIDE_INT wback_offset
)
13459 if (!multiple_operation_profitable_p (false, count
, 0))
13465 for (i
= 0; i
< count
; i
++)
13466 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
13468 if (wback_offset
!= 0)
13469 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13471 seq
= get_insns ();
13477 result
= gen_rtx_PARALLEL (VOIDmode
,
13478 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13479 if (wback_offset
!= 0)
13481 XVECEXP (result
, 0, 0)
13482 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13487 for (j
= 0; i
< count
; i
++, j
++)
13488 XVECEXP (result
, 0, i
)
13489 = gen_rtx_SET (gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
13494 /* Generate a store-multiple instruction. COUNT is the number of stores in
13495 the instruction; REGS and MEMS are arrays containing the operands.
13496 BASEREG is the base register to be used in addressing the memory operands.
13497 WBACK_OFFSET is nonzero if the instruction should update the base
13501 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
13502 HOST_WIDE_INT wback_offset
)
13507 if (GET_CODE (basereg
) == PLUS
)
13508 basereg
= XEXP (basereg
, 0);
13510 if (!multiple_operation_profitable_p (false, count
, 0))
13516 for (i
= 0; i
< count
; i
++)
13517 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
13519 if (wback_offset
!= 0)
13520 emit_move_insn (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13522 seq
= get_insns ();
13528 result
= gen_rtx_PARALLEL (VOIDmode
,
13529 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
13530 if (wback_offset
!= 0)
13532 XVECEXP (result
, 0, 0)
13533 = gen_rtx_SET (basereg
, plus_constant (Pmode
, basereg
, wback_offset
));
13538 for (j
= 0; i
< count
; i
++, j
++)
13539 XVECEXP (result
, 0, i
)
13540 = gen_rtx_SET (mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
13545 /* Generate either a load-multiple or a store-multiple instruction. This
13546 function can be used in situations where we can start with a single MEM
13547 rtx and adjust its address upwards.
13548 COUNT is the number of operations in the instruction, not counting a
13549 possible update of the base register. REGS is an array containing the
13551 BASEREG is the base register to be used in addressing the memory operands,
13552 which are constructed from BASEMEM.
13553 WRITE_BACK specifies whether the generated instruction should include an
13554 update of the base register.
13555 OFFSETP is used to pass an offset to and from this function; this offset
13556 is not used when constructing the address (instead BASEMEM should have an
13557 appropriate offset in its address), it is used only for setting
13558 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
13561 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
13562 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
13564 rtx mems
[MAX_LDM_STM_OPS
];
13565 HOST_WIDE_INT offset
= *offsetp
;
13568 gcc_assert (count
<= MAX_LDM_STM_OPS
);
13570 if (GET_CODE (basereg
) == PLUS
)
13571 basereg
= XEXP (basereg
, 0);
13573 for (i
= 0; i
< count
; i
++)
13575 rtx addr
= plus_constant (Pmode
, basereg
, i
* 4);
13576 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
13584 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
13585 write_back
? 4 * count
: 0);
13587 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
13588 write_back
? 4 * count
: 0);
13592 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13593 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13595 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
13600 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
13601 rtx basemem
, HOST_WIDE_INT
*offsetp
)
13603 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
13607 /* Called from a peephole2 expander to turn a sequence of loads into an
13608 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13609 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13610 is true if we can reorder the registers because they are used commutatively
13612 Returns true iff we could generate a new instruction. */
13615 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
13617 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13618 rtx mems
[MAX_LDM_STM_OPS
];
13619 int i
, j
, base_reg
;
13621 HOST_WIDE_INT offset
;
13622 int write_back
= FALSE
;
13626 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
13627 &base_reg
, &offset
, !sort_regs
);
13633 for (i
= 0; i
< nops
- 1; i
++)
13634 for (j
= i
+ 1; j
< nops
; j
++)
13635 if (regs
[i
] > regs
[j
])
13641 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13645 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
13646 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
13652 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
13653 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
13655 if (!TARGET_THUMB1
)
13656 base_reg_rtx
= newbase
;
13659 for (i
= 0; i
< nops
; i
++)
13661 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13662 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13665 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13666 write_back
? offset
+ i
* 4 : 0));
13670 /* Called from a peephole2 expander to turn a sequence of stores into an
13671 STM instruction. OPERANDS are the operands found by the peephole matcher;
13672 NOPS indicates how many separate stores we are trying to combine.
13673 Returns true iff we could generate a new instruction. */
13676 gen_stm_seq (rtx
*operands
, int nops
)
13679 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13680 rtx mems
[MAX_LDM_STM_OPS
];
13683 HOST_WIDE_INT offset
;
13684 int write_back
= FALSE
;
13687 bool base_reg_dies
;
13689 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
13690 mem_order
, &base_reg
, &offset
, true);
13695 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13697 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
13700 gcc_assert (base_reg_dies
);
13706 gcc_assert (base_reg_dies
);
13707 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13711 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13713 for (i
= 0; i
< nops
; i
++)
13715 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13716 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13719 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
13720 write_back
? offset
+ i
* 4 : 0));
13724 /* Called from a peephole2 expander to turn a sequence of stores that are
13725 preceded by constant loads into an STM instruction. OPERANDS are the
13726 operands found by the peephole matcher; NOPS indicates how many
13727 separate stores we are trying to combine; there are 2 * NOPS
13728 instructions in the peephole.
13729 Returns true iff we could generate a new instruction. */
13732 gen_const_stm_seq (rtx
*operands
, int nops
)
13734 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
13735 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
13736 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
13737 rtx mems
[MAX_LDM_STM_OPS
];
13740 HOST_WIDE_INT offset
;
13741 int write_back
= FALSE
;
13744 bool base_reg_dies
;
13746 HARD_REG_SET allocated
;
13748 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
13749 mem_order
, &base_reg
, &offset
, false);
13754 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
13756 /* If the same register is used more than once, try to find a free
13758 CLEAR_HARD_REG_SET (allocated
);
13759 for (i
= 0; i
< nops
; i
++)
13761 for (j
= i
+ 1; j
< nops
; j
++)
13762 if (regs
[i
] == regs
[j
])
13764 rtx t
= peep2_find_free_register (0, nops
* 2,
13765 TARGET_THUMB1
? "l" : "r",
13766 SImode
, &allocated
);
13770 regs
[i
] = REGNO (t
);
13774 /* Compute an ordering that maps the register numbers to an ascending
13777 for (i
= 0; i
< nops
; i
++)
13778 if (regs
[i
] < regs
[reg_order
[0]])
13781 for (i
= 1; i
< nops
; i
++)
13783 int this_order
= reg_order
[i
- 1];
13784 for (j
= 0; j
< nops
; j
++)
13785 if (regs
[j
] > regs
[reg_order
[i
- 1]]
13786 && (this_order
== reg_order
[i
- 1]
13787 || regs
[j
] < regs
[this_order
]))
13789 reg_order
[i
] = this_order
;
13792 /* Ensure that registers that must be live after the instruction end
13793 up with the correct value. */
13794 for (i
= 0; i
< nops
; i
++)
13796 int this_order
= reg_order
[i
];
13797 if ((this_order
!= mem_order
[i
]
13798 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
13799 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
13803 /* Load the constants. */
13804 for (i
= 0; i
< nops
; i
++)
13806 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
13807 sorted_regs
[i
] = regs
[reg_order
[i
]];
13808 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
13811 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
13813 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
13816 gcc_assert (base_reg_dies
);
13822 gcc_assert (base_reg_dies
);
13823 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
13827 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
);
13829 for (i
= 0; i
< nops
; i
++)
13831 addr
= plus_constant (Pmode
, base_reg_rtx
, offset
+ i
* 4);
13832 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
13835 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
13836 write_back
? offset
+ i
* 4 : 0));
13840 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13841 unaligned copies on processors which support unaligned semantics for those
13842 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13843 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13844 An interleave factor of 1 (the minimum) will perform no interleaving.
13845 Load/store multiple are used for aligned addresses where possible. */
13848 arm_block_move_unaligned_straight (rtx dstbase
, rtx srcbase
,
13849 HOST_WIDE_INT length
,
13850 unsigned int interleave_factor
)
13852 rtx
*regs
= XALLOCAVEC (rtx
, interleave_factor
);
13853 int *regnos
= XALLOCAVEC (int, interleave_factor
);
13854 HOST_WIDE_INT block_size_bytes
= interleave_factor
* UNITS_PER_WORD
;
13855 HOST_WIDE_INT i
, j
;
13856 HOST_WIDE_INT remaining
= length
, words
;
13857 rtx halfword_tmp
= NULL
, byte_tmp
= NULL
;
13859 bool src_aligned
= MEM_ALIGN (srcbase
) >= BITS_PER_WORD
;
13860 bool dst_aligned
= MEM_ALIGN (dstbase
) >= BITS_PER_WORD
;
13861 HOST_WIDE_INT srcoffset
, dstoffset
;
13862 HOST_WIDE_INT src_autoinc
, dst_autoinc
;
13865 gcc_assert (1 <= interleave_factor
&& interleave_factor
<= 4);
13867 /* Use hard registers if we have aligned source or destination so we can use
13868 load/store multiple with contiguous registers. */
13869 if (dst_aligned
|| src_aligned
)
13870 for (i
= 0; i
< interleave_factor
; i
++)
13871 regs
[i
] = gen_rtx_REG (SImode
, i
);
13873 for (i
= 0; i
< interleave_factor
; i
++)
13874 regs
[i
] = gen_reg_rtx (SImode
);
13876 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
13877 src
= copy_addr_to_reg (XEXP (srcbase
, 0));
13879 srcoffset
= dstoffset
= 0;
13881 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13882 For copying the last bytes we want to subtract this offset again. */
13883 src_autoinc
= dst_autoinc
= 0;
13885 for (i
= 0; i
< interleave_factor
; i
++)
13888 /* Copy BLOCK_SIZE_BYTES chunks. */
13890 for (i
= 0; i
+ block_size_bytes
<= length
; i
+= block_size_bytes
)
13893 if (src_aligned
&& interleave_factor
> 1)
13895 emit_insn (arm_gen_load_multiple (regnos
, interleave_factor
, src
,
13896 TRUE
, srcbase
, &srcoffset
));
13897 src_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13901 for (j
= 0; j
< interleave_factor
; j
++)
13903 addr
= plus_constant (Pmode
, src
, (srcoffset
+ j
* UNITS_PER_WORD
13905 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13906 srcoffset
+ j
* UNITS_PER_WORD
);
13907 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13909 srcoffset
+= block_size_bytes
;
13913 if (dst_aligned
&& interleave_factor
> 1)
13915 emit_insn (arm_gen_store_multiple (regnos
, interleave_factor
, dst
,
13916 TRUE
, dstbase
, &dstoffset
));
13917 dst_autoinc
+= UNITS_PER_WORD
* interleave_factor
;
13921 for (j
= 0; j
< interleave_factor
; j
++)
13923 addr
= plus_constant (Pmode
, dst
, (dstoffset
+ j
* UNITS_PER_WORD
13925 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13926 dstoffset
+ j
* UNITS_PER_WORD
);
13927 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13929 dstoffset
+= block_size_bytes
;
13932 remaining
-= block_size_bytes
;
13935 /* Copy any whole words left (note these aren't interleaved with any
13936 subsequent halfword/byte load/stores in the interests of simplicity). */
13938 words
= remaining
/ UNITS_PER_WORD
;
13940 gcc_assert (words
< interleave_factor
);
13942 if (src_aligned
&& words
> 1)
13944 emit_insn (arm_gen_load_multiple (regnos
, words
, src
, TRUE
, srcbase
,
13946 src_autoinc
+= UNITS_PER_WORD
* words
;
13950 for (j
= 0; j
< words
; j
++)
13952 addr
= plus_constant (Pmode
, src
,
13953 srcoffset
+ j
* UNITS_PER_WORD
- src_autoinc
);
13954 mem
= adjust_automodify_address (srcbase
, SImode
, addr
,
13955 srcoffset
+ j
* UNITS_PER_WORD
);
13957 emit_move_insn (regs
[j
], mem
);
13959 emit_insn (gen_unaligned_loadsi (regs
[j
], mem
));
13961 srcoffset
+= words
* UNITS_PER_WORD
;
13964 if (dst_aligned
&& words
> 1)
13966 emit_insn (arm_gen_store_multiple (regnos
, words
, dst
, TRUE
, dstbase
,
13968 dst_autoinc
+= words
* UNITS_PER_WORD
;
13972 for (j
= 0; j
< words
; j
++)
13974 addr
= plus_constant (Pmode
, dst
,
13975 dstoffset
+ j
* UNITS_PER_WORD
- dst_autoinc
);
13976 mem
= adjust_automodify_address (dstbase
, SImode
, addr
,
13977 dstoffset
+ j
* UNITS_PER_WORD
);
13979 emit_move_insn (mem
, regs
[j
]);
13981 emit_insn (gen_unaligned_storesi (mem
, regs
[j
]));
13983 dstoffset
+= words
* UNITS_PER_WORD
;
13986 remaining
-= words
* UNITS_PER_WORD
;
13988 gcc_assert (remaining
< 4);
13990 /* Copy a halfword if necessary. */
13992 if (remaining
>= 2)
13994 halfword_tmp
= gen_reg_rtx (SImode
);
13996 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
13997 mem
= adjust_automodify_address (srcbase
, HImode
, addr
, srcoffset
);
13998 emit_insn (gen_unaligned_loadhiu (halfword_tmp
, mem
));
14000 /* Either write out immediately, or delay until we've loaded the last
14001 byte, depending on interleave factor. */
14002 if (interleave_factor
== 1)
14004 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14005 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14006 emit_insn (gen_unaligned_storehi (mem
,
14007 gen_lowpart (HImode
, halfword_tmp
)));
14008 halfword_tmp
= NULL
;
14016 gcc_assert (remaining
< 2);
14018 /* Copy last byte. */
14020 if ((remaining
& 1) != 0)
14022 byte_tmp
= gen_reg_rtx (SImode
);
14024 addr
= plus_constant (Pmode
, src
, srcoffset
- src_autoinc
);
14025 mem
= adjust_automodify_address (srcbase
, QImode
, addr
, srcoffset
);
14026 emit_move_insn (gen_lowpart (QImode
, byte_tmp
), mem
);
14028 if (interleave_factor
== 1)
14030 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14031 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14032 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14041 /* Store last halfword if we haven't done so already. */
14045 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14046 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, dstoffset
);
14047 emit_insn (gen_unaligned_storehi (mem
,
14048 gen_lowpart (HImode
, halfword_tmp
)));
14052 /* Likewise for last byte. */
14056 addr
= plus_constant (Pmode
, dst
, dstoffset
- dst_autoinc
);
14057 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, dstoffset
);
14058 emit_move_insn (mem
, gen_lowpart (QImode
, byte_tmp
));
14062 gcc_assert (remaining
== 0 && srcoffset
== dstoffset
);
14065 /* From mips_adjust_block_mem:
14067 Helper function for doing a loop-based block operation on memory
14068 reference MEM. Each iteration of the loop will operate on LENGTH
14071 Create a new base register for use within the loop and point it to
14072 the start of MEM. Create a new memory reference that uses this
14073 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14076 arm_adjust_block_mem (rtx mem
, HOST_WIDE_INT length
, rtx
*loop_reg
,
14079 *loop_reg
= copy_addr_to_reg (XEXP (mem
, 0));
14081 /* Although the new mem does not refer to a known location,
14082 it does keep up to LENGTH bytes of alignment. */
14083 *loop_mem
= change_address (mem
, BLKmode
, *loop_reg
);
14084 set_mem_align (*loop_mem
, MIN (MEM_ALIGN (mem
), length
* BITS_PER_UNIT
));
14087 /* From mips_block_move_loop:
14089 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14090 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14091 the memory regions do not overlap. */
14094 arm_block_move_unaligned_loop (rtx dest
, rtx src
, HOST_WIDE_INT length
,
14095 unsigned int interleave_factor
,
14096 HOST_WIDE_INT bytes_per_iter
)
14098 rtx src_reg
, dest_reg
, final_src
, test
;
14099 HOST_WIDE_INT leftover
;
14101 leftover
= length
% bytes_per_iter
;
14102 length
-= leftover
;
14104 /* Create registers and memory references for use within the loop. */
14105 arm_adjust_block_mem (src
, bytes_per_iter
, &src_reg
, &src
);
14106 arm_adjust_block_mem (dest
, bytes_per_iter
, &dest_reg
, &dest
);
14108 /* Calculate the value that SRC_REG should have after the last iteration of
14110 final_src
= expand_simple_binop (Pmode
, PLUS
, src_reg
, GEN_INT (length
),
14111 0, 0, OPTAB_WIDEN
);
14113 /* Emit the start of the loop. */
14114 rtx_code_label
*label
= gen_label_rtx ();
14115 emit_label (label
);
14117 /* Emit the loop body. */
14118 arm_block_move_unaligned_straight (dest
, src
, bytes_per_iter
,
14119 interleave_factor
);
14121 /* Move on to the next block. */
14122 emit_move_insn (src_reg
, plus_constant (Pmode
, src_reg
, bytes_per_iter
));
14123 emit_move_insn (dest_reg
, plus_constant (Pmode
, dest_reg
, bytes_per_iter
));
14125 /* Emit the loop condition. */
14126 test
= gen_rtx_NE (VOIDmode
, src_reg
, final_src
);
14127 emit_jump_insn (gen_cbranchsi4 (test
, src_reg
, final_src
, label
));
14129 /* Mop up any left-over bytes. */
14131 arm_block_move_unaligned_straight (dest
, src
, leftover
, interleave_factor
);
14134 /* Emit a block move when either the source or destination is unaligned (not
14135 aligned to a four-byte boundary). This may need further tuning depending on
14136 core type, optimize_size setting, etc. */
14139 arm_movmemqi_unaligned (rtx
*operands
)
14141 HOST_WIDE_INT length
= INTVAL (operands
[2]);
14145 bool src_aligned
= MEM_ALIGN (operands
[1]) >= BITS_PER_WORD
;
14146 bool dst_aligned
= MEM_ALIGN (operands
[0]) >= BITS_PER_WORD
;
14147 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14148 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14149 or dst_aligned though: allow more interleaving in those cases since the
14150 resulting code can be smaller. */
14151 unsigned int interleave_factor
= (src_aligned
|| dst_aligned
) ? 2 : 1;
14152 HOST_WIDE_INT bytes_per_iter
= (src_aligned
|| dst_aligned
) ? 8 : 4;
14155 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
,
14156 interleave_factor
, bytes_per_iter
);
14158 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
,
14159 interleave_factor
);
14163 /* Note that the loop created by arm_block_move_unaligned_loop may be
14164 subject to loop unrolling, which makes tuning this condition a little
14167 arm_block_move_unaligned_loop (operands
[0], operands
[1], length
, 4, 16);
14169 arm_block_move_unaligned_straight (operands
[0], operands
[1], length
, 4);
14176 arm_gen_movmemqi (rtx
*operands
)
14178 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
14179 HOST_WIDE_INT srcoffset
, dstoffset
;
14180 rtx src
, dst
, srcbase
, dstbase
;
14181 rtx part_bytes_reg
= NULL
;
14184 if (!CONST_INT_P (operands
[2])
14185 || !CONST_INT_P (operands
[3])
14186 || INTVAL (operands
[2]) > 64)
14189 if (unaligned_access
&& (INTVAL (operands
[3]) & 3) != 0)
14190 return arm_movmemqi_unaligned (operands
);
14192 if (INTVAL (operands
[3]) & 3)
14195 dstbase
= operands
[0];
14196 srcbase
= operands
[1];
14198 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
14199 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
14201 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
14202 out_words_to_go
= INTVAL (operands
[2]) / 4;
14203 last_bytes
= INTVAL (operands
[2]) & 3;
14204 dstoffset
= srcoffset
= 0;
14206 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
14207 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
14209 while (in_words_to_go
>= 2)
14211 if (in_words_to_go
> 4)
14212 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
14213 TRUE
, srcbase
, &srcoffset
));
14215 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
14216 src
, FALSE
, srcbase
,
14219 if (out_words_to_go
)
14221 if (out_words_to_go
> 4)
14222 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
14223 TRUE
, dstbase
, &dstoffset
));
14224 else if (out_words_to_go
!= 1)
14225 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
14226 out_words_to_go
, dst
,
14229 dstbase
, &dstoffset
));
14232 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14233 emit_move_insn (mem
, gen_rtx_REG (SImode
, R0_REGNUM
));
14234 if (last_bytes
!= 0)
14236 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
14242 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
14243 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
14246 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14247 if (out_words_to_go
)
14251 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14252 sreg
= copy_to_reg (mem
);
14254 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
14255 emit_move_insn (mem
, sreg
);
14258 gcc_assert (!in_words_to_go
); /* Sanity check */
14261 if (in_words_to_go
)
14263 gcc_assert (in_words_to_go
> 0);
14265 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
14266 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
14269 gcc_assert (!last_bytes
|| part_bytes_reg
);
14271 if (BYTES_BIG_ENDIAN
&& last_bytes
)
14273 rtx tmp
= gen_reg_rtx (SImode
);
14275 /* The bytes we want are in the top end of the word. */
14276 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
14277 GEN_INT (8 * (4 - last_bytes
))));
14278 part_bytes_reg
= tmp
;
14282 mem
= adjust_automodify_address (dstbase
, QImode
,
14283 plus_constant (Pmode
, dst
,
14285 dstoffset
+ last_bytes
- 1);
14286 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14290 tmp
= gen_reg_rtx (SImode
);
14291 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
14292 part_bytes_reg
= tmp
;
14299 if (last_bytes
> 1)
14301 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
14302 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
14306 rtx tmp
= gen_reg_rtx (SImode
);
14307 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
14308 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
14309 part_bytes_reg
= tmp
;
14316 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
14317 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
14324 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14327 next_consecutive_mem (rtx mem
)
14329 machine_mode mode
= GET_MODE (mem
);
14330 HOST_WIDE_INT offset
= GET_MODE_SIZE (mode
);
14331 rtx addr
= plus_constant (Pmode
, XEXP (mem
, 0), offset
);
14333 return adjust_automodify_address (mem
, mode
, addr
, offset
);
14336 /* Copy using LDRD/STRD instructions whenever possible.
14337 Returns true upon success. */
14339 gen_movmem_ldrd_strd (rtx
*operands
)
14341 unsigned HOST_WIDE_INT len
;
14342 HOST_WIDE_INT align
;
14343 rtx src
, dst
, base
;
14345 bool src_aligned
, dst_aligned
;
14346 bool src_volatile
, dst_volatile
;
14348 gcc_assert (CONST_INT_P (operands
[2]));
14349 gcc_assert (CONST_INT_P (operands
[3]));
14351 len
= UINTVAL (operands
[2]);
14355 /* Maximum alignment we can assume for both src and dst buffers. */
14356 align
= INTVAL (operands
[3]);
14358 if ((!unaligned_access
) && (len
>= 4) && ((align
& 3) != 0))
14361 /* Place src and dst addresses in registers
14362 and update the corresponding mem rtx. */
14364 dst_volatile
= MEM_VOLATILE_P (dst
);
14365 dst_aligned
= MEM_ALIGN (dst
) >= BITS_PER_WORD
;
14366 base
= copy_to_mode_reg (SImode
, XEXP (dst
, 0));
14367 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
14370 src_volatile
= MEM_VOLATILE_P (src
);
14371 src_aligned
= MEM_ALIGN (src
) >= BITS_PER_WORD
;
14372 base
= copy_to_mode_reg (SImode
, XEXP (src
, 0));
14373 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
14375 if (!unaligned_access
&& !(src_aligned
&& dst_aligned
))
14378 if (src_volatile
|| dst_volatile
)
14381 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14382 if (!(dst_aligned
|| src_aligned
))
14383 return arm_gen_movmemqi (operands
);
14385 /* If the either src or dst is unaligned we'll be accessing it as pairs
14386 of unaligned SImode accesses. Otherwise we can generate DImode
14387 ldrd/strd instructions. */
14388 src
= adjust_address (src
, src_aligned
? DImode
: SImode
, 0);
14389 dst
= adjust_address (dst
, dst_aligned
? DImode
: SImode
, 0);
14394 reg0
= gen_reg_rtx (DImode
);
14395 rtx low_reg
= NULL_RTX
;
14396 rtx hi_reg
= NULL_RTX
;
14398 if (!src_aligned
|| !dst_aligned
)
14400 low_reg
= gen_lowpart (SImode
, reg0
);
14401 hi_reg
= gen_highpart_mode (SImode
, DImode
, reg0
);
14404 emit_move_insn (reg0
, src
);
14407 emit_insn (gen_unaligned_loadsi (low_reg
, src
));
14408 src
= next_consecutive_mem (src
);
14409 emit_insn (gen_unaligned_loadsi (hi_reg
, src
));
14413 emit_move_insn (dst
, reg0
);
14416 emit_insn (gen_unaligned_storesi (dst
, low_reg
));
14417 dst
= next_consecutive_mem (dst
);
14418 emit_insn (gen_unaligned_storesi (dst
, hi_reg
));
14421 src
= next_consecutive_mem (src
);
14422 dst
= next_consecutive_mem (dst
);
14425 gcc_assert (len
< 8);
14428 /* More than a word but less than a double-word to copy. Copy a word. */
14429 reg0
= gen_reg_rtx (SImode
);
14430 src
= adjust_address (src
, SImode
, 0);
14431 dst
= adjust_address (dst
, SImode
, 0);
14433 emit_move_insn (reg0
, src
);
14435 emit_insn (gen_unaligned_loadsi (reg0
, src
));
14438 emit_move_insn (dst
, reg0
);
14440 emit_insn (gen_unaligned_storesi (dst
, reg0
));
14442 src
= next_consecutive_mem (src
);
14443 dst
= next_consecutive_mem (dst
);
14450 /* Copy the remaining bytes. */
14453 dst
= adjust_address (dst
, HImode
, 0);
14454 src
= adjust_address (src
, HImode
, 0);
14455 reg0
= gen_reg_rtx (SImode
);
14457 emit_insn (gen_zero_extendhisi2 (reg0
, src
));
14459 emit_insn (gen_unaligned_loadhiu (reg0
, src
));
14462 emit_insn (gen_movhi (dst
, gen_lowpart(HImode
, reg0
)));
14464 emit_insn (gen_unaligned_storehi (dst
, gen_lowpart (HImode
, reg0
)));
14466 src
= next_consecutive_mem (src
);
14467 dst
= next_consecutive_mem (dst
);
14472 dst
= adjust_address (dst
, QImode
, 0);
14473 src
= adjust_address (src
, QImode
, 0);
14474 reg0
= gen_reg_rtx (QImode
);
14475 emit_move_insn (reg0
, src
);
14476 emit_move_insn (dst
, reg0
);
14480 /* Select a dominance comparison mode if possible for a test of the general
14481 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14482 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14483 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14484 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14485 In all cases OP will be either EQ or NE, but we don't need to know which
14486 here. If we are unable to support a dominance comparison we return
14487 CC mode. This will then fail to match for the RTL expressions that
14488 generate this call. */
14490 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
14492 enum rtx_code cond1
, cond2
;
14495 /* Currently we will probably get the wrong result if the individual
14496 comparisons are not simple. This also ensures that it is safe to
14497 reverse a comparison if necessary. */
14498 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
14500 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
14504 /* The if_then_else variant of this tests the second condition if the
14505 first passes, but is true if the first fails. Reverse the first
14506 condition to get a true "inclusive-or" expression. */
14507 if (cond_or
== DOM_CC_NX_OR_Y
)
14508 cond1
= reverse_condition (cond1
);
14510 /* If the comparisons are not equal, and one doesn't dominate the other,
14511 then we can't do this. */
14513 && !comparison_dominates_p (cond1
, cond2
)
14514 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
14518 std::swap (cond1
, cond2
);
14523 if (cond_or
== DOM_CC_X_AND_Y
)
14528 case EQ
: return CC_DEQmode
;
14529 case LE
: return CC_DLEmode
;
14530 case LEU
: return CC_DLEUmode
;
14531 case GE
: return CC_DGEmode
;
14532 case GEU
: return CC_DGEUmode
;
14533 default: gcc_unreachable ();
14537 if (cond_or
== DOM_CC_X_AND_Y
)
14549 gcc_unreachable ();
14553 if (cond_or
== DOM_CC_X_AND_Y
)
14565 gcc_unreachable ();
14569 if (cond_or
== DOM_CC_X_AND_Y
)
14570 return CC_DLTUmode
;
14575 return CC_DLTUmode
;
14577 return CC_DLEUmode
;
14581 gcc_unreachable ();
14585 if (cond_or
== DOM_CC_X_AND_Y
)
14586 return CC_DGTUmode
;
14591 return CC_DGTUmode
;
14593 return CC_DGEUmode
;
14597 gcc_unreachable ();
14600 /* The remaining cases only occur when both comparisons are the
14603 gcc_assert (cond1
== cond2
);
14607 gcc_assert (cond1
== cond2
);
14611 gcc_assert (cond1
== cond2
);
14615 gcc_assert (cond1
== cond2
);
14616 return CC_DLEUmode
;
14619 gcc_assert (cond1
== cond2
);
14620 return CC_DGEUmode
;
14623 gcc_unreachable ();
14628 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
14630 /* All floating point compares return CCFP if it is an equality
14631 comparison, and CCFPE otherwise. */
14632 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14655 gcc_unreachable ();
14659 /* A compare with a shifted operand. Because of canonicalization, the
14660 comparison will have to be swapped when we emit the assembler. */
14661 if (GET_MODE (y
) == SImode
14662 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14663 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14664 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
14665 || GET_CODE (x
) == ROTATERT
))
14668 /* This operation is performed swapped, but since we only rely on the Z
14669 flag we don't need an additional mode. */
14670 if (GET_MODE (y
) == SImode
14671 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
14672 && GET_CODE (x
) == NEG
14673 && (op
== EQ
|| op
== NE
))
14676 /* This is a special case that is used by combine to allow a
14677 comparison of a shifted byte load to be split into a zero-extend
14678 followed by a comparison of the shifted integer (only valid for
14679 equalities and unsigned inequalities). */
14680 if (GET_MODE (x
) == SImode
14681 && GET_CODE (x
) == ASHIFT
14682 && CONST_INT_P (XEXP (x
, 1)) && INTVAL (XEXP (x
, 1)) == 24
14683 && GET_CODE (XEXP (x
, 0)) == SUBREG
14684 && MEM_P (SUBREG_REG (XEXP (x
, 0)))
14685 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
14686 && (op
== EQ
|| op
== NE
14687 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
14688 && CONST_INT_P (y
))
14691 /* A construct for a conditional compare, if the false arm contains
14692 0, then both conditions must be true, otherwise either condition
14693 must be true. Not all conditions are possible, so CCmode is
14694 returned if it can't be done. */
14695 if (GET_CODE (x
) == IF_THEN_ELSE
14696 && (XEXP (x
, 2) == const0_rtx
14697 || XEXP (x
, 2) == const1_rtx
)
14698 && COMPARISON_P (XEXP (x
, 0))
14699 && COMPARISON_P (XEXP (x
, 1)))
14700 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14701 INTVAL (XEXP (x
, 2)));
14703 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14704 if (GET_CODE (x
) == AND
14705 && (op
== EQ
|| op
== NE
)
14706 && COMPARISON_P (XEXP (x
, 0))
14707 && COMPARISON_P (XEXP (x
, 1)))
14708 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14711 if (GET_CODE (x
) == IOR
14712 && (op
== EQ
|| op
== NE
)
14713 && COMPARISON_P (XEXP (x
, 0))
14714 && COMPARISON_P (XEXP (x
, 1)))
14715 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
14718 /* An operation (on Thumb) where we want to test for a single bit.
14719 This is done by shifting that bit up into the top bit of a
14720 scratch register; we can then branch on the sign bit. */
14722 && GET_MODE (x
) == SImode
14723 && (op
== EQ
|| op
== NE
)
14724 && GET_CODE (x
) == ZERO_EXTRACT
14725 && XEXP (x
, 1) == const1_rtx
)
14728 /* An operation that sets the condition codes as a side-effect, the
14729 V flag is not set correctly, so we can only use comparisons where
14730 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14732 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14733 if (GET_MODE (x
) == SImode
14735 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
14736 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
14737 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
14738 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
14739 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
14740 || GET_CODE (x
) == LSHIFTRT
14741 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
14742 || GET_CODE (x
) == ROTATERT
14743 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
14744 return CC_NOOVmode
;
14746 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
14749 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
14750 && GET_CODE (x
) == PLUS
14751 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
14754 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
14760 /* A DImode comparison against zero can be implemented by
14761 or'ing the two halves together. */
14762 if (y
== const0_rtx
)
14765 /* We can do an equality test in three Thumb instructions. */
14775 /* DImode unsigned comparisons can be implemented by cmp +
14776 cmpeq without a scratch register. Not worth doing in
14787 /* DImode signed and unsigned comparisons can be implemented
14788 by cmp + sbcs with a scratch register, but that does not
14789 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14790 gcc_assert (op
!= EQ
&& op
!= NE
);
14794 gcc_unreachable ();
14798 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
)
14799 return GET_MODE (x
);
14804 /* X and Y are two things to compare using CODE. Emit the compare insn and
14805 return the rtx for register 0 in the proper mode. FP means this is a
14806 floating point compare: I don't think that it is needed on the arm. */
14808 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
, rtx scratch
)
14812 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
14814 /* We might have X as a constant, Y as a register because of the predicates
14815 used for cmpdi. If so, force X to a register here. */
14816 if (dimode_comparison
&& !REG_P (x
))
14817 x
= force_reg (DImode
, x
);
14819 mode
= SELECT_CC_MODE (code
, x
, y
);
14820 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
14822 if (dimode_comparison
14823 && mode
!= CC_CZmode
)
14827 /* To compare two non-zero values for equality, XOR them and
14828 then compare against zero. Not used for ARM mode; there
14829 CC_CZmode is cheaper. */
14830 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
14832 gcc_assert (!reload_completed
);
14833 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
14837 /* A scratch register is required. */
14838 if (reload_completed
)
14839 gcc_assert (scratch
!= NULL
&& GET_MODE (scratch
) == SImode
);
14841 scratch
= gen_rtx_SCRATCH (SImode
);
14843 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
14844 set
= gen_rtx_SET (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14845 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
14848 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
14853 /* Generate a sequence of insns that will generate the correct return
14854 address mask depending on the physical architecture that the program
14857 arm_gen_return_addr_mask (void)
14859 rtx reg
= gen_reg_rtx (Pmode
);
14861 emit_insn (gen_return_addr_mask (reg
));
14866 arm_reload_in_hi (rtx
*operands
)
14868 rtx ref
= operands
[1];
14870 HOST_WIDE_INT offset
= 0;
14872 if (GET_CODE (ref
) == SUBREG
)
14874 offset
= SUBREG_BYTE (ref
);
14875 ref
= SUBREG_REG (ref
);
14880 /* We have a pseudo which has been spilt onto the stack; there
14881 are two cases here: the first where there is a simple
14882 stack-slot replacement and a second where the stack-slot is
14883 out of range, or is used as a subreg. */
14884 if (reg_equiv_mem (REGNO (ref
)))
14886 ref
= reg_equiv_mem (REGNO (ref
));
14887 base
= find_replacement (&XEXP (ref
, 0));
14890 /* The slot is out of range, or was dressed up in a SUBREG. */
14891 base
= reg_equiv_address (REGNO (ref
));
14893 /* PR 62554: If there is no equivalent memory location then just move
14894 the value as an SImode register move. This happens when the target
14895 architecture variant does not have an HImode register move. */
14898 gcc_assert (REG_P (operands
[0]));
14899 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14900 gen_rtx_SUBREG (SImode
, ref
, 0)));
14905 base
= find_replacement (&XEXP (ref
, 0));
14907 /* Handle the case where the address is too complex to be offset by 1. */
14908 if (GET_CODE (base
) == MINUS
14909 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
14911 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14913 emit_set_insn (base_plus
, base
);
14916 else if (GET_CODE (base
) == PLUS
)
14918 /* The addend must be CONST_INT, or we would have dealt with it above. */
14919 HOST_WIDE_INT hi
, lo
;
14921 offset
+= INTVAL (XEXP (base
, 1));
14922 base
= XEXP (base
, 0);
14924 /* Rework the address into a legal sequence of insns. */
14925 /* Valid range for lo is -4095 -> 4095 */
14928 : -((-offset
) & 0xfff));
14930 /* Corner case, if lo is the max offset then we would be out of range
14931 once we have added the additional 1 below, so bump the msb into the
14932 pre-loading insn(s). */
14936 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
14937 ^ (HOST_WIDE_INT
) 0x80000000)
14938 - (HOST_WIDE_INT
) 0x80000000);
14940 gcc_assert (hi
+ lo
== offset
);
14944 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14946 /* Get the base address; addsi3 knows how to handle constants
14947 that require more than one insn. */
14948 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
14954 /* Operands[2] may overlap operands[0] (though it won't overlap
14955 operands[1]), that's why we asked for a DImode reg -- so we can
14956 use the bit that does not overlap. */
14957 if (REGNO (operands
[2]) == REGNO (operands
[0]))
14958 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
14960 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
14962 emit_insn (gen_zero_extendqisi2 (scratch
,
14963 gen_rtx_MEM (QImode
,
14964 plus_constant (Pmode
, base
,
14966 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14967 gen_rtx_MEM (QImode
,
14968 plus_constant (Pmode
, base
,
14970 if (!BYTES_BIG_ENDIAN
)
14971 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14972 gen_rtx_IOR (SImode
,
14975 gen_rtx_SUBREG (SImode
, operands
[0], 0),
14979 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
14980 gen_rtx_IOR (SImode
,
14981 gen_rtx_ASHIFT (SImode
, scratch
,
14983 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
14986 /* Handle storing a half-word to memory during reload by synthesizing as two
14987 byte stores. Take care not to clobber the input values until after we
14988 have moved them somewhere safe. This code assumes that if the DImode
14989 scratch in operands[2] overlaps either the input value or output address
14990 in some way, then that value must die in this insn (we absolutely need
14991 two scratch registers for some corner cases). */
14993 arm_reload_out_hi (rtx
*operands
)
14995 rtx ref
= operands
[0];
14996 rtx outval
= operands
[1];
14998 HOST_WIDE_INT offset
= 0;
15000 if (GET_CODE (ref
) == SUBREG
)
15002 offset
= SUBREG_BYTE (ref
);
15003 ref
= SUBREG_REG (ref
);
15008 /* We have a pseudo which has been spilt onto the stack; there
15009 are two cases here: the first where there is a simple
15010 stack-slot replacement and a second where the stack-slot is
15011 out of range, or is used as a subreg. */
15012 if (reg_equiv_mem (REGNO (ref
)))
15014 ref
= reg_equiv_mem (REGNO (ref
));
15015 base
= find_replacement (&XEXP (ref
, 0));
15018 /* The slot is out of range, or was dressed up in a SUBREG. */
15019 base
= reg_equiv_address (REGNO (ref
));
15021 /* PR 62254: If there is no equivalent memory location then just move
15022 the value as an SImode register move. This happens when the target
15023 architecture variant does not have an HImode register move. */
15026 gcc_assert (REG_P (outval
) || SUBREG_P (outval
));
15028 if (REG_P (outval
))
15030 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15031 gen_rtx_SUBREG (SImode
, outval
, 0)));
15033 else /* SUBREG_P (outval) */
15035 if (GET_MODE (SUBREG_REG (outval
)) == SImode
)
15036 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode
, ref
, 0),
15037 SUBREG_REG (outval
)));
15039 /* FIXME: Handle other cases ? */
15040 gcc_unreachable ();
15046 base
= find_replacement (&XEXP (ref
, 0));
15048 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
15050 /* Handle the case where the address is too complex to be offset by 1. */
15051 if (GET_CODE (base
) == MINUS
15052 || (GET_CODE (base
) == PLUS
&& !CONST_INT_P (XEXP (base
, 1))))
15054 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15056 /* Be careful not to destroy OUTVAL. */
15057 if (reg_overlap_mentioned_p (base_plus
, outval
))
15059 /* Updating base_plus might destroy outval, see if we can
15060 swap the scratch and base_plus. */
15061 if (!reg_overlap_mentioned_p (scratch
, outval
))
15062 std::swap (scratch
, base_plus
);
15065 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15067 /* Be conservative and copy OUTVAL into the scratch now,
15068 this should only be necessary if outval is a subreg
15069 of something larger than a word. */
15070 /* XXX Might this clobber base? I can't see how it can,
15071 since scratch is known to overlap with OUTVAL, and
15072 must be wider than a word. */
15073 emit_insn (gen_movhi (scratch_hi
, outval
));
15074 outval
= scratch_hi
;
15078 emit_set_insn (base_plus
, base
);
15081 else if (GET_CODE (base
) == PLUS
)
15083 /* The addend must be CONST_INT, or we would have dealt with it above. */
15084 HOST_WIDE_INT hi
, lo
;
15086 offset
+= INTVAL (XEXP (base
, 1));
15087 base
= XEXP (base
, 0);
15089 /* Rework the address into a legal sequence of insns. */
15090 /* Valid range for lo is -4095 -> 4095 */
15093 : -((-offset
) & 0xfff));
15095 /* Corner case, if lo is the max offset then we would be out of range
15096 once we have added the additional 1 below, so bump the msb into the
15097 pre-loading insn(s). */
15101 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
15102 ^ (HOST_WIDE_INT
) 0x80000000)
15103 - (HOST_WIDE_INT
) 0x80000000);
15105 gcc_assert (hi
+ lo
== offset
);
15109 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
15111 /* Be careful not to destroy OUTVAL. */
15112 if (reg_overlap_mentioned_p (base_plus
, outval
))
15114 /* Updating base_plus might destroy outval, see if we
15115 can swap the scratch and base_plus. */
15116 if (!reg_overlap_mentioned_p (scratch
, outval
))
15117 std::swap (scratch
, base_plus
);
15120 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
15122 /* Be conservative and copy outval into scratch now,
15123 this should only be necessary if outval is a
15124 subreg of something larger than a word. */
15125 /* XXX Might this clobber base? I can't see how it
15126 can, since scratch is known to overlap with
15128 emit_insn (gen_movhi (scratch_hi
, outval
));
15129 outval
= scratch_hi
;
15133 /* Get the base address; addsi3 knows how to handle constants
15134 that require more than one insn. */
15135 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
15141 if (BYTES_BIG_ENDIAN
)
15143 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15144 plus_constant (Pmode
, base
,
15146 gen_lowpart (QImode
, outval
)));
15147 emit_insn (gen_lshrsi3 (scratch
,
15148 gen_rtx_SUBREG (SImode
, outval
, 0),
15150 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15152 gen_lowpart (QImode
, scratch
)));
15156 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, base
,
15158 gen_lowpart (QImode
, outval
)));
15159 emit_insn (gen_lshrsi3 (scratch
,
15160 gen_rtx_SUBREG (SImode
, outval
, 0),
15162 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
15163 plus_constant (Pmode
, base
,
15165 gen_lowpart (QImode
, scratch
)));
15169 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15170 (padded to the size of a word) should be passed in a register. */
15173 arm_must_pass_in_stack (machine_mode mode
, const_tree type
)
15175 if (TARGET_AAPCS_BASED
)
15176 return must_pass_in_stack_var_size (mode
, type
);
15178 return must_pass_in_stack_var_size_or_pad (mode
, type
);
15182 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15183 byte of a stack argument has useful data. For legacy APCS ABIs we use
15184 the default. For AAPCS based ABIs small aggregate types are placed
15185 in the lowest memory address. */
15187 static pad_direction
15188 arm_function_arg_padding (machine_mode mode
, const_tree type
)
15190 if (!TARGET_AAPCS_BASED
)
15191 return default_function_arg_padding (mode
, type
);
15193 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
15194 return PAD_DOWNWARD
;
15200 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15201 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15202 register has useful data, and return the opposite if the most
15203 significant byte does. */
15206 arm_pad_reg_upward (machine_mode mode
,
15207 tree type
, int first ATTRIBUTE_UNUSED
)
15209 if (TARGET_AAPCS_BASED
&& BYTES_BIG_ENDIAN
)
15211 /* For AAPCS, small aggregates, small fixed-point types,
15212 and small complex types are always padded upwards. */
15215 if ((AGGREGATE_TYPE_P (type
)
15216 || TREE_CODE (type
) == COMPLEX_TYPE
15217 || FIXED_POINT_TYPE_P (type
))
15218 && int_size_in_bytes (type
) <= 4)
15223 if ((COMPLEX_MODE_P (mode
) || ALL_FIXED_POINT_MODE_P (mode
))
15224 && GET_MODE_SIZE (mode
) <= 4)
15229 /* Otherwise, use default padding. */
15230 return !BYTES_BIG_ENDIAN
;
15233 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15234 assuming that the address in the base register is word aligned. */
15236 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset
)
15238 HOST_WIDE_INT max_offset
;
15240 /* Offset must be a multiple of 4 in Thumb mode. */
15241 if (TARGET_THUMB2
&& ((offset
& 3) != 0))
15246 else if (TARGET_ARM
)
15251 return ((offset
<= max_offset
) && (offset
>= -max_offset
));
15254 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15255 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15256 Assumes that the address in the base register RN is word aligned. Pattern
15257 guarantees that both memory accesses use the same base register,
15258 the offsets are constants within the range, and the gap between the offsets is 4.
15259 If preload complete then check that registers are legal. WBACK indicates whether
15260 address is updated. LOAD indicates whether memory access is load or store. */
15262 operands_ok_ldrd_strd (rtx rt
, rtx rt2
, rtx rn
, HOST_WIDE_INT offset
,
15263 bool wback
, bool load
)
15265 unsigned int t
, t2
, n
;
15267 if (!reload_completed
)
15270 if (!offset_ok_for_ldrd_strd (offset
))
15277 if ((TARGET_THUMB2
)
15278 && ((wback
&& (n
== t
|| n
== t2
))
15279 || (t
== SP_REGNUM
)
15280 || (t
== PC_REGNUM
)
15281 || (t2
== SP_REGNUM
)
15282 || (t2
== PC_REGNUM
)
15283 || (!load
&& (n
== PC_REGNUM
))
15284 || (load
&& (t
== t2
))
15285 /* Triggers Cortex-M3 LDRD errata. */
15286 || (!wback
&& load
&& fix_cm3_ldrd
&& (n
== t
))))
15290 && ((wback
&& (n
== t
|| n
== t2
))
15291 || (t2
== PC_REGNUM
)
15292 || (t
% 2 != 0) /* First destination register is not even. */
15294 /* PC can be used as base register (for offset addressing only),
15295 but it is depricated. */
15296 || (n
== PC_REGNUM
)))
15302 /* Return true if a 64-bit access with alignment ALIGN and with a
15303 constant offset OFFSET from the base pointer is permitted on this
15306 align_ok_ldrd_strd (HOST_WIDE_INT align
, HOST_WIDE_INT offset
)
15308 return (unaligned_access
15309 ? (align
>= BITS_PER_WORD
&& (offset
& 3) == 0)
15310 : (align
>= 2 * BITS_PER_WORD
&& (offset
& 7) == 0));
15313 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15314 operand MEM's address contains an immediate offset from the base
15315 register and has no side effects, in which case it sets BASE,
15316 OFFSET and ALIGN accordingly. */
15318 mem_ok_for_ldrd_strd (rtx mem
, rtx
*base
, rtx
*offset
, HOST_WIDE_INT
*align
)
15322 gcc_assert (base
!= NULL
&& offset
!= NULL
);
15324 /* TODO: Handle more general memory operand patterns, such as
15325 PRE_DEC and PRE_INC. */
15327 if (side_effects_p (mem
))
15330 /* Can't deal with subregs. */
15331 if (GET_CODE (mem
) == SUBREG
)
15334 gcc_assert (MEM_P (mem
));
15336 *offset
= const0_rtx
;
15337 *align
= MEM_ALIGN (mem
);
15339 addr
= XEXP (mem
, 0);
15341 /* If addr isn't valid for DImode, then we can't handle it. */
15342 if (!arm_legitimate_address_p (DImode
, addr
,
15343 reload_in_progress
|| reload_completed
))
15351 else if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == MINUS
)
15353 *base
= XEXP (addr
, 0);
15354 *offset
= XEXP (addr
, 1);
15355 return (REG_P (*base
) && CONST_INT_P (*offset
));
15361 /* Called from a peephole2 to replace two word-size accesses with a
15362 single LDRD/STRD instruction. Returns true iff we can generate a
15363 new instruction sequence. That is, both accesses use the same base
15364 register and the gap between constant offsets is 4. This function
15365 may reorder its operands to match ldrd/strd RTL templates.
15366 OPERANDS are the operands found by the peephole matcher;
15367 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15368 corresponding memory operands. LOAD indicaates whether the access
15369 is load or store. CONST_STORE indicates a store of constant
15370 integer values held in OPERANDS[4,5] and assumes that the pattern
15371 is of length 4 insn, for the purpose of checking dead registers.
15372 COMMUTE indicates that register operands may be reordered. */
15374 gen_operands_ldrd_strd (rtx
*operands
, bool load
,
15375 bool const_store
, bool commute
)
15378 HOST_WIDE_INT offsets
[2], offset
, align
[2];
15379 rtx base
= NULL_RTX
;
15380 rtx cur_base
, cur_offset
, tmp
;
15382 HARD_REG_SET regset
;
15384 gcc_assert (!const_store
|| !load
);
15385 /* Check that the memory references are immediate offsets from the
15386 same base register. Extract the base register, the destination
15387 registers, and the corresponding memory offsets. */
15388 for (i
= 0; i
< nops
; i
++)
15390 if (!mem_ok_for_ldrd_strd (operands
[nops
+i
], &cur_base
, &cur_offset
,
15396 else if (REGNO (base
) != REGNO (cur_base
))
15399 offsets
[i
] = INTVAL (cur_offset
);
15400 if (GET_CODE (operands
[i
]) == SUBREG
)
15402 tmp
= SUBREG_REG (operands
[i
]);
15403 gcc_assert (GET_MODE (operands
[i
]) == GET_MODE (tmp
));
15408 /* Make sure there is no dependency between the individual loads. */
15409 if (load
&& REGNO (operands
[0]) == REGNO (base
))
15410 return false; /* RAW */
15412 if (load
&& REGNO (operands
[0]) == REGNO (operands
[1]))
15413 return false; /* WAW */
15415 /* If the same input register is used in both stores
15416 when storing different constants, try to find a free register.
15417 For example, the code
15422 can be transformed into
15426 in Thumb mode assuming that r1 is free.
15427 For ARM mode do the same but only if the starting register
15428 can be made to be even. */
15430 && REGNO (operands
[0]) == REGNO (operands
[1])
15431 && INTVAL (operands
[4]) != INTVAL (operands
[5]))
15435 CLEAR_HARD_REG_SET (regset
);
15436 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15437 if (tmp
== NULL_RTX
)
15440 /* Use the new register in the first load to ensure that
15441 if the original input register is not dead after peephole,
15442 then it will have the correct constant value. */
15445 else if (TARGET_ARM
)
15447 int regno
= REGNO (operands
[0]);
15448 if (!peep2_reg_dead_p (4, operands
[0]))
15450 /* When the input register is even and is not dead after the
15451 pattern, it has to hold the second constant but we cannot
15452 form a legal STRD in ARM mode with this register as the second
15454 if (regno
% 2 == 0)
15457 /* Is regno-1 free? */
15458 SET_HARD_REG_SET (regset
);
15459 CLEAR_HARD_REG_BIT(regset
, regno
- 1);
15460 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15461 if (tmp
== NULL_RTX
)
15468 /* Find a DImode register. */
15469 CLEAR_HARD_REG_SET (regset
);
15470 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15471 if (tmp
!= NULL_RTX
)
15473 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15474 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15478 /* Can we use the input register to form a DI register? */
15479 SET_HARD_REG_SET (regset
);
15480 CLEAR_HARD_REG_BIT(regset
,
15481 regno
% 2 == 0 ? regno
+ 1 : regno
- 1);
15482 tmp
= peep2_find_free_register (0, 4, "r", SImode
, ®set
);
15483 if (tmp
== NULL_RTX
)
15485 operands
[regno
% 2 == 1 ? 0 : 1] = tmp
;
15489 gcc_assert (operands
[0] != NULL_RTX
);
15490 gcc_assert (operands
[1] != NULL_RTX
);
15491 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15492 gcc_assert (REGNO (operands
[1]) == REGNO (operands
[0]) + 1);
15496 /* Make sure the instructions are ordered with lower memory access first. */
15497 if (offsets
[0] > offsets
[1])
15499 gap
= offsets
[0] - offsets
[1];
15500 offset
= offsets
[1];
15502 /* Swap the instructions such that lower memory is accessed first. */
15503 std::swap (operands
[0], operands
[1]);
15504 std::swap (operands
[2], operands
[3]);
15505 std::swap (align
[0], align
[1]);
15507 std::swap (operands
[4], operands
[5]);
15511 gap
= offsets
[1] - offsets
[0];
15512 offset
= offsets
[0];
15515 /* Make sure accesses are to consecutive memory locations. */
15519 if (!align_ok_ldrd_strd (align
[0], offset
))
15522 /* Make sure we generate legal instructions. */
15523 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15527 /* In Thumb state, where registers are almost unconstrained, there
15528 is little hope to fix it. */
15532 if (load
&& commute
)
15534 /* Try reordering registers. */
15535 std::swap (operands
[0], operands
[1]);
15536 if (operands_ok_ldrd_strd (operands
[0], operands
[1], base
, offset
,
15543 /* If input registers are dead after this pattern, they can be
15544 reordered or replaced by other registers that are free in the
15545 current pattern. */
15546 if (!peep2_reg_dead_p (4, operands
[0])
15547 || !peep2_reg_dead_p (4, operands
[1]))
15550 /* Try to reorder the input registers. */
15551 /* For example, the code
15556 can be transformed into
15561 if (operands_ok_ldrd_strd (operands
[1], operands
[0], base
, offset
,
15564 std::swap (operands
[0], operands
[1]);
15568 /* Try to find a free DI register. */
15569 CLEAR_HARD_REG_SET (regset
);
15570 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[0]));
15571 add_to_hard_reg_set (®set
, SImode
, REGNO (operands
[1]));
15574 tmp
= peep2_find_free_register (0, 4, "r", DImode
, ®set
);
15575 if (tmp
== NULL_RTX
)
15578 /* DREG must be an even-numbered register in DImode.
15579 Split it into SI registers. */
15580 operands
[0] = simplify_gen_subreg (SImode
, tmp
, DImode
, 0);
15581 operands
[1] = simplify_gen_subreg (SImode
, tmp
, DImode
, 4);
15582 gcc_assert (operands
[0] != NULL_RTX
);
15583 gcc_assert (operands
[1] != NULL_RTX
);
15584 gcc_assert (REGNO (operands
[0]) % 2 == 0);
15585 gcc_assert (REGNO (operands
[0]) + 1 == REGNO (operands
[1]));
15587 return (operands_ok_ldrd_strd (operands
[0], operands
[1],
15599 /* Print a symbolic form of X to the debug file, F. */
15601 arm_print_value (FILE *f
, rtx x
)
15603 switch (GET_CODE (x
))
15606 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
15610 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
15618 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
15620 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
15621 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
15629 fprintf (f
, "\"%s\"", XSTR (x
, 0));
15633 fprintf (f
, "`%s'", XSTR (x
, 0));
15637 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
15641 arm_print_value (f
, XEXP (x
, 0));
15645 arm_print_value (f
, XEXP (x
, 0));
15647 arm_print_value (f
, XEXP (x
, 1));
15655 fprintf (f
, "????");
15660 /* Routines for manipulation of the constant pool. */
15662 /* Arm instructions cannot load a large constant directly into a
15663 register; they have to come from a pc relative load. The constant
15664 must therefore be placed in the addressable range of the pc
15665 relative load. Depending on the precise pc relative load
15666 instruction the range is somewhere between 256 bytes and 4k. This
15667 means that we often have to dump a constant inside a function, and
15668 generate code to branch around it.
15670 It is important to minimize this, since the branches will slow
15671 things down and make the code larger.
15673 Normally we can hide the table after an existing unconditional
15674 branch so that there is no interruption of the flow, but in the
15675 worst case the code looks like this:
15693 We fix this by performing a scan after scheduling, which notices
15694 which instructions need to have their operands fetched from the
15695 constant table and builds the table.
15697 The algorithm starts by building a table of all the constants that
15698 need fixing up and all the natural barriers in the function (places
15699 where a constant table can be dropped without breaking the flow).
15700 For each fixup we note how far the pc-relative replacement will be
15701 able to reach and the offset of the instruction into the function.
15703 Having built the table we then group the fixes together to form
15704 tables that are as large as possible (subject to addressing
15705 constraints) and emit each table of constants after the last
15706 barrier that is within range of all the instructions in the group.
15707 If a group does not contain a barrier, then we forcibly create one
15708 by inserting a jump instruction into the flow. Once the table has
15709 been inserted, the insns are then modified to reference the
15710 relevant entry in the pool.
15712 Possible enhancements to the algorithm (not implemented) are:
15714 1) For some processors and object formats, there may be benefit in
15715 aligning the pools to the start of cache lines; this alignment
15716 would need to be taken into account when calculating addressability
15719 /* These typedefs are located at the start of this file, so that
15720 they can be used in the prototypes there. This comment is to
15721 remind readers of that fact so that the following structures
15722 can be understood more easily.
15724 typedef struct minipool_node Mnode;
15725 typedef struct minipool_fixup Mfix; */
15727 struct minipool_node
15729 /* Doubly linked chain of entries. */
15732 /* The maximum offset into the code that this entry can be placed. While
15733 pushing fixes for forward references, all entries are sorted in order
15734 of increasing max_address. */
15735 HOST_WIDE_INT max_address
;
15736 /* Similarly for an entry inserted for a backwards ref. */
15737 HOST_WIDE_INT min_address
;
15738 /* The number of fixes referencing this entry. This can become zero
15739 if we "unpush" an entry. In this case we ignore the entry when we
15740 come to emit the code. */
15742 /* The offset from the start of the minipool. */
15743 HOST_WIDE_INT offset
;
15744 /* The value in table. */
15746 /* The mode of value. */
15748 /* The size of the value. With iWMMXt enabled
15749 sizes > 4 also imply an alignment of 8-bytes. */
15753 struct minipool_fixup
15757 HOST_WIDE_INT address
;
15763 HOST_WIDE_INT forwards
;
15764 HOST_WIDE_INT backwards
;
15767 /* Fixes less than a word need padding out to a word boundary. */
15768 #define MINIPOOL_FIX_SIZE(mode) \
15769 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15771 static Mnode
* minipool_vector_head
;
15772 static Mnode
* minipool_vector_tail
;
15773 static rtx_code_label
*minipool_vector_label
;
15774 static int minipool_pad
;
15776 /* The linked list of all minipool fixes required for this function. */
15777 Mfix
* minipool_fix_head
;
15778 Mfix
* minipool_fix_tail
;
15779 /* The fix entry for the current minipool, once it has been placed. */
15780 Mfix
* minipool_barrier
;
15782 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15783 #define JUMP_TABLES_IN_TEXT_SECTION 0
15786 static HOST_WIDE_INT
15787 get_jump_table_size (rtx_jump_table_data
*insn
)
15789 /* ADDR_VECs only take room if read-only data does into the text
15791 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
15793 rtx body
= PATTERN (insn
);
15794 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
15795 HOST_WIDE_INT size
;
15796 HOST_WIDE_INT modesize
;
15798 modesize
= GET_MODE_SIZE (GET_MODE (body
));
15799 size
= modesize
* XVECLEN (body
, elt
);
15803 /* Round up size of TBB table to a halfword boundary. */
15804 size
= (size
+ 1) & ~HOST_WIDE_INT_1
;
15807 /* No padding necessary for TBH. */
15810 /* Add two bytes for alignment on Thumb. */
15815 gcc_unreachable ();
15823 /* Return the maximum amount of padding that will be inserted before
15826 static HOST_WIDE_INT
15827 get_label_padding (rtx label
)
15829 HOST_WIDE_INT align
, min_insn_size
;
15831 align
= 1 << label_to_alignment (label
);
15832 min_insn_size
= TARGET_THUMB
? 2 : 4;
15833 return align
> min_insn_size
? align
- min_insn_size
: 0;
15836 /* Move a minipool fix MP from its current location to before MAX_MP.
15837 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15838 constraints may need updating. */
15840 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
15841 HOST_WIDE_INT max_address
)
15843 /* The code below assumes these are different. */
15844 gcc_assert (mp
!= max_mp
);
15846 if (max_mp
== NULL
)
15848 if (max_address
< mp
->max_address
)
15849 mp
->max_address
= max_address
;
15853 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15854 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15856 mp
->max_address
= max_address
;
15858 /* Unlink MP from its current position. Since max_mp is non-null,
15859 mp->prev must be non-null. */
15860 mp
->prev
->next
= mp
->next
;
15861 if (mp
->next
!= NULL
)
15862 mp
->next
->prev
= mp
->prev
;
15864 minipool_vector_tail
= mp
->prev
;
15866 /* Re-insert it before MAX_MP. */
15868 mp
->prev
= max_mp
->prev
;
15871 if (mp
->prev
!= NULL
)
15872 mp
->prev
->next
= mp
;
15874 minipool_vector_head
= mp
;
15877 /* Save the new entry. */
15880 /* Scan over the preceding entries and adjust their addresses as
15882 while (mp
->prev
!= NULL
15883 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
15885 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
15892 /* Add a constant to the minipool for a forward reference. Returns the
15893 node added or NULL if the constant will not fit in this pool. */
15895 add_minipool_forward_ref (Mfix
*fix
)
15897 /* If set, max_mp is the first pool_entry that has a lower
15898 constraint than the one we are trying to add. */
15899 Mnode
* max_mp
= NULL
;
15900 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
15903 /* If the minipool starts before the end of FIX->INSN then this FIX
15904 can not be placed into the current pool. Furthermore, adding the
15905 new constant pool entry may cause the pool to start FIX_SIZE bytes
15907 if (minipool_vector_head
&&
15908 (fix
->address
+ get_attr_length (fix
->insn
)
15909 >= minipool_vector_head
->max_address
- fix
->fix_size
))
15912 /* Scan the pool to see if a constant with the same value has
15913 already been added. While we are doing this, also note the
15914 location where we must insert the constant if it doesn't already
15916 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
15918 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
15919 && fix
->mode
== mp
->mode
15920 && (!LABEL_P (fix
->value
)
15921 || (CODE_LABEL_NUMBER (fix
->value
)
15922 == CODE_LABEL_NUMBER (mp
->value
)))
15923 && rtx_equal_p (fix
->value
, mp
->value
))
15925 /* More than one fix references this entry. */
15927 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
15930 /* Note the insertion point if necessary. */
15932 && mp
->max_address
> max_address
)
15935 /* If we are inserting an 8-bytes aligned quantity and
15936 we have not already found an insertion point, then
15937 make sure that all such 8-byte aligned quantities are
15938 placed at the start of the pool. */
15939 if (ARM_DOUBLEWORD_ALIGN
15941 && fix
->fix_size
>= 8
15942 && mp
->fix_size
< 8)
15945 max_address
= mp
->max_address
;
15949 /* The value is not currently in the minipool, so we need to create
15950 a new entry for it. If MAX_MP is NULL, the entry will be put on
15951 the end of the list since the placement is less constrained than
15952 any existing entry. Otherwise, we insert the new fix before
15953 MAX_MP and, if necessary, adjust the constraints on the other
15956 mp
->fix_size
= fix
->fix_size
;
15957 mp
->mode
= fix
->mode
;
15958 mp
->value
= fix
->value
;
15960 /* Not yet required for a backwards ref. */
15961 mp
->min_address
= -65536;
15963 if (max_mp
== NULL
)
15965 mp
->max_address
= max_address
;
15967 mp
->prev
= minipool_vector_tail
;
15969 if (mp
->prev
== NULL
)
15971 minipool_vector_head
= mp
;
15972 minipool_vector_label
= gen_label_rtx ();
15975 mp
->prev
->next
= mp
;
15977 minipool_vector_tail
= mp
;
15981 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
15982 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
15984 mp
->max_address
= max_address
;
15987 mp
->prev
= max_mp
->prev
;
15989 if (mp
->prev
!= NULL
)
15990 mp
->prev
->next
= mp
;
15992 minipool_vector_head
= mp
;
15995 /* Save the new entry. */
15998 /* Scan over the preceding entries and adjust their addresses as
16000 while (mp
->prev
!= NULL
16001 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
16003 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
16011 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
16012 HOST_WIDE_INT min_address
)
16014 HOST_WIDE_INT offset
;
16016 /* The code below assumes these are different. */
16017 gcc_assert (mp
!= min_mp
);
16019 if (min_mp
== NULL
)
16021 if (min_address
> mp
->min_address
)
16022 mp
->min_address
= min_address
;
16026 /* We will adjust this below if it is too loose. */
16027 mp
->min_address
= min_address
;
16029 /* Unlink MP from its current position. Since min_mp is non-null,
16030 mp->next must be non-null. */
16031 mp
->next
->prev
= mp
->prev
;
16032 if (mp
->prev
!= NULL
)
16033 mp
->prev
->next
= mp
->next
;
16035 minipool_vector_head
= mp
->next
;
16037 /* Reinsert it after MIN_MP. */
16039 mp
->next
= min_mp
->next
;
16041 if (mp
->next
!= NULL
)
16042 mp
->next
->prev
= mp
;
16044 minipool_vector_tail
= mp
;
16050 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16052 mp
->offset
= offset
;
16053 if (mp
->refcount
> 0)
16054 offset
+= mp
->fix_size
;
16056 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16057 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16063 /* Add a constant to the minipool for a backward reference. Returns the
16064 node added or NULL if the constant will not fit in this pool.
16066 Note that the code for insertion for a backwards reference can be
16067 somewhat confusing because the calculated offsets for each fix do
16068 not take into account the size of the pool (which is still under
16071 add_minipool_backward_ref (Mfix
*fix
)
16073 /* If set, min_mp is the last pool_entry that has a lower constraint
16074 than the one we are trying to add. */
16075 Mnode
*min_mp
= NULL
;
16076 /* This can be negative, since it is only a constraint. */
16077 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
16080 /* If we can't reach the current pool from this insn, or if we can't
16081 insert this entry at the end of the pool without pushing other
16082 fixes out of range, then we don't try. This ensures that we
16083 can't fail later on. */
16084 if (min_address
>= minipool_barrier
->address
16085 || (minipool_vector_tail
->min_address
+ fix
->fix_size
16086 >= minipool_barrier
->address
))
16089 /* Scan the pool to see if a constant with the same value has
16090 already been added. While we are doing this, also note the
16091 location where we must insert the constant if it doesn't already
16093 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
16095 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
16096 && fix
->mode
== mp
->mode
16097 && (!LABEL_P (fix
->value
)
16098 || (CODE_LABEL_NUMBER (fix
->value
)
16099 == CODE_LABEL_NUMBER (mp
->value
)))
16100 && rtx_equal_p (fix
->value
, mp
->value
)
16101 /* Check that there is enough slack to move this entry to the
16102 end of the table (this is conservative). */
16103 && (mp
->max_address
16104 > (minipool_barrier
->address
16105 + minipool_vector_tail
->offset
16106 + minipool_vector_tail
->fix_size
)))
16109 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
16112 if (min_mp
!= NULL
)
16113 mp
->min_address
+= fix
->fix_size
;
16116 /* Note the insertion point if necessary. */
16117 if (mp
->min_address
< min_address
)
16119 /* For now, we do not allow the insertion of 8-byte alignment
16120 requiring nodes anywhere but at the start of the pool. */
16121 if (ARM_DOUBLEWORD_ALIGN
16122 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16127 else if (mp
->max_address
16128 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
16130 /* Inserting before this entry would push the fix beyond
16131 its maximum address (which can happen if we have
16132 re-located a forwards fix); force the new fix to come
16134 if (ARM_DOUBLEWORD_ALIGN
16135 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
16140 min_address
= mp
->min_address
+ fix
->fix_size
;
16143 /* Do not insert a non-8-byte aligned quantity before 8-byte
16144 aligned quantities. */
16145 else if (ARM_DOUBLEWORD_ALIGN
16146 && fix
->fix_size
< 8
16147 && mp
->fix_size
>= 8)
16150 min_address
= mp
->min_address
+ fix
->fix_size
;
16155 /* We need to create a new entry. */
16157 mp
->fix_size
= fix
->fix_size
;
16158 mp
->mode
= fix
->mode
;
16159 mp
->value
= fix
->value
;
16161 mp
->max_address
= minipool_barrier
->address
+ 65536;
16163 mp
->min_address
= min_address
;
16165 if (min_mp
== NULL
)
16168 mp
->next
= minipool_vector_head
;
16170 if (mp
->next
== NULL
)
16172 minipool_vector_tail
= mp
;
16173 minipool_vector_label
= gen_label_rtx ();
16176 mp
->next
->prev
= mp
;
16178 minipool_vector_head
= mp
;
16182 mp
->next
= min_mp
->next
;
16186 if (mp
->next
!= NULL
)
16187 mp
->next
->prev
= mp
;
16189 minipool_vector_tail
= mp
;
16192 /* Save the new entry. */
16200 /* Scan over the following entries and adjust their offsets. */
16201 while (mp
->next
!= NULL
)
16203 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
16204 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
16207 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
16209 mp
->next
->offset
= mp
->offset
;
16218 assign_minipool_offsets (Mfix
*barrier
)
16220 HOST_WIDE_INT offset
= 0;
16223 minipool_barrier
= barrier
;
16225 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16227 mp
->offset
= offset
;
16229 if (mp
->refcount
> 0)
16230 offset
+= mp
->fix_size
;
16234 /* Output the literal table */
16236 dump_minipool (rtx_insn
*scan
)
16242 if (ARM_DOUBLEWORD_ALIGN
)
16243 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
16244 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
16251 fprintf (dump_file
,
16252 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16253 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
16255 scan
= emit_label_after (gen_label_rtx (), scan
);
16256 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
16257 scan
= emit_label_after (minipool_vector_label
, scan
);
16259 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
16261 if (mp
->refcount
> 0)
16265 fprintf (dump_file
,
16266 ";; Offset %u, min %ld, max %ld ",
16267 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
16268 (unsigned long) mp
->max_address
);
16269 arm_print_value (dump_file
, mp
->value
);
16270 fputc ('\n', dump_file
);
16273 rtx val
= copy_rtx (mp
->value
);
16275 switch (GET_MODE_SIZE (mp
->mode
))
16277 #ifdef HAVE_consttable_1
16279 scan
= emit_insn_after (gen_consttable_1 (val
), scan
);
16283 #ifdef HAVE_consttable_2
16285 scan
= emit_insn_after (gen_consttable_2 (val
), scan
);
16289 #ifdef HAVE_consttable_4
16291 scan
= emit_insn_after (gen_consttable_4 (val
), scan
);
16295 #ifdef HAVE_consttable_8
16297 scan
= emit_insn_after (gen_consttable_8 (val
), scan
);
16301 #ifdef HAVE_consttable_16
16303 scan
= emit_insn_after (gen_consttable_16 (val
), scan
);
16308 gcc_unreachable ();
16316 minipool_vector_head
= minipool_vector_tail
= NULL
;
16317 scan
= emit_insn_after (gen_consttable_end (), scan
);
16318 scan
= emit_barrier_after (scan
);
16321 /* Return the cost of forcibly inserting a barrier after INSN. */
16323 arm_barrier_cost (rtx_insn
*insn
)
16325 /* Basing the location of the pool on the loop depth is preferable,
16326 but at the moment, the basic block information seems to be
16327 corrupt by this stage of the compilation. */
16328 int base_cost
= 50;
16329 rtx_insn
*next
= next_nonnote_insn (insn
);
16331 if (next
!= NULL
&& LABEL_P (next
))
16334 switch (GET_CODE (insn
))
16337 /* It will always be better to place the table before the label, rather
16346 return base_cost
- 10;
16349 return base_cost
+ 10;
16353 /* Find the best place in the insn stream in the range
16354 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16355 Create the barrier by inserting a jump and add a new fix entry for
16358 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
16360 HOST_WIDE_INT count
= 0;
16361 rtx_barrier
*barrier
;
16362 rtx_insn
*from
= fix
->insn
;
16363 /* The instruction after which we will insert the jump. */
16364 rtx_insn
*selected
= NULL
;
16366 /* The address at which the jump instruction will be placed. */
16367 HOST_WIDE_INT selected_address
;
16369 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
16370 rtx_code_label
*label
= gen_label_rtx ();
16372 selected_cost
= arm_barrier_cost (from
);
16373 selected_address
= fix
->address
;
16375 while (from
&& count
< max_count
)
16377 rtx_jump_table_data
*tmp
;
16380 /* This code shouldn't have been called if there was a natural barrier
16382 gcc_assert (!BARRIER_P (from
));
16384 /* Count the length of this insn. This must stay in sync with the
16385 code that pushes minipool fixes. */
16386 if (LABEL_P (from
))
16387 count
+= get_label_padding (from
);
16389 count
+= get_attr_length (from
);
16391 /* If there is a jump table, add its length. */
16392 if (tablejump_p (from
, NULL
, &tmp
))
16394 count
+= get_jump_table_size (tmp
);
16396 /* Jump tables aren't in a basic block, so base the cost on
16397 the dispatch insn. If we select this location, we will
16398 still put the pool after the table. */
16399 new_cost
= arm_barrier_cost (from
);
16401 if (count
< max_count
16402 && (!selected
|| new_cost
<= selected_cost
))
16405 selected_cost
= new_cost
;
16406 selected_address
= fix
->address
+ count
;
16409 /* Continue after the dispatch table. */
16410 from
= NEXT_INSN (tmp
);
16414 new_cost
= arm_barrier_cost (from
);
16416 if (count
< max_count
16417 && (!selected
|| new_cost
<= selected_cost
))
16420 selected_cost
= new_cost
;
16421 selected_address
= fix
->address
+ count
;
16424 from
= NEXT_INSN (from
);
16427 /* Make sure that we found a place to insert the jump. */
16428 gcc_assert (selected
);
16430 /* Make sure we do not split a call and its corresponding
16431 CALL_ARG_LOCATION note. */
16432 if (CALL_P (selected
))
16434 rtx_insn
*next
= NEXT_INSN (selected
);
16435 if (next
&& NOTE_P (next
)
16436 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
16440 /* Create a new JUMP_INSN that branches around a barrier. */
16441 from
= emit_jump_insn_after (gen_jump (label
), selected
);
16442 JUMP_LABEL (from
) = label
;
16443 barrier
= emit_barrier_after (from
);
16444 emit_label_after (label
, barrier
);
16446 /* Create a minipool barrier entry for the new barrier. */
16447 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
16448 new_fix
->insn
= barrier
;
16449 new_fix
->address
= selected_address
;
16450 new_fix
->next
= fix
->next
;
16451 fix
->next
= new_fix
;
16456 /* Record that there is a natural barrier in the insn stream at
16459 push_minipool_barrier (rtx_insn
*insn
, HOST_WIDE_INT address
)
16461 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16464 fix
->address
= address
;
16467 if (minipool_fix_head
!= NULL
)
16468 minipool_fix_tail
->next
= fix
;
16470 minipool_fix_head
= fix
;
16472 minipool_fix_tail
= fix
;
16475 /* Record INSN, which will need fixing up to load a value from the
16476 minipool. ADDRESS is the offset of the insn since the start of the
16477 function; LOC is a pointer to the part of the insn which requires
16478 fixing; VALUE is the constant that must be loaded, which is of type
16481 push_minipool_fix (rtx_insn
*insn
, HOST_WIDE_INT address
, rtx
*loc
,
16482 machine_mode mode
, rtx value
)
16484 gcc_assert (!arm_disable_literal_pool
);
16485 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
16488 fix
->address
= address
;
16491 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
16492 fix
->value
= value
;
16493 fix
->forwards
= get_attr_pool_range (insn
);
16494 fix
->backwards
= get_attr_neg_pool_range (insn
);
16495 fix
->minipool
= NULL
;
16497 /* If an insn doesn't have a range defined for it, then it isn't
16498 expecting to be reworked by this code. Better to stop now than
16499 to generate duff assembly code. */
16500 gcc_assert (fix
->forwards
|| fix
->backwards
);
16502 /* If an entry requires 8-byte alignment then assume all constant pools
16503 require 4 bytes of padding. Trying to do this later on a per-pool
16504 basis is awkward because existing pool entries have to be modified. */
16505 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
16510 fprintf (dump_file
,
16511 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16512 GET_MODE_NAME (mode
),
16513 INSN_UID (insn
), (unsigned long) address
,
16514 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
16515 arm_print_value (dump_file
, fix
->value
);
16516 fprintf (dump_file
, "\n");
16519 /* Add it to the chain of fixes. */
16522 if (minipool_fix_head
!= NULL
)
16523 minipool_fix_tail
->next
= fix
;
16525 minipool_fix_head
= fix
;
16527 minipool_fix_tail
= fix
;
16530 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16531 Returns the number of insns needed, or 99 if we always want to synthesize
16534 arm_max_const_double_inline_cost ()
16536 return ((optimize_size
|| arm_ld_sched
) ? 3 : 4);
16539 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16540 Returns the number of insns needed, or 99 if we don't know how to
16543 arm_const_double_inline_cost (rtx val
)
16545 rtx lowpart
, highpart
;
16548 mode
= GET_MODE (val
);
16550 if (mode
== VOIDmode
)
16553 gcc_assert (GET_MODE_SIZE (mode
) == 8);
16555 lowpart
= gen_lowpart (SImode
, val
);
16556 highpart
= gen_highpart_mode (SImode
, mode
, val
);
16558 gcc_assert (CONST_INT_P (lowpart
));
16559 gcc_assert (CONST_INT_P (highpart
));
16561 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
16562 NULL_RTX
, NULL_RTX
, 0, 0)
16563 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
16564 NULL_RTX
, NULL_RTX
, 0, 0));
16567 /* Cost of loading a SImode constant. */
16569 arm_const_inline_cost (enum rtx_code code
, rtx val
)
16571 return arm_gen_constant (code
, SImode
, NULL_RTX
, INTVAL (val
),
16572 NULL_RTX
, NULL_RTX
, 1, 0);
16575 /* Return true if it is worthwhile to split a 64-bit constant into two
16576 32-bit operations. This is the case if optimizing for size, or
16577 if we have load delay slots, or if one 32-bit part can be done with
16578 a single data operation. */
16580 arm_const_double_by_parts (rtx val
)
16582 machine_mode mode
= GET_MODE (val
);
16585 if (optimize_size
|| arm_ld_sched
)
16588 if (mode
== VOIDmode
)
16591 part
= gen_highpart_mode (SImode
, mode
, val
);
16593 gcc_assert (CONST_INT_P (part
));
16595 if (const_ok_for_arm (INTVAL (part
))
16596 || const_ok_for_arm (~INTVAL (part
)))
16599 part
= gen_lowpart (SImode
, val
);
16601 gcc_assert (CONST_INT_P (part
));
16603 if (const_ok_for_arm (INTVAL (part
))
16604 || const_ok_for_arm (~INTVAL (part
)))
16610 /* Return true if it is possible to inline both the high and low parts
16611 of a 64-bit constant into 32-bit data processing instructions. */
16613 arm_const_double_by_immediates (rtx val
)
16615 machine_mode mode
= GET_MODE (val
);
16618 if (mode
== VOIDmode
)
16621 part
= gen_highpart_mode (SImode
, mode
, val
);
16623 gcc_assert (CONST_INT_P (part
));
16625 if (!const_ok_for_arm (INTVAL (part
)))
16628 part
= gen_lowpart (SImode
, val
);
16630 gcc_assert (CONST_INT_P (part
));
16632 if (!const_ok_for_arm (INTVAL (part
)))
16638 /* Scan INSN and note any of its operands that need fixing.
16639 If DO_PUSHES is false we do not actually push any of the fixups
16642 note_invalid_constants (rtx_insn
*insn
, HOST_WIDE_INT address
, int do_pushes
)
16646 extract_constrain_insn (insn
);
16648 if (recog_data
.n_alternatives
== 0)
16651 /* Fill in recog_op_alt with information about the constraints of
16653 preprocess_constraints (insn
);
16655 const operand_alternative
*op_alt
= which_op_alt ();
16656 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
16658 /* Things we need to fix can only occur in inputs. */
16659 if (recog_data
.operand_type
[opno
] != OP_IN
)
16662 /* If this alternative is a memory reference, then any mention
16663 of constants in this alternative is really to fool reload
16664 into allowing us to accept one there. We need to fix them up
16665 now so that we output the right code. */
16666 if (op_alt
[opno
].memory_ok
)
16668 rtx op
= recog_data
.operand
[opno
];
16670 if (CONSTANT_P (op
))
16673 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
16674 recog_data
.operand_mode
[opno
], op
);
16676 else if (MEM_P (op
)
16677 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
16678 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
16682 rtx cop
= avoid_constant_pool_reference (op
);
16684 /* Casting the address of something to a mode narrower
16685 than a word can cause avoid_constant_pool_reference()
16686 to return the pool reference itself. That's no good to
16687 us here. Lets just hope that we can use the
16688 constant pool value directly. */
16690 cop
= get_pool_constant (XEXP (op
, 0));
16692 push_minipool_fix (insn
, address
,
16693 recog_data
.operand_loc
[opno
],
16694 recog_data
.operand_mode
[opno
], cop
);
16704 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16705 and unions in the context of ARMv8-M Security Extensions. It is used as a
16706 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16707 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16708 or four masks, depending on whether it is being computed for a
16709 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16710 respectively. The tree for the type of the argument or a field within an
16711 argument is passed in ARG_TYPE, the current register this argument or field
16712 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16713 argument or field starts at is passed in STARTING_BIT and the last used bit
16714 is kept in LAST_USED_BIT which is also updated accordingly. */
16716 static unsigned HOST_WIDE_INT
16717 comp_not_to_clear_mask_str_un (tree arg_type
, int * regno
,
16718 uint32_t * padding_bits_to_clear
,
16719 unsigned starting_bit
, int * last_used_bit
)
16722 unsigned HOST_WIDE_INT not_to_clear_reg_mask
= 0;
16724 if (TREE_CODE (arg_type
) == RECORD_TYPE
)
16726 unsigned current_bit
= starting_bit
;
16728 long int offset
, size
;
16731 field
= TYPE_FIELDS (arg_type
);
16734 /* The offset within a structure is always an offset from
16735 the start of that structure. Make sure we take that into the
16736 calculation of the register based offset that we use here. */
16737 offset
= starting_bit
;
16738 offset
+= TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field
), 0);
16741 /* This is the actual size of the field, for bitfields this is the
16742 bitfield width and not the container size. */
16743 size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
16745 if (*last_used_bit
!= offset
)
16747 if (offset
< *last_used_bit
)
16749 /* This field's offset is before the 'last_used_bit', that
16750 means this field goes on the next register. So we need to
16751 pad the rest of the current register and increase the
16752 register number. */
16754 mask
= ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit
);
16757 padding_bits_to_clear
[*regno
] |= mask
;
16758 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16763 /* Otherwise we pad the bits between the last field's end and
16764 the start of the new field. */
16767 mask
= ((uint32_t)-1) >> (32 - offset
);
16768 mask
-= ((uint32_t) 1 << *last_used_bit
) - 1;
16769 padding_bits_to_clear
[*regno
] |= mask
;
16771 current_bit
= offset
;
16774 /* Calculate further padding bits for inner structs/unions too. */
16775 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field
)))
16777 *last_used_bit
= current_bit
;
16778 not_to_clear_reg_mask
16779 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field
), regno
,
16780 padding_bits_to_clear
, offset
,
16785 /* Update 'current_bit' with this field's size. If the
16786 'current_bit' lies in a subsequent register, update 'regno' and
16787 reset 'current_bit' to point to the current bit in that new
16789 current_bit
+= size
;
16790 while (current_bit
>= 32)
16793 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16796 *last_used_bit
= current_bit
;
16799 field
= TREE_CHAIN (field
);
16801 not_to_clear_reg_mask
|= HOST_WIDE_INT_1U
<< *regno
;
16803 else if (TREE_CODE (arg_type
) == UNION_TYPE
)
16805 tree field
, field_t
;
16806 int i
, regno_t
, field_size
;
16810 uint32_t padding_bits_to_clear_res
[NUM_ARG_REGS
]
16811 = {-1, -1, -1, -1};
16813 /* To compute the padding bits in a union we only consider bits as
16814 padding bits if they are always either a padding bit or fall outside a
16815 fields size for all fields in the union. */
16816 field
= TYPE_FIELDS (arg_type
);
16819 uint32_t padding_bits_to_clear_t
[NUM_ARG_REGS
]
16820 = {0U, 0U, 0U, 0U};
16821 int last_used_bit_t
= *last_used_bit
;
16823 field_t
= TREE_TYPE (field
);
16825 /* If the field's type is either a record or a union make sure to
16826 compute their padding bits too. */
16827 if (RECORD_OR_UNION_TYPE_P (field_t
))
16828 not_to_clear_reg_mask
16829 |= comp_not_to_clear_mask_str_un (field_t
, ®no_t
,
16830 &padding_bits_to_clear_t
[0],
16831 starting_bit
, &last_used_bit_t
);
16834 field_size
= TREE_INT_CST_ELT (DECL_SIZE (field
), 0);
16835 regno_t
= (field_size
/ 32) + *regno
;
16836 last_used_bit_t
= (starting_bit
+ field_size
) % 32;
16839 for (i
= *regno
; i
< regno_t
; i
++)
16841 /* For all but the last register used by this field only keep the
16842 padding bits that were padding bits in this field. */
16843 padding_bits_to_clear_res
[i
] &= padding_bits_to_clear_t
[i
];
16846 /* For the last register, keep all padding bits that were padding
16847 bits in this field and any padding bits that are still valid
16848 as padding bits but fall outside of this field's size. */
16849 mask
= (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t
)) + 1;
16850 padding_bits_to_clear_res
[regno_t
]
16851 &= padding_bits_to_clear_t
[regno_t
] | mask
;
16853 /* Update the maximum size of the fields in terms of registers used
16854 ('max_reg') and the 'last_used_bit' in said register. */
16855 if (max_reg
< regno_t
)
16858 max_bit
= last_used_bit_t
;
16860 else if (max_reg
== regno_t
&& max_bit
< last_used_bit_t
)
16861 max_bit
= last_used_bit_t
;
16863 field
= TREE_CHAIN (field
);
16866 /* Update the current padding_bits_to_clear using the intersection of the
16867 padding bits of all the fields. */
16868 for (i
=*regno
; i
< max_reg
; i
++)
16869 padding_bits_to_clear
[i
] |= padding_bits_to_clear_res
[i
];
16871 /* Do not keep trailing padding bits, we do not know yet whether this
16872 is the end of the argument. */
16873 mask
= ((uint32_t) 1 << max_bit
) - 1;
16874 padding_bits_to_clear
[max_reg
]
16875 |= padding_bits_to_clear_res
[max_reg
] & mask
;
16878 *last_used_bit
= max_bit
;
16881 /* This function should only be used for structs and unions. */
16882 gcc_unreachable ();
16884 return not_to_clear_reg_mask
;
16887 /* In the context of ARMv8-M Security Extensions, this function is used for both
16888 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16889 registers are used when returning or passing arguments, which is then
16890 returned as a mask. It will also compute a mask to indicate padding/unused
16891 bits for each of these registers, and passes this through the
16892 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16893 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16894 the starting register used to pass this argument or return value is passed
16895 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16896 for struct and union types. */
16898 static unsigned HOST_WIDE_INT
16899 compute_not_to_clear_mask (tree arg_type
, rtx arg_rtx
, int regno
,
16900 uint32_t * padding_bits_to_clear
)
16903 int last_used_bit
= 0;
16904 unsigned HOST_WIDE_INT not_to_clear_mask
;
16906 if (RECORD_OR_UNION_TYPE_P (arg_type
))
16909 = comp_not_to_clear_mask_str_un (arg_type
, ®no
,
16910 padding_bits_to_clear
, 0,
16914 /* If the 'last_used_bit' is not zero, that means we are still using a
16915 part of the last 'regno'. In such cases we must clear the trailing
16916 bits. Otherwise we are not using regno and we should mark it as to
16918 if (last_used_bit
!= 0)
16919 padding_bits_to_clear
[regno
]
16920 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit
) + 1;
16922 not_to_clear_mask
&= ~(HOST_WIDE_INT_1U
<< regno
);
16926 not_to_clear_mask
= 0;
16927 /* We are not dealing with structs nor unions. So these arguments may be
16928 passed in floating point registers too. In some cases a BLKmode is
16929 used when returning or passing arguments in multiple VFP registers. */
16930 if (GET_MODE (arg_rtx
) == BLKmode
)
16935 /* This should really only occur when dealing with the hard-float
16937 gcc_assert (TARGET_HARD_FLOAT_ABI
);
16939 for (i
= 0; i
< XVECLEN (arg_rtx
, 0); i
++)
16941 reg
= XEXP (XVECEXP (arg_rtx
, 0, i
), 0);
16942 gcc_assert (REG_P (reg
));
16944 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (reg
);
16946 /* If we are dealing with DF mode, make sure we don't
16947 clear either of the registers it addresses. */
16948 arg_regs
= ARM_NUM_REGS (GET_MODE (reg
));
16951 unsigned HOST_WIDE_INT mask
;
16952 mask
= HOST_WIDE_INT_1U
<< (REGNO (reg
) + arg_regs
);
16953 mask
-= HOST_WIDE_INT_1U
<< REGNO (reg
);
16954 not_to_clear_mask
|= mask
;
16960 /* Otherwise we can rely on the MODE to determine how many registers
16961 are being used by this argument. */
16962 int arg_regs
= ARM_NUM_REGS (GET_MODE (arg_rtx
));
16963 not_to_clear_mask
|= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
16966 unsigned HOST_WIDE_INT
16967 mask
= HOST_WIDE_INT_1U
<< (REGNO (arg_rtx
) + arg_regs
);
16968 mask
-= HOST_WIDE_INT_1U
<< REGNO (arg_rtx
);
16969 not_to_clear_mask
|= mask
;
16974 return not_to_clear_mask
;
16977 /* Clears caller saved registers not used to pass arguments before a
16978 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16979 registers is done in __gnu_cmse_nonsecure_call libcall.
16980 See libgcc/config/arm/cmse_nonsecure_call.S. */
16983 cmse_nonsecure_call_clear_caller_saved (void)
16987 FOR_EACH_BB_FN (bb
, cfun
)
16991 FOR_BB_INSNS (bb
, insn
)
16993 uint64_t to_clear_mask
, float_mask
;
16995 rtx pat
, call
, unspec
, reg
, cleared_reg
, tmp
;
16996 unsigned int regno
, maxregno
;
16998 CUMULATIVE_ARGS args_so_far_v
;
16999 cumulative_args_t args_so_far
;
17000 tree arg_type
, fntype
;
17001 bool using_r4
, first_param
= true;
17002 function_args_iterator args_iter
;
17003 uint32_t padding_bits_to_clear
[4] = {0U, 0U, 0U, 0U};
17004 uint32_t * padding_bits_to_clear_ptr
= &padding_bits_to_clear
[0];
17006 if (!NONDEBUG_INSN_P (insn
))
17009 if (!CALL_P (insn
))
17012 pat
= PATTERN (insn
);
17013 gcc_assert (GET_CODE (pat
) == PARALLEL
&& XVECLEN (pat
, 0) > 0);
17014 call
= XVECEXP (pat
, 0, 0);
17016 /* Get the real call RTX if the insn sets a value, ie. returns. */
17017 if (GET_CODE (call
) == SET
)
17018 call
= SET_SRC (call
);
17020 /* Check if it is a cmse_nonsecure_call. */
17021 unspec
= XEXP (call
, 0);
17022 if (GET_CODE (unspec
) != UNSPEC
17023 || XINT (unspec
, 1) != UNSPEC_NONSECURE_MEM
)
17026 /* Determine the caller-saved registers we need to clear. */
17027 to_clear_mask
= (1LL << (NUM_ARG_REGS
)) - 1;
17028 maxregno
= NUM_ARG_REGS
- 1;
17029 /* Only look at the caller-saved floating point registers in case of
17030 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17031 lazy store and loads which clear both caller- and callee-saved
17033 if (TARGET_HARD_FLOAT_ABI
)
17035 float_mask
= (1LL << (D7_VFP_REGNUM
+ 1)) - 1;
17036 float_mask
&= ~((1LL << FIRST_VFP_REGNUM
) - 1);
17037 to_clear_mask
|= float_mask
;
17038 maxregno
= D7_VFP_REGNUM
;
17041 /* Make sure the register used to hold the function address is not
17043 address
= RTVEC_ELT (XVEC (unspec
, 0), 0);
17044 gcc_assert (MEM_P (address
));
17045 gcc_assert (REG_P (XEXP (address
, 0)));
17046 to_clear_mask
&= ~(1LL << REGNO (XEXP (address
, 0)));
17048 /* Set basic block of call insn so that df rescan is performed on
17049 insns inserted here. */
17050 set_block_for_insn (insn
, bb
);
17051 df_set_flags (DF_DEFER_INSN_RESCAN
);
17054 /* Make sure the scheduler doesn't schedule other insns beyond
17056 emit_insn (gen_blockage ());
17058 /* Walk through all arguments and clear registers appropriately.
17060 fntype
= TREE_TYPE (MEM_EXPR (address
));
17061 arm_init_cumulative_args (&args_so_far_v
, fntype
, NULL_RTX
,
17063 args_so_far
= pack_cumulative_args (&args_so_far_v
);
17064 FOREACH_FUNCTION_ARGS (fntype
, arg_type
, args_iter
)
17067 machine_mode arg_mode
= TYPE_MODE (arg_type
);
17069 if (VOID_TYPE_P (arg_type
))
17073 arm_function_arg_advance (args_so_far
, arg_mode
, arg_type
,
17076 arg_rtx
= arm_function_arg (args_so_far
, arg_mode
, arg_type
,
17078 gcc_assert (REG_P (arg_rtx
));
17080 &= ~compute_not_to_clear_mask (arg_type
, arg_rtx
,
17082 padding_bits_to_clear_ptr
);
17084 first_param
= false;
17087 /* Clear padding bits where needed. */
17088 cleared_reg
= XEXP (address
, 0);
17089 reg
= gen_rtx_REG (SImode
, IP_REGNUM
);
17091 for (regno
= R0_REGNUM
; regno
< NUM_ARG_REGS
; regno
++)
17093 if (padding_bits_to_clear
[regno
] == 0)
17096 /* If this is a Thumb-1 target copy the address of the function
17097 we are calling from 'r4' into 'ip' such that we can use r4 to
17098 clear the unused bits in the arguments. */
17099 if (TARGET_THUMB1
&& !using_r4
)
17103 emit_move_insn (gen_rtx_REG (SImode
, IP_REGNUM
),
17107 tmp
= GEN_INT ((((~padding_bits_to_clear
[regno
]) << 16u) >> 16u));
17108 emit_move_insn (reg
, tmp
);
17109 /* Also fill the top half of the negated
17110 padding_bits_to_clear. */
17111 if (((~padding_bits_to_clear
[regno
]) >> 16) > 0)
17113 tmp
= GEN_INT ((~padding_bits_to_clear
[regno
]) >> 16);
17114 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode
, reg
,
17120 emit_insn (gen_andsi3 (gen_rtx_REG (SImode
, regno
),
17121 gen_rtx_REG (SImode
, regno
),
17126 emit_move_insn (cleared_reg
,
17127 gen_rtx_REG (SImode
, IP_REGNUM
));
17129 /* We use right shift and left shift to clear the LSB of the address
17130 we jump to instead of using bic, to avoid having to use an extra
17131 register on Thumb-1. */
17132 tmp
= gen_rtx_LSHIFTRT (SImode
, cleared_reg
, const1_rtx
);
17133 emit_insn (gen_rtx_SET (cleared_reg
, tmp
));
17134 tmp
= gen_rtx_ASHIFT (SImode
, cleared_reg
, const1_rtx
);
17135 emit_insn (gen_rtx_SET (cleared_reg
, tmp
));
17137 /* Clearing all registers that leak before doing a non-secure
17139 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
17141 if (!(to_clear_mask
& (1LL << regno
)))
17144 /* If regno is an even vfp register and its successor is also to
17145 be cleared, use vmov. */
17146 if (IS_VFP_REGNUM (regno
))
17148 if (TARGET_VFP_DOUBLE
17149 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
17150 && to_clear_mask
& (1LL << (regno
+ 1)))
17151 emit_move_insn (gen_rtx_REG (DFmode
, regno
++),
17152 CONST0_RTX (DFmode
));
17154 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
17155 CONST0_RTX (SFmode
));
17158 emit_move_insn (gen_rtx_REG (SImode
, regno
), cleared_reg
);
17161 seq
= get_insns ();
17163 emit_insn_before (seq
, insn
);
17169 /* Rewrite move insn into subtract of 0 if the condition codes will
17170 be useful in next conditional jump insn. */
17173 thumb1_reorg (void)
17177 FOR_EACH_BB_FN (bb
, cfun
)
17180 rtx cmp
, op0
, op1
, set
= NULL
;
17181 rtx_insn
*prev
, *insn
= BB_END (bb
);
17182 bool insn_clobbered
= false;
17184 while (insn
!= BB_HEAD (bb
) && !NONDEBUG_INSN_P (insn
))
17185 insn
= PREV_INSN (insn
);
17187 /* Find the last cbranchsi4_insn in basic block BB. */
17188 if (insn
== BB_HEAD (bb
)
17189 || INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
17192 /* Get the register with which we are comparing. */
17193 cmp
= XEXP (SET_SRC (PATTERN (insn
)), 0);
17194 op0
= XEXP (cmp
, 0);
17195 op1
= XEXP (cmp
, 1);
17197 /* Check that comparison is against ZERO. */
17198 if (!CONST_INT_P (op1
) || INTVAL (op1
) != 0)
17201 /* Find the first flag setting insn before INSN in basic block BB. */
17202 gcc_assert (insn
!= BB_HEAD (bb
));
17203 for (prev
= PREV_INSN (insn
);
17205 && prev
!= BB_HEAD (bb
)
17207 || DEBUG_INSN_P (prev
)
17208 || ((set
= single_set (prev
)) != NULL
17209 && get_attr_conds (prev
) == CONDS_NOCOND
)));
17210 prev
= PREV_INSN (prev
))
17212 if (reg_set_p (op0
, prev
))
17213 insn_clobbered
= true;
17216 /* Skip if op0 is clobbered by insn other than prev. */
17217 if (insn_clobbered
)
17223 dest
= SET_DEST (set
);
17224 src
= SET_SRC (set
);
17225 if (!low_register_operand (dest
, SImode
)
17226 || !low_register_operand (src
, SImode
))
17229 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17230 in INSN. Both src and dest of the move insn are checked. */
17231 if (REGNO (op0
) == REGNO (src
) || REGNO (op0
) == REGNO (dest
))
17233 dest
= copy_rtx (dest
);
17234 src
= copy_rtx (src
);
17235 src
= gen_rtx_MINUS (SImode
, src
, const0_rtx
);
17236 PATTERN (prev
) = gen_rtx_SET (dest
, src
);
17237 INSN_CODE (prev
) = -1;
17238 /* Set test register in INSN to dest. */
17239 XEXP (cmp
, 0) = copy_rtx (dest
);
17240 INSN_CODE (insn
) = -1;
17245 /* Convert instructions to their cc-clobbering variant if possible, since
17246 that allows us to use smaller encodings. */
17249 thumb2_reorg (void)
17254 INIT_REG_SET (&live
);
17256 /* We are freeing block_for_insn in the toplev to keep compatibility
17257 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17258 compute_bb_for_insn ();
17261 enum Convert_Action
{SKIP
, CONV
, SWAP_CONV
};
17263 FOR_EACH_BB_FN (bb
, cfun
)
17265 if ((current_tune
->disparage_flag_setting_t16_encodings
17266 == tune_params::DISPARAGE_FLAGS_ALL
)
17267 && optimize_bb_for_speed_p (bb
))
17271 Convert_Action action
= SKIP
;
17272 Convert_Action action_for_partial_flag_setting
17273 = ((current_tune
->disparage_flag_setting_t16_encodings
17274 != tune_params::DISPARAGE_FLAGS_NEITHER
)
17275 && optimize_bb_for_speed_p (bb
))
17278 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
17279 df_simulate_initialize_backwards (bb
, &live
);
17280 FOR_BB_INSNS_REVERSE (bb
, insn
)
17282 if (NONJUMP_INSN_P (insn
)
17283 && !REGNO_REG_SET_P (&live
, CC_REGNUM
)
17284 && GET_CODE (PATTERN (insn
)) == SET
)
17287 rtx pat
= PATTERN (insn
);
17288 rtx dst
= XEXP (pat
, 0);
17289 rtx src
= XEXP (pat
, 1);
17290 rtx op0
= NULL_RTX
, op1
= NULL_RTX
;
17292 if (UNARY_P (src
) || BINARY_P (src
))
17293 op0
= XEXP (src
, 0);
17295 if (BINARY_P (src
))
17296 op1
= XEXP (src
, 1);
17298 if (low_register_operand (dst
, SImode
))
17300 switch (GET_CODE (src
))
17303 /* Adding two registers and storing the result
17304 in the first source is already a 16-bit
17306 if (rtx_equal_p (dst
, op0
)
17307 && register_operand (op1
, SImode
))
17310 if (low_register_operand (op0
, SImode
))
17312 /* ADDS <Rd>,<Rn>,<Rm> */
17313 if (low_register_operand (op1
, SImode
))
17315 /* ADDS <Rdn>,#<imm8> */
17316 /* SUBS <Rdn>,#<imm8> */
17317 else if (rtx_equal_p (dst
, op0
)
17318 && CONST_INT_P (op1
)
17319 && IN_RANGE (INTVAL (op1
), -255, 255))
17321 /* ADDS <Rd>,<Rn>,#<imm3> */
17322 /* SUBS <Rd>,<Rn>,#<imm3> */
17323 else if (CONST_INT_P (op1
)
17324 && IN_RANGE (INTVAL (op1
), -7, 7))
17327 /* ADCS <Rd>, <Rn> */
17328 else if (GET_CODE (XEXP (src
, 0)) == PLUS
17329 && rtx_equal_p (XEXP (XEXP (src
, 0), 0), dst
)
17330 && low_register_operand (XEXP (XEXP (src
, 0), 1),
17332 && COMPARISON_P (op1
)
17333 && cc_register (XEXP (op1
, 0), VOIDmode
)
17334 && maybe_get_arm_condition_code (op1
) == ARM_CS
17335 && XEXP (op1
, 1) == const0_rtx
)
17340 /* RSBS <Rd>,<Rn>,#0
17341 Not handled here: see NEG below. */
17342 /* SUBS <Rd>,<Rn>,#<imm3>
17344 Not handled here: see PLUS above. */
17345 /* SUBS <Rd>,<Rn>,<Rm> */
17346 if (low_register_operand (op0
, SImode
)
17347 && low_register_operand (op1
, SImode
))
17352 /* MULS <Rdm>,<Rn>,<Rdm>
17353 As an exception to the rule, this is only used
17354 when optimizing for size since MULS is slow on all
17355 known implementations. We do not even want to use
17356 MULS in cold code, if optimizing for speed, so we
17357 test the global flag here. */
17358 if (!optimize_size
)
17360 /* Fall through. */
17364 /* ANDS <Rdn>,<Rm> */
17365 if (rtx_equal_p (dst
, op0
)
17366 && low_register_operand (op1
, SImode
))
17367 action
= action_for_partial_flag_setting
;
17368 else if (rtx_equal_p (dst
, op1
)
17369 && low_register_operand (op0
, SImode
))
17370 action
= action_for_partial_flag_setting
== SKIP
17371 ? SKIP
: SWAP_CONV
;
17377 /* ASRS <Rdn>,<Rm> */
17378 /* LSRS <Rdn>,<Rm> */
17379 /* LSLS <Rdn>,<Rm> */
17380 if (rtx_equal_p (dst
, op0
)
17381 && low_register_operand (op1
, SImode
))
17382 action
= action_for_partial_flag_setting
;
17383 /* ASRS <Rd>,<Rm>,#<imm5> */
17384 /* LSRS <Rd>,<Rm>,#<imm5> */
17385 /* LSLS <Rd>,<Rm>,#<imm5> */
17386 else if (low_register_operand (op0
, SImode
)
17387 && CONST_INT_P (op1
)
17388 && IN_RANGE (INTVAL (op1
), 0, 31))
17389 action
= action_for_partial_flag_setting
;
17393 /* RORS <Rdn>,<Rm> */
17394 if (rtx_equal_p (dst
, op0
)
17395 && low_register_operand (op1
, SImode
))
17396 action
= action_for_partial_flag_setting
;
17400 /* MVNS <Rd>,<Rm> */
17401 if (low_register_operand (op0
, SImode
))
17402 action
= action_for_partial_flag_setting
;
17406 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17407 if (low_register_operand (op0
, SImode
))
17412 /* MOVS <Rd>,#<imm8> */
17413 if (CONST_INT_P (src
)
17414 && IN_RANGE (INTVAL (src
), 0, 255))
17415 action
= action_for_partial_flag_setting
;
17419 /* MOVS and MOV<c> with registers have different
17420 encodings, so are not relevant here. */
17428 if (action
!= SKIP
)
17430 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
17431 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
17434 if (action
== SWAP_CONV
)
17436 src
= copy_rtx (src
);
17437 XEXP (src
, 0) = op1
;
17438 XEXP (src
, 1) = op0
;
17439 pat
= gen_rtx_SET (dst
, src
);
17440 vec
= gen_rtvec (2, pat
, clobber
);
17442 else /* action == CONV */
17443 vec
= gen_rtvec (2, pat
, clobber
);
17445 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
17446 INSN_CODE (insn
) = -1;
17450 if (NONDEBUG_INSN_P (insn
))
17451 df_simulate_one_insn_backwards (bb
, insn
, &live
);
17455 CLEAR_REG_SET (&live
);
17458 /* Gcc puts the pool in the wrong place for ARM, since we can only
17459 load addresses a limited distance around the pc. We do some
17460 special munging to move the constant pool values to the correct
17461 point in the code. */
17466 HOST_WIDE_INT address
= 0;
17470 cmse_nonsecure_call_clear_caller_saved ();
17473 else if (TARGET_THUMB2
)
17476 /* Ensure all insns that must be split have been split at this point.
17477 Otherwise, the pool placement code below may compute incorrect
17478 insn lengths. Note that when optimizing, all insns have already
17479 been split at this point. */
17481 split_all_insns_noflow ();
17483 /* Make sure we do not attempt to create a literal pool even though it should
17484 no longer be necessary to create any. */
17485 if (arm_disable_literal_pool
)
17488 minipool_fix_head
= minipool_fix_tail
= NULL
;
17490 /* The first insn must always be a note, or the code below won't
17491 scan it properly. */
17492 insn
= get_insns ();
17493 gcc_assert (NOTE_P (insn
));
17496 /* Scan all the insns and record the operands that will need fixing. */
17497 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
17499 if (BARRIER_P (insn
))
17500 push_minipool_barrier (insn
, address
);
17501 else if (INSN_P (insn
))
17503 rtx_jump_table_data
*table
;
17505 note_invalid_constants (insn
, address
, true);
17506 address
+= get_attr_length (insn
);
17508 /* If the insn is a vector jump, add the size of the table
17509 and skip the table. */
17510 if (tablejump_p (insn
, NULL
, &table
))
17512 address
+= get_jump_table_size (table
);
17516 else if (LABEL_P (insn
))
17517 /* Add the worst-case padding due to alignment. We don't add
17518 the _current_ padding because the minipool insertions
17519 themselves might change it. */
17520 address
+= get_label_padding (insn
);
17523 fix
= minipool_fix_head
;
17525 /* Now scan the fixups and perform the required changes. */
17530 Mfix
* last_added_fix
;
17531 Mfix
* last_barrier
= NULL
;
17534 /* Skip any further barriers before the next fix. */
17535 while (fix
&& BARRIER_P (fix
->insn
))
17538 /* No more fixes. */
17542 last_added_fix
= NULL
;
17544 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
17546 if (BARRIER_P (ftmp
->insn
))
17548 if (ftmp
->address
>= minipool_vector_head
->max_address
)
17551 last_barrier
= ftmp
;
17553 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
17556 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
17559 /* If we found a barrier, drop back to that; any fixes that we
17560 could have reached but come after the barrier will now go in
17561 the next mini-pool. */
17562 if (last_barrier
!= NULL
)
17564 /* Reduce the refcount for those fixes that won't go into this
17566 for (fdel
= last_barrier
->next
;
17567 fdel
&& fdel
!= ftmp
;
17570 fdel
->minipool
->refcount
--;
17571 fdel
->minipool
= NULL
;
17574 ftmp
= last_barrier
;
17578 /* ftmp is first fix that we can't fit into this pool and
17579 there no natural barriers that we could use. Insert a
17580 new barrier in the code somewhere between the previous
17581 fix and this one, and arrange to jump around it. */
17582 HOST_WIDE_INT max_address
;
17584 /* The last item on the list of fixes must be a barrier, so
17585 we can never run off the end of the list of fixes without
17586 last_barrier being set. */
17589 max_address
= minipool_vector_head
->max_address
;
17590 /* Check that there isn't another fix that is in range that
17591 we couldn't fit into this pool because the pool was
17592 already too large: we need to put the pool before such an
17593 instruction. The pool itself may come just after the
17594 fix because create_fix_barrier also allows space for a
17595 jump instruction. */
17596 if (ftmp
->address
< max_address
)
17597 max_address
= ftmp
->address
+ 1;
17599 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
17602 assign_minipool_offsets (last_barrier
);
17606 if (!BARRIER_P (ftmp
->insn
)
17607 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
17614 /* Scan over the fixes we have identified for this pool, fixing them
17615 up and adding the constants to the pool itself. */
17616 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
17617 this_fix
= this_fix
->next
)
17618 if (!BARRIER_P (this_fix
->insn
))
17621 = plus_constant (Pmode
,
17622 gen_rtx_LABEL_REF (VOIDmode
,
17623 minipool_vector_label
),
17624 this_fix
->minipool
->offset
);
17625 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
17628 dump_minipool (last_barrier
->insn
);
17632 /* From now on we must synthesize any constants that we can't handle
17633 directly. This can happen if the RTL gets split during final
17634 instruction generation. */
17635 cfun
->machine
->after_arm_reorg
= 1;
17637 /* Free the minipool memory. */
17638 obstack_free (&minipool_obstack
, minipool_startobj
);
17641 /* Routines to output assembly language. */
17643 /* Return string representation of passed in real value. */
17644 static const char *
17645 fp_const_from_val (REAL_VALUE_TYPE
*r
)
17647 if (!fp_consts_inited
)
17650 gcc_assert (real_equal (r
, &value_fp0
));
17654 /* OPERANDS[0] is the entire list of insns that constitute pop,
17655 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17656 is in the list, UPDATE is true iff the list contains explicit
17657 update of base register. */
17659 arm_output_multireg_pop (rtx
*operands
, bool return_pc
, rtx cond
, bool reverse
,
17665 const char *conditional
;
17666 int num_saves
= XVECLEN (operands
[0], 0);
17667 unsigned int regno
;
17668 unsigned int regno_base
= REGNO (operands
[1]);
17669 bool interrupt_p
= IS_INTERRUPT (arm_current_func_type ());
17672 offset
+= update
? 1 : 0;
17673 offset
+= return_pc
? 1 : 0;
17675 /* Is the base register in the list? */
17676 for (i
= offset
; i
< num_saves
; i
++)
17678 regno
= REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0));
17679 /* If SP is in the list, then the base register must be SP. */
17680 gcc_assert ((regno
!= SP_REGNUM
) || (regno_base
== SP_REGNUM
));
17681 /* If base register is in the list, there must be no explicit update. */
17682 if (regno
== regno_base
)
17683 gcc_assert (!update
);
17686 conditional
= reverse
? "%?%D0" : "%?%d0";
17687 /* Can't use POP if returning from an interrupt. */
17688 if ((regno_base
== SP_REGNUM
) && update
&& !(interrupt_p
&& return_pc
))
17689 sprintf (pattern
, "pop%s\t{", conditional
);
17692 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17693 It's just a convention, their semantics are identical. */
17694 if (regno_base
== SP_REGNUM
)
17695 sprintf (pattern
, "ldmfd%s\t", conditional
);
17697 sprintf (pattern
, "ldmia%s\t", conditional
);
17699 sprintf (pattern
, "ldm%s\t", conditional
);
17701 strcat (pattern
, reg_names
[regno_base
]);
17703 strcat (pattern
, "!, {");
17705 strcat (pattern
, ", {");
17708 /* Output the first destination register. */
17710 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, offset
), 0))]);
17712 /* Output the rest of the destination registers. */
17713 for (i
= offset
+ 1; i
< num_saves
; i
++)
17715 strcat (pattern
, ", ");
17717 reg_names
[REGNO (XEXP (XVECEXP (operands
[0], 0, i
), 0))]);
17720 strcat (pattern
, "}");
17722 if (interrupt_p
&& return_pc
)
17723 strcat (pattern
, "^");
17725 output_asm_insn (pattern
, &cond
);
17729 /* Output the assembly for a store multiple. */
17732 vfp_output_vstmd (rtx
* operands
)
17738 rtx addr_reg
= REG_P (XEXP (operands
[0], 0))
17739 ? XEXP (operands
[0], 0)
17740 : XEXP (XEXP (operands
[0], 0), 0);
17741 bool push_p
= REGNO (addr_reg
) == SP_REGNUM
;
17744 strcpy (pattern
, "vpush%?.64\t{%P1");
17746 strcpy (pattern
, "vstmdb%?.64\t%m0!, {%P1");
17748 p
= strlen (pattern
);
17750 gcc_assert (REG_P (operands
[1]));
17752 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
17753 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
17755 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
17757 strcpy (&pattern
[p
], "}");
17759 output_asm_insn (pattern
, operands
);
17764 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17765 number of bytes pushed. */
17768 vfp_emit_fstmd (int base_reg
, int count
)
17775 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17776 register pairs are stored by a store multiple insn. We avoid this
17777 by pushing an extra pair. */
17778 if (count
== 2 && !arm_arch6
)
17780 if (base_reg
== LAST_VFP_REGNUM
- 3)
17785 /* FSTMD may not store more than 16 doubleword registers at once. Split
17786 larger stores into multiple parts (up to a maximum of two, in
17791 /* NOTE: base_reg is an internal register number, so each D register
17793 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
17794 saved
+= vfp_emit_fstmd (base_reg
, 16);
17798 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
17799 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
17801 reg
= gen_rtx_REG (DFmode
, base_reg
);
17804 XVECEXP (par
, 0, 0)
17805 = gen_rtx_SET (gen_frame_mem
17807 gen_rtx_PRE_MODIFY (Pmode
,
17810 (Pmode
, stack_pointer_rtx
,
17813 gen_rtx_UNSPEC (BLKmode
,
17814 gen_rtvec (1, reg
),
17815 UNSPEC_PUSH_MULT
));
17817 tmp
= gen_rtx_SET (stack_pointer_rtx
,
17818 plus_constant (Pmode
, stack_pointer_rtx
, -(count
* 8)));
17819 RTX_FRAME_RELATED_P (tmp
) = 1;
17820 XVECEXP (dwarf
, 0, 0) = tmp
;
17822 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
, stack_pointer_rtx
), reg
);
17823 RTX_FRAME_RELATED_P (tmp
) = 1;
17824 XVECEXP (dwarf
, 0, 1) = tmp
;
17826 for (i
= 1; i
< count
; i
++)
17828 reg
= gen_rtx_REG (DFmode
, base_reg
);
17830 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
17832 tmp
= gen_rtx_SET (gen_frame_mem (DFmode
,
17833 plus_constant (Pmode
,
17837 RTX_FRAME_RELATED_P (tmp
) = 1;
17838 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
17841 par
= emit_insn (par
);
17842 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
17843 RTX_FRAME_RELATED_P (par
) = 1;
17848 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
17849 has the cmse_nonsecure_call attribute and returns false otherwise. */
17852 detect_cmse_nonsecure_call (tree addr
)
17857 tree fntype
= TREE_TYPE (addr
);
17858 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_call",
17859 TYPE_ATTRIBUTES (fntype
)))
17865 /* Emit a call instruction with pattern PAT. ADDR is the address of
17866 the call target. */
17869 arm_emit_call_insn (rtx pat
, rtx addr
, bool sibcall
)
17873 insn
= emit_call_insn (pat
);
17875 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17876 If the call might use such an entry, add a use of the PIC register
17877 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17878 if (TARGET_VXWORKS_RTP
17881 && GET_CODE (addr
) == SYMBOL_REF
17882 && (SYMBOL_REF_DECL (addr
)
17883 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
17884 : !SYMBOL_REF_LOCAL_P (addr
)))
17886 require_pic_register ();
17887 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
17890 if (TARGET_AAPCS_BASED
)
17892 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17893 linker. We need to add an IP clobber to allow setting
17894 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17895 is not needed since it's a fixed register. */
17896 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
17897 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP_REGNUM
));
17901 /* Output a 'call' insn. */
17903 output_call (rtx
*operands
)
17905 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
17907 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17908 if (REGNO (operands
[0]) == LR_REGNUM
)
17910 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
17911 output_asm_insn ("mov%?\t%0, %|lr", operands
);
17914 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
17916 if (TARGET_INTERWORK
|| arm_arch4t
)
17917 output_asm_insn ("bx%?\t%0", operands
);
17919 output_asm_insn ("mov%?\t%|pc, %0", operands
);
17924 /* Output a move from arm registers to arm registers of a long double
17925 OPERANDS[0] is the destination.
17926 OPERANDS[1] is the source. */
17928 output_mov_long_double_arm_from_arm (rtx
*operands
)
17930 /* We have to be careful here because the two might overlap. */
17931 int dest_start
= REGNO (operands
[0]);
17932 int src_start
= REGNO (operands
[1]);
17936 if (dest_start
< src_start
)
17938 for (i
= 0; i
< 3; i
++)
17940 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17941 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17942 output_asm_insn ("mov%?\t%0, %1", ops
);
17947 for (i
= 2; i
>= 0; i
--)
17949 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
17950 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
17951 output_asm_insn ("mov%?\t%0, %1", ops
);
17959 arm_emit_movpair (rtx dest
, rtx src
)
17961 /* If the src is an immediate, simplify it. */
17962 if (CONST_INT_P (src
))
17964 HOST_WIDE_INT val
= INTVAL (src
);
17965 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
17966 if ((val
>> 16) & 0x0000ffff)
17968 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
17970 GEN_INT ((val
>> 16) & 0x0000ffff));
17971 rtx_insn
*insn
= get_last_insn ();
17972 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17976 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
17977 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
17978 rtx_insn
*insn
= get_last_insn ();
17979 set_unique_reg_note (insn
, REG_EQUAL
, copy_rtx (src
));
17982 /* Output a move between double words. It must be REG<-MEM
17985 output_move_double (rtx
*operands
, bool emit
, int *count
)
17987 enum rtx_code code0
= GET_CODE (operands
[0]);
17988 enum rtx_code code1
= GET_CODE (operands
[1]);
17993 /* The only case when this might happen is when
17994 you are looking at the length of a DImode instruction
17995 that has an invalid constant in it. */
17996 if (code0
== REG
&& code1
!= MEM
)
17998 gcc_assert (!emit
);
18005 unsigned int reg0
= REGNO (operands
[0]);
18007 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
18009 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
18011 switch (GET_CODE (XEXP (operands
[1], 0)))
18018 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
18019 output_asm_insn ("ldrd%?\t%0, [%m1]", operands
);
18021 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18026 gcc_assert (TARGET_LDRD
);
18028 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands
);
18035 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands
);
18037 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands
);
18045 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands
);
18047 output_asm_insn ("ldmia%?\t%m1!, %M0", operands
);
18052 gcc_assert (TARGET_LDRD
);
18054 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands
);
18059 /* Autoicrement addressing modes should never have overlapping
18060 base and destination registers, and overlapping index registers
18061 are already prohibited, so this doesn't need to worry about
18063 otherops
[0] = operands
[0];
18064 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
18065 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
18067 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
18069 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
18071 /* Registers overlap so split out the increment. */
18074 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
18075 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops
);
18082 /* Use a single insn if we can.
18083 FIXME: IWMMXT allows offsets larger than ldrd can
18084 handle, fix these up with a pair of ldr. */
18086 || !CONST_INT_P (otherops
[2])
18087 || (INTVAL (otherops
[2]) > -256
18088 && INTVAL (otherops
[2]) < 256))
18091 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops
);
18097 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
18098 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18108 /* Use a single insn if we can.
18109 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18110 fix these up with a pair of ldr. */
18112 || !CONST_INT_P (otherops
[2])
18113 || (INTVAL (otherops
[2]) > -256
18114 && INTVAL (otherops
[2]) < 256))
18117 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops
);
18123 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
18124 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
18134 /* We might be able to use ldrd %0, %1 here. However the range is
18135 different to ldr/adr, and it is broken on some ARMv7-M
18136 implementations. */
18137 /* Use the second register of the pair to avoid problematic
18139 otherops
[1] = operands
[1];
18141 output_asm_insn ("adr%?\t%0, %1", otherops
);
18142 operands
[1] = otherops
[0];
18146 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18148 output_asm_insn ("ldmia%?\t%1, %M0", operands
);
18155 /* ??? This needs checking for thumb2. */
18157 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
18158 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
18160 otherops
[0] = operands
[0];
18161 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
18162 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
18164 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
18166 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18168 switch ((int) INTVAL (otherops
[2]))
18172 output_asm_insn ("ldmdb%?\t%1, %M0", otherops
);
18178 output_asm_insn ("ldmda%?\t%1, %M0", otherops
);
18184 output_asm_insn ("ldmib%?\t%1, %M0", otherops
);
18188 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
18189 operands
[1] = otherops
[0];
18191 && (REG_P (otherops
[2])
18193 || (CONST_INT_P (otherops
[2])
18194 && INTVAL (otherops
[2]) > -256
18195 && INTVAL (otherops
[2]) < 256)))
18197 if (reg_overlap_mentioned_p (operands
[0],
18200 /* Swap base and index registers over to
18201 avoid a conflict. */
18202 std::swap (otherops
[1], otherops
[2]);
18204 /* If both registers conflict, it will usually
18205 have been fixed by a splitter. */
18206 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
18207 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
18211 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18212 output_asm_insn ("ldrd%?\t%0, [%1]", operands
);
18219 otherops
[0] = operands
[0];
18221 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops
);
18226 if (CONST_INT_P (otherops
[2]))
18230 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
18231 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
18233 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18239 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
18245 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
18252 return "ldrd%?\t%0, [%1]";
18254 return "ldmia%?\t%1, %M0";
18258 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
18259 /* Take care of overlapping base/data reg. */
18260 if (reg_mentioned_p (operands
[0], operands
[1]))
18264 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18265 output_asm_insn ("ldr%?\t%0, %1", operands
);
18275 output_asm_insn ("ldr%?\t%0, %1", operands
);
18276 output_asm_insn ("ldr%?\t%0, %1", otherops
);
18286 /* Constraints should ensure this. */
18287 gcc_assert (code0
== MEM
&& code1
== REG
);
18288 gcc_assert ((REGNO (operands
[1]) != IP_REGNUM
)
18289 || (TARGET_ARM
&& TARGET_LDRD
));
18291 switch (GET_CODE (XEXP (operands
[0], 0)))
18297 output_asm_insn ("strd%?\t%1, [%m0]", operands
);
18299 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18304 gcc_assert (TARGET_LDRD
);
18306 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands
);
18313 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands
);
18315 output_asm_insn ("stmdb%?\t%m0!, %M1", operands
);
18323 output_asm_insn ("strd%?\t%1, [%m0], #8", operands
);
18325 output_asm_insn ("stm%?\t%m0!, %M1", operands
);
18330 gcc_assert (TARGET_LDRD
);
18332 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands
);
18337 otherops
[0] = operands
[1];
18338 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
18339 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
18341 /* IWMMXT allows offsets larger than ldrd can handle,
18342 fix these up with a pair of ldr. */
18344 && CONST_INT_P (otherops
[2])
18345 && (INTVAL(otherops
[2]) <= -256
18346 || INTVAL(otherops
[2]) >= 256))
18348 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18352 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
18353 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18362 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
18363 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
18369 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
18372 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops
);
18377 output_asm_insn ("strd%?\t%0, [%1], %2", otherops
);
18382 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
18383 if (CONST_INT_P (otherops
[2]) && !TARGET_LDRD
)
18385 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
18389 output_asm_insn ("stmdb%?\t%m0, %M1", operands
);
18396 output_asm_insn ("stmda%?\t%m0, %M1", operands
);
18403 output_asm_insn ("stmib%?\t%m0, %M1", operands
);
18408 && (REG_P (otherops
[2])
18410 || (CONST_INT_P (otherops
[2])
18411 && INTVAL (otherops
[2]) > -256
18412 && INTVAL (otherops
[2]) < 256)))
18414 otherops
[0] = operands
[1];
18415 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
18417 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops
);
18423 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
18424 otherops
[1] = operands
[1];
18427 output_asm_insn ("str%?\t%1, %0", operands
);
18428 output_asm_insn ("str%?\t%H1, %0", otherops
);
18438 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18439 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18442 output_move_quad (rtx
*operands
)
18444 if (REG_P (operands
[0]))
18446 /* Load, or reg->reg move. */
18448 if (MEM_P (operands
[1]))
18450 switch (GET_CODE (XEXP (operands
[1], 0)))
18453 output_asm_insn ("ldmia%?\t%m1, %M0", operands
);
18458 output_asm_insn ("adr%?\t%0, %1", operands
);
18459 output_asm_insn ("ldmia%?\t%0, %M0", operands
);
18463 gcc_unreachable ();
18471 gcc_assert (REG_P (operands
[1]));
18473 dest
= REGNO (operands
[0]);
18474 src
= REGNO (operands
[1]);
18476 /* This seems pretty dumb, but hopefully GCC won't try to do it
18479 for (i
= 0; i
< 4; i
++)
18481 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18482 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18483 output_asm_insn ("mov%?\t%0, %1", ops
);
18486 for (i
= 3; i
>= 0; i
--)
18488 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
18489 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
18490 output_asm_insn ("mov%?\t%0, %1", ops
);
18496 gcc_assert (MEM_P (operands
[0]));
18497 gcc_assert (REG_P (operands
[1]));
18498 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
18500 switch (GET_CODE (XEXP (operands
[0], 0)))
18503 output_asm_insn ("stm%?\t%m0, %M1", operands
);
18507 gcc_unreachable ();
18514 /* Output a VFP load or store instruction. */
18517 output_move_vfp (rtx
*operands
)
18519 rtx reg
, mem
, addr
, ops
[2];
18520 int load
= REG_P (operands
[0]);
18521 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
18522 int sp
= (!TARGET_VFP_FP16INST
18523 || GET_MODE_SIZE (GET_MODE (operands
[0])) == 4);
18524 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
18529 reg
= operands
[!load
];
18530 mem
= operands
[load
];
18532 mode
= GET_MODE (reg
);
18534 gcc_assert (REG_P (reg
));
18535 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
18536 gcc_assert ((mode
== HFmode
&& TARGET_HARD_FLOAT
)
18542 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
18543 gcc_assert (MEM_P (mem
));
18545 addr
= XEXP (mem
, 0);
18547 switch (GET_CODE (addr
))
18550 templ
= "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18551 ops
[0] = XEXP (addr
, 0);
18556 templ
= "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18557 ops
[0] = XEXP (addr
, 0);
18562 templ
= "v%sr%%?.%s\t%%%s0, %%1%s";
18568 sprintf (buff
, templ
,
18569 load
? "ld" : "st",
18570 dp
? "64" : sp
? "32" : "16",
18572 integer_p
? "\t%@ int" : "");
18573 output_asm_insn (buff
, ops
);
18578 /* Output a Neon double-word or quad-word load or store, or a load
18579 or store for larger structure modes.
18581 WARNING: The ordering of elements is weird in big-endian mode,
18582 because the EABI requires that vectors stored in memory appear
18583 as though they were stored by a VSTM, as required by the EABI.
18584 GCC RTL defines element ordering based on in-memory order.
18585 This can be different from the architectural ordering of elements
18586 within a NEON register. The intrinsics defined in arm_neon.h use the
18587 NEON register element ordering, not the GCC RTL element ordering.
18589 For example, the in-memory ordering of a big-endian a quadword
18590 vector with 16-bit elements when stored from register pair {d0,d1}
18591 will be (lowest address first, d0[N] is NEON register element N):
18593 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18595 When necessary, quadword registers (dN, dN+1) are moved to ARM
18596 registers from rN in the order:
18598 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18600 So that STM/LDM can be used on vectors in ARM registers, and the
18601 same memory layout will result as if VSTM/VLDM were used.
18603 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18604 possible, which allows use of appropriate alignment tags.
18605 Note that the choice of "64" is independent of the actual vector
18606 element size; this size simply ensures that the behavior is
18607 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18609 Due to limitations of those instructions, use of VST1.64/VLD1.64
18610 is not possible if:
18611 - the address contains PRE_DEC, or
18612 - the mode refers to more than 4 double-word registers
18614 In those cases, it would be possible to replace VSTM/VLDM by a
18615 sequence of instructions; this is not currently implemented since
18616 this is not certain to actually improve performance. */
18619 output_move_neon (rtx
*operands
)
18621 rtx reg
, mem
, addr
, ops
[2];
18622 int regno
, nregs
, load
= REG_P (operands
[0]);
18627 reg
= operands
[!load
];
18628 mem
= operands
[load
];
18630 mode
= GET_MODE (reg
);
18632 gcc_assert (REG_P (reg
));
18633 regno
= REGNO (reg
);
18634 nregs
= REG_NREGS (reg
) / 2;
18635 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
18636 || NEON_REGNO_OK_FOR_QUAD (regno
));
18637 gcc_assert (VALID_NEON_DREG_MODE (mode
)
18638 || VALID_NEON_QREG_MODE (mode
)
18639 || VALID_NEON_STRUCT_MODE (mode
));
18640 gcc_assert (MEM_P (mem
));
18642 addr
= XEXP (mem
, 0);
18644 /* Strip off const from addresses like (const (plus (...))). */
18645 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18646 addr
= XEXP (addr
, 0);
18648 switch (GET_CODE (addr
))
18651 /* We have to use vldm / vstm for too-large modes. */
18654 templ
= "v%smia%%?\t%%0!, %%h1";
18655 ops
[0] = XEXP (addr
, 0);
18659 templ
= "v%s1.64\t%%h1, %%A0";
18666 /* We have to use vldm / vstm in this case, since there is no
18667 pre-decrement form of the vld1 / vst1 instructions. */
18668 templ
= "v%smdb%%?\t%%0!, %%h1";
18669 ops
[0] = XEXP (addr
, 0);
18674 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18675 gcc_unreachable ();
18678 /* We have to use vldm / vstm for too-large modes. */
18682 templ
= "v%smia%%?\t%%m0, %%h1";
18684 templ
= "v%s1.64\t%%h1, %%A0";
18690 /* Fall through. */
18696 for (i
= 0; i
< nregs
; i
++)
18698 /* We're only using DImode here because it's a convenient size. */
18699 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
18700 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
18701 if (reg_overlap_mentioned_p (ops
[0], mem
))
18703 gcc_assert (overlap
== -1);
18708 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18709 output_asm_insn (buff
, ops
);
18714 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
18715 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
18716 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
18717 output_asm_insn (buff
, ops
);
18724 gcc_unreachable ();
18727 sprintf (buff
, templ
, load
? "ld" : "st");
18728 output_asm_insn (buff
, ops
);
18733 /* Compute and return the length of neon_mov<mode>, where <mode> is
18734 one of VSTRUCT modes: EI, OI, CI or XI. */
18736 arm_attr_length_move_neon (rtx_insn
*insn
)
18738 rtx reg
, mem
, addr
;
18742 extract_insn_cached (insn
);
18744 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
18746 mode
= GET_MODE (recog_data
.operand
[0]);
18757 gcc_unreachable ();
18761 load
= REG_P (recog_data
.operand
[0]);
18762 reg
= recog_data
.operand
[!load
];
18763 mem
= recog_data
.operand
[load
];
18765 gcc_assert (MEM_P (mem
));
18767 addr
= XEXP (mem
, 0);
18769 /* Strip off const from addresses like (const (plus (...))). */
18770 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
18771 addr
= XEXP (addr
, 0);
18773 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
18775 int insns
= REG_NREGS (reg
) / 2;
18782 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18786 arm_address_offset_is_imm (rtx_insn
*insn
)
18790 extract_insn_cached (insn
);
18792 if (REG_P (recog_data
.operand
[0]))
18795 mem
= recog_data
.operand
[0];
18797 gcc_assert (MEM_P (mem
));
18799 addr
= XEXP (mem
, 0);
18802 || (GET_CODE (addr
) == PLUS
18803 && REG_P (XEXP (addr
, 0))
18804 && CONST_INT_P (XEXP (addr
, 1))))
18810 /* Output an ADD r, s, #n where n may be too big for one instruction.
18811 If adding zero to one register, output nothing. */
18813 output_add_immediate (rtx
*operands
)
18815 HOST_WIDE_INT n
= INTVAL (operands
[2]);
18817 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
18820 output_multi_immediate (operands
,
18821 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18824 output_multi_immediate (operands
,
18825 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18832 /* Output a multiple immediate operation.
18833 OPERANDS is the vector of operands referred to in the output patterns.
18834 INSTR1 is the output pattern to use for the first constant.
18835 INSTR2 is the output pattern to use for subsequent constants.
18836 IMMED_OP is the index of the constant slot in OPERANDS.
18837 N is the constant value. */
18838 static const char *
18839 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
18840 int immed_op
, HOST_WIDE_INT n
)
18842 #if HOST_BITS_PER_WIDE_INT > 32
18848 /* Quick and easy output. */
18849 operands
[immed_op
] = const0_rtx
;
18850 output_asm_insn (instr1
, operands
);
18855 const char * instr
= instr1
;
18857 /* Note that n is never zero here (which would give no output). */
18858 for (i
= 0; i
< 32; i
+= 2)
18862 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
18863 output_asm_insn (instr
, operands
);
18873 /* Return the name of a shifter operation. */
18874 static const char *
18875 arm_shift_nmem(enum rtx_code code
)
18880 return ARM_LSL_NAME
;
18896 /* Return the appropriate ARM instruction for the operation code.
18897 The returned result should not be overwritten. OP is the rtx of the
18898 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18901 arithmetic_instr (rtx op
, int shift_first_arg
)
18903 switch (GET_CODE (op
))
18909 return shift_first_arg
? "rsb" : "sub";
18924 return arm_shift_nmem(GET_CODE(op
));
18927 gcc_unreachable ();
18931 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18932 for the operation code. The returned result should not be overwritten.
18933 OP is the rtx code of the shift.
18934 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18936 static const char *
18937 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
18940 enum rtx_code code
= GET_CODE (op
);
18945 if (!CONST_INT_P (XEXP (op
, 1)))
18947 output_operand_lossage ("invalid shift operand");
18952 *amountp
= 32 - INTVAL (XEXP (op
, 1));
18960 mnem
= arm_shift_nmem(code
);
18961 if (CONST_INT_P (XEXP (op
, 1)))
18963 *amountp
= INTVAL (XEXP (op
, 1));
18965 else if (REG_P (XEXP (op
, 1)))
18972 output_operand_lossage ("invalid shift operand");
18978 /* We never have to worry about the amount being other than a
18979 power of 2, since this case can never be reloaded from a reg. */
18980 if (!CONST_INT_P (XEXP (op
, 1)))
18982 output_operand_lossage ("invalid shift operand");
18986 *amountp
= INTVAL (XEXP (op
, 1)) & 0xFFFFFFFF;
18988 /* Amount must be a power of two. */
18989 if (*amountp
& (*amountp
- 1))
18991 output_operand_lossage ("invalid shift operand");
18995 *amountp
= exact_log2 (*amountp
);
18996 gcc_assert (IN_RANGE (*amountp
, 0, 31));
18997 return ARM_LSL_NAME
;
19000 output_operand_lossage ("invalid shift operand");
19004 /* This is not 100% correct, but follows from the desire to merge
19005 multiplication by a power of 2 with the recognizer for a
19006 shift. >=32 is not a valid shift for "lsl", so we must try and
19007 output a shift that produces the correct arithmetical result.
19008 Using lsr #32 is identical except for the fact that the carry bit
19009 is not set correctly if we set the flags; but we never use the
19010 carry bit from such an operation, so we can ignore that. */
19011 if (code
== ROTATERT
)
19012 /* Rotate is just modulo 32. */
19014 else if (*amountp
!= (*amountp
& 31))
19016 if (code
== ASHIFT
)
19021 /* Shifts of 0 are no-ops. */
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      /* Start a fresh directive once the current line is long enough.  */
      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_regs[reg] \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19084 /* Compute the register save mask for registers 0 through 12
19085 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19087 static unsigned long
19088 arm_compute_save_reg0_reg12_mask (void)
19090 unsigned long func_type
= arm_current_func_type ();
19091 unsigned long save_reg_mask
= 0;
19094 if (IS_INTERRUPT (func_type
))
19096 unsigned int max_reg
;
19097 /* Interrupt functions must not corrupt any registers,
19098 even call clobbered ones. If this is a leaf function
19099 we can just examine the registers used by the RTL, but
19100 otherwise we have to assume that whatever function is
19101 called might clobber anything, and so we have to save
19102 all the call-clobbered registers as well. */
19103 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
19104 /* FIQ handlers have registers r8 - r12 banked, so
19105 we only need to check r0 - r7, Normal ISRs only
19106 bank r14 and r15, so we must check up to r12.
19107 r13 is the stack pointer which is always preserved,
19108 so we do not need to consider it here. */
19113 for (reg
= 0; reg
<= max_reg
; reg
++)
19114 if (df_regs_ever_live_p (reg
)
19115 || (! crtl
->is_leaf
&& call_used_regs
[reg
]))
19116 save_reg_mask
|= (1 << reg
);
19118 /* Also save the pic base register if necessary. */
19120 && !TARGET_SINGLE_PIC_BASE
19121 && arm_pic_register
!= INVALID_REGNUM
19122 && crtl
->uses_pic_offset_table
)
19123 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19125 else if (IS_VOLATILE(func_type
))
19127 /* For noreturn functions we historically omitted register saves
19128 altogether. However this really messes up debugging. As a
19129 compromise save just the frame pointers. Combined with the link
19130 register saved elsewhere this should be sufficient to get
19132 if (frame_pointer_needed
)
19133 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19134 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
19135 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19136 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
19137 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
19141 /* In the normal case we only need to save those registers
19142 which are call saved and which are used by this function. */
19143 for (reg
= 0; reg
<= 11; reg
++)
19144 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19145 save_reg_mask
|= (1 << reg
);
19147 /* Handle the frame pointer as a special case. */
19148 if (frame_pointer_needed
)
19149 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19151 /* If we aren't loading the PIC register,
19152 don't stack it even though it may be live. */
19154 && !TARGET_SINGLE_PIC_BASE
19155 && arm_pic_register
!= INVALID_REGNUM
19156 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
19157 || crtl
->uses_pic_offset_table
))
19158 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19160 /* The prologue will copy SP into R0, so save it. */
19161 if (IS_STACKALIGN (func_type
))
19162 save_reg_mask
|= 1;
19165 /* Save registers so the exception handler can modify them. */
19166 if (crtl
->calls_eh_return
)
19172 reg
= EH_RETURN_DATA_REGNO (i
);
19173 if (reg
== INVALID_REGNUM
)
19175 save_reg_mask
|= 1 << reg
;
19179 return save_reg_mask
;
19182 /* Return true if r3 is live at the start of the function. */
19185 arm_r3_live_at_start_p (void)
19187 /* Just look at cfg info, which is still close enough to correct at this
19188 point. This gives false positives for broken functions that might use
19189 uninitialized data that happens to be allocated in r3, but who cares? */
19190 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 3);
19193 /* Compute the number of bytes used to store the static chain register on the
19194 stack, above the stack frame. We need to know this accurately to get the
19195 alignment of the rest of the stack frame correct. */
19198 arm_compute_static_chain_stack_bytes (void)
19200 /* See the defining assertion in arm_expand_prologue. */
19201 if (IS_NESTED (arm_current_func_type ())
19202 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19203 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
19204 || flag_stack_clash_protection
)
19205 && !df_regs_ever_live_p (LR_REGNUM
)))
19206 && arm_r3_live_at_start_p ()
19207 && crtl
->args
.pretend_args_size
== 0)
19213 /* Compute a bit mask of which core registers need to be
19214 saved on the stack for the current function.
19215 This is used by arm_compute_frame_layout, which may add extra registers. */
19217 static unsigned long
19218 arm_compute_save_core_reg_mask (void)
19220 unsigned int save_reg_mask
= 0;
19221 unsigned long func_type
= arm_current_func_type ();
19224 if (IS_NAKED (func_type
))
19225 /* This should never really happen. */
19228 /* If we are creating a stack frame, then we must save the frame pointer,
19229 IP (which will hold the old stack pointer), LR and the PC. */
19230 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
19232 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
19235 | (1 << PC_REGNUM
);
19237 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
19239 /* Decide if we need to save the link register.
19240 Interrupt routines have their own banked link register,
19241 so they never need to save it.
19242 Otherwise if we do not use the link register we do not need to save
19243 it. If we are pushing other registers onto the stack however, we
19244 can save an instruction in the epilogue by pushing the link register
19245 now and then popping it back into the PC. This incurs extra memory
19246 accesses though, so we only do it when optimizing for size, and only
19247 if we know that we will not need a fancy return sequence. */
19248 if (df_regs_ever_live_p (LR_REGNUM
)
19251 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
19252 && !crtl
->tail_call_emit
19253 && !crtl
->calls_eh_return
))
19254 save_reg_mask
|= 1 << LR_REGNUM
;
19256 if (cfun
->machine
->lr_save_eliminated
)
19257 save_reg_mask
&= ~ (1 << LR_REGNUM
);
19259 if (TARGET_REALLY_IWMMXT
19260 && ((bit_count (save_reg_mask
)
19261 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
19262 arm_compute_static_chain_stack_bytes())
19265 /* The total number of registers that are going to be pushed
19266 onto the stack is odd. We need to ensure that the stack
19267 is 64-bit aligned before we start to save iWMMXt registers,
19268 and also before we start to create locals. (A local variable
19269 might be a double or long long which we will load/store using
19270 an iWMMXt instruction). Therefore we need to push another
19271 ARM register, so that the stack will be 64-bit aligned. We
19272 try to avoid using the arg registers (r0 -r3) as they might be
19273 used to pass values in a tail call. */
19274 for (reg
= 4; reg
<= 12; reg
++)
19275 if ((save_reg_mask
& (1 << reg
)) == 0)
19279 save_reg_mask
|= (1 << reg
);
19282 cfun
->machine
->sibcall_blocked
= 1;
19283 save_reg_mask
|= (1 << 3);
19287 /* We may need to push an additional register for use initializing the
19288 PIC base register. */
19289 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
19290 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
19292 reg
= thumb_find_work_register (1 << 4);
19293 if (!call_used_regs
[reg
])
19294 save_reg_mask
|= (1 << reg
);
19297 return save_reg_mask
;
19300 /* Compute a bit mask of which core registers need to be
19301 saved on the stack for the current function. */
19302 static unsigned long
19303 thumb1_compute_save_core_reg_mask (void)
19305 unsigned long mask
;
19309 for (reg
= 0; reg
< 12; reg
++)
19310 if (df_regs_ever_live_p (reg
) && callee_saved_reg_p (reg
))
19313 /* Handle the frame pointer as a special case. */
19314 if (frame_pointer_needed
)
19315 mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
19318 && !TARGET_SINGLE_PIC_BASE
19319 && arm_pic_register
!= INVALID_REGNUM
19320 && crtl
->uses_pic_offset_table
)
19321 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
19323 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19324 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19325 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
19327 /* LR will also be pushed if any lo regs are pushed. */
19328 if (mask
& 0xff || thumb_force_lr_save ())
19329 mask
|= (1 << LR_REGNUM
);
19331 /* Make sure we have a low work register if we need one.
19332 We will need one if we are going to push a high register,
19333 but we are not currently intending to push a low register. */
19334 if ((mask
& 0xff) == 0
19335 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
19337 /* Use thumb_find_work_register to choose which register
19338 we will use. If the register is live then we will
19339 have to push it. Use LAST_LO_REGNUM as our fallback
19340 choice for the register to select. */
19341 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
19342 /* Make sure the register returned by thumb_find_work_register is
19343 not part of the return value. */
19344 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
19345 reg
= LAST_LO_REGNUM
;
19347 if (callee_saved_reg_p (reg
))
19351 /* The 504 below is 8 bytes less than 512 because there are two possible
19352 alignment words. We can't tell here if they will be present or not so we
19353 have to play it safe and assume that they are. */
19354 if ((CALLER_INTERWORKING_SLOT_SIZE
+
19355 ROUND_UP_WORD (get_frame_size ()) +
19356 crtl
->outgoing_args_size
) >= 504)
19358 /* This is the same as the code in thumb1_expand_prologue() which
19359 determines which register to use for stack decrement. */
19360 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
19361 if (mask
& (1 << reg
))
19364 if (reg
> LAST_LO_REGNUM
)
19366 /* Make sure we have a register available for stack decrement. */
19367 mask
|= 1 << LAST_LO_REGNUM
;
19375 /* Return the number of bytes required to save VFP registers. */
19377 arm_get_vfp_saved_size (void)
19379 unsigned int regno
;
19384 /* Space for saved VFP registers. */
19385 if (TARGET_HARD_FLOAT
)
19388 for (regno
= FIRST_VFP_REGNUM
;
19389 regno
< LAST_VFP_REGNUM
;
19392 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
19393 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
19397 /* Workaround ARM10 VFPr1 bug. */
19398 if (count
== 2 && !arm_arch6
)
19400 saved
+= count
* 8;
19409 if (count
== 2 && !arm_arch6
)
19411 saved
+= count
* 8;
19418 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19419 everything bar the final return instruction. If simple_return is true,
19420 then do not output epilogue, because it has already been emitted in RTL.
19422 Note: do not forget to update length attribute of corresponding insn pattern
19423 when changing assembly output (eg. length attribute of
19424 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19425 register clearing sequences). */
19427 output_return_instruction (rtx operand
, bool really_return
, bool reverse
,
19428 bool simple_return
)
19430 char conditional
[10];
19433 unsigned long live_regs_mask
;
19434 unsigned long func_type
;
19435 arm_stack_offsets
*offsets
;
19437 func_type
= arm_current_func_type ();
19439 if (IS_NAKED (func_type
))
19442 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
19444 /* If this function was declared non-returning, and we have
19445 found a tail call, then we have to trust that the called
19446 function won't return. */
19451 /* Otherwise, trap an attempted return by aborting. */
19453 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
19455 assemble_external_libcall (ops
[1]);
19456 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
19462 gcc_assert (!cfun
->calls_alloca
|| really_return
);
19464 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
19466 cfun
->machine
->return_used_this_function
= 1;
19468 offsets
= arm_get_frame_offsets ();
19469 live_regs_mask
= offsets
->saved_regs_mask
;
19471 if (!simple_return
&& live_regs_mask
)
19473 const char * return_reg
;
19475 /* If we do not have any special requirements for function exit
19476 (e.g. interworking) then we can load the return address
19477 directly into the PC. Otherwise we must load it into LR. */
19479 && !IS_CMSE_ENTRY (func_type
)
19480 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
19481 return_reg
= reg_names
[PC_REGNUM
];
19483 return_reg
= reg_names
[LR_REGNUM
];
19485 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
19487 /* There are three possible reasons for the IP register
19488 being saved. 1) a stack frame was created, in which case
19489 IP contains the old stack pointer, or 2) an ISR routine
19490 corrupted it, or 3) it was saved to align the stack on
19491 iWMMXt. In case 1, restore IP into SP, otherwise just
19493 if (frame_pointer_needed
)
19495 live_regs_mask
&= ~ (1 << IP_REGNUM
);
19496 live_regs_mask
|= (1 << SP_REGNUM
);
19499 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
19502 /* On some ARM architectures it is faster to use LDR rather than
19503 LDM to load a single register. On other architectures, the
19504 cost is the same. In 26 bit mode, or for exception handlers,
19505 we have to use LDM to load the PC so that the CPSR is also
19507 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
19508 if (live_regs_mask
== (1U << reg
))
19511 if (reg
<= LAST_ARM_REGNUM
19512 && (reg
!= LR_REGNUM
19514 || ! IS_INTERRUPT (func_type
)))
19516 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
19517 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
19524 /* Generate the load multiple instruction to restore the
19525 registers. Note we can get here, even if
19526 frame_pointer_needed is true, but only if sp already
19527 points to the base of the saved core registers. */
19528 if (live_regs_mask
& (1 << SP_REGNUM
))
19530 unsigned HOST_WIDE_INT stack_adjust
;
19532 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
19533 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
19535 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
19536 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
19539 /* If we can't use ldmib (SA110 bug),
19540 then try to pop r3 instead. */
19542 live_regs_mask
|= 1 << 3;
19544 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
19547 /* For interrupt returns we have to use an LDM rather than
19548 a POP so that we can use the exception return variant. */
19549 else if (IS_INTERRUPT (func_type
))
19550 sprintf (instr
, "ldmfd%s\t%%|sp!, {", conditional
);
19552 sprintf (instr
, "pop%s\t{", conditional
);
19554 p
= instr
+ strlen (instr
);
19556 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
19557 if (live_regs_mask
& (1 << reg
))
19559 int l
= strlen (reg_names
[reg
]);
19565 memcpy (p
, ", ", 2);
19569 memcpy (p
, "%|", 2);
19570 memcpy (p
+ 2, reg_names
[reg
], l
);
19574 if (live_regs_mask
& (1 << LR_REGNUM
))
19576 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
19577 /* If returning from an interrupt, restore the CPSR. */
19578 if (IS_INTERRUPT (func_type
))
19585 output_asm_insn (instr
, & operand
);
19587 /* See if we need to generate an extra instruction to
19588 perform the actual function return. */
19590 && func_type
!= ARM_FT_INTERWORKED
19591 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
19593 /* The return has already been handled
19594 by loading the LR into the PC. */
19601 switch ((int) ARM_FUNC_TYPE (func_type
))
19605 /* ??? This is wrong for unified assembly syntax. */
19606 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
19609 case ARM_FT_INTERWORKED
:
19610 gcc_assert (arm_arch5
|| arm_arch4t
);
19611 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19614 case ARM_FT_EXCEPTION
:
19615 /* ??? This is wrong for unified assembly syntax. */
19616 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
19620 if (IS_CMSE_ENTRY (func_type
))
19622 /* Check if we have to clear the 'GE bits' which is only used if
19623 parallel add and subtraction instructions are available. */
19624 if (TARGET_INT_SIMD
)
19625 snprintf (instr
, sizeof (instr
),
19626 "msr%s\tAPSR_nzcvqg, %%|lr", conditional
);
19628 snprintf (instr
, sizeof (instr
),
19629 "msr%s\tAPSR_nzcvq, %%|lr", conditional
);
19631 output_asm_insn (instr
, & operand
);
19632 if (TARGET_HARD_FLOAT
&& !TARGET_THUMB1
)
19634 /* Clear the cumulative exception-status bits (0-4,7) and the
19635 condition code bits (28-31) of the FPSCR. We need to
19636 remember to clear the first scratch register used (IP) and
19637 save and restore the second (r4). */
19638 snprintf (instr
, sizeof (instr
), "push\t{%%|r4}");
19639 output_asm_insn (instr
, & operand
);
19640 snprintf (instr
, sizeof (instr
), "vmrs\t%%|ip, fpscr");
19641 output_asm_insn (instr
, & operand
);
19642 snprintf (instr
, sizeof (instr
), "movw\t%%|r4, #65376");
19643 output_asm_insn (instr
, & operand
);
19644 snprintf (instr
, sizeof (instr
), "movt\t%%|r4, #4095");
19645 output_asm_insn (instr
, & operand
);
19646 snprintf (instr
, sizeof (instr
), "and\t%%|ip, %%|r4");
19647 output_asm_insn (instr
, & operand
);
19648 snprintf (instr
, sizeof (instr
), "vmsr\tfpscr, %%|ip");
19649 output_asm_insn (instr
, & operand
);
19650 snprintf (instr
, sizeof (instr
), "pop\t{%%|r4}");
19651 output_asm_insn (instr
, & operand
);
19652 snprintf (instr
, sizeof (instr
), "mov\t%%|ip, %%|lr");
19653 output_asm_insn (instr
, & operand
);
19655 snprintf (instr
, sizeof (instr
), "bxns\t%%|lr");
19657 /* Use bx if it's available. */
19658 else if (arm_arch5
|| arm_arch4t
)
19659 sprintf (instr
, "bx%s\t%%|lr", conditional
);
19661 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
19665 output_asm_insn (instr
, & operand
);
19671 /* Output in FILE asm statements needed to declare the NAME of the function
19672 defined by its DECL node. */
19675 arm_asm_declare_function_name (FILE *file
, const char *name
, tree decl
)
19677 size_t cmse_name_len
;
19678 char *cmse_name
= 0;
19679 char cmse_prefix
[] = "__acle_se_";
19681 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19682 extra function label for each function with the 'cmse_nonsecure_entry'
19683 attribute. This extra function label should be prepended with
19684 '__acle_se_', telling the linker that it needs to create secure gateway
19685 veneers for this function. */
19686 if (use_cmse
&& lookup_attribute ("cmse_nonsecure_entry",
19687 DECL_ATTRIBUTES (decl
)))
19689 cmse_name_len
= sizeof (cmse_prefix
) + strlen (name
);
19690 cmse_name
= XALLOCAVEC (char, cmse_name_len
);
19691 snprintf (cmse_name
, cmse_name_len
, "%s%s", cmse_prefix
, name
);
19692 targetm
.asm_out
.globalize_label (file
, cmse_name
);
19694 ARM_DECLARE_FUNCTION_NAME (file
, cmse_name
, decl
);
19695 ASM_OUTPUT_TYPE_DIRECTIVE (file
, cmse_name
, "function");
19698 ARM_DECLARE_FUNCTION_NAME (file
, name
, decl
);
19699 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
19700 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
19701 ASM_OUTPUT_LABEL (file
, name
);
19704 ASM_OUTPUT_LABEL (file
, cmse_name
);
19706 ARM_OUTPUT_FN_UNWIND (file
, TRUE
);
19709 /* Write the function name into the code section, directly preceding
19710 the function prologue.
19712 Code will be output similar to this:
19714 .ascii "arm_poke_function_name", 0
19717 .word 0xff000000 + (t1 - t0)
19718 arm_poke_function_name
19720 stmfd sp!, {fp, ip, lr, pc}
19723 When performing a stack backtrace, code can inspect the value
19724 of 'pc' stored at 'fp' + 0. If the trace function then looks
19725 at location pc - 12 and the top 8 bits are set, then we know
19726 that there is a function name embedded immediately preceding this
19727 location and has length ((pc[-3]) & 0xff000000).
19729 We assume that pc is declared as a pointer to an unsigned long.
19731 It is of no benefit to output the function name if we are assembling
19732 a leaf function. These function types will not contain a stack
19733 backtrace structure, therefore it is not possible to determine the
19736 arm_poke_function_name (FILE *stream
, const char *name
)
19738 unsigned long alignlength
;
19739 unsigned long length
;
19742 length
= strlen (name
) + 1;
19743 alignlength
= ROUND_UP_WORD (length
);
19745 ASM_OUTPUT_ASCII (stream
, name
, length
);
19746 ASM_OUTPUT_ALIGN (stream
, 2);
19747 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
19748 assemble_aligned_integer (UNITS_PER_WORD
, x
);
19751 /* Place some comments into the assembler stream
19752 describing the current function. */
19754 arm_output_function_prologue (FILE *f
)
19756 unsigned long func_type
;
19758 /* Sanity check. */
19759 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
19761 func_type
= arm_current_func_type ();
19763 switch ((int) ARM_FUNC_TYPE (func_type
))
19766 case ARM_FT_NORMAL
:
19768 case ARM_FT_INTERWORKED
:
19769 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
19772 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
19775 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
19777 case ARM_FT_EXCEPTION
:
19778 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
19782 if (IS_NAKED (func_type
))
19783 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19785 if (IS_VOLATILE (func_type
))
19786 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
19788 if (IS_NESTED (func_type
))
19789 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
19790 if (IS_STACKALIGN (func_type
))
19791 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19792 if (IS_CMSE_ENTRY (func_type
))
19793 asm_fprintf (f
, "\t%@ Non-secure entry function: called from non-secure code.\n");
19795 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19797 crtl
->args
.pretend_args_size
,
19798 (HOST_WIDE_INT
) get_frame_size ());
19800 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19801 frame_pointer_needed
,
19802 cfun
->machine
->uses_anonymous_args
);
19804 if (cfun
->machine
->lr_save_eliminated
)
19805 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
19807 if (crtl
->calls_eh_return
)
19808 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
19813 arm_output_function_epilogue (FILE *)
19815 arm_stack_offsets
*offsets
;
19821 /* Emit any call-via-reg trampolines that are needed for v4t support
19822 of call_reg and call_value_reg type insns. */
19823 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
19825 rtx label
= cfun
->machine
->call_via
[regno
];
19829 switch_to_section (function_section (current_function_decl
));
19830 targetm
.asm_out
.internal_label (asm_out_file
, "L",
19831 CODE_LABEL_NUMBER (label
));
19832 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
19836 /* ??? Probably not safe to set this here, since it assumes that a
19837 function will be emitted as assembly immediately after we generate
19838 RTL for it. This does not happen for inline functions. */
19839 cfun
->machine
->return_used_this_function
= 0;
19841 else /* TARGET_32BIT */
19843 /* We need to take into account any stack-frame rounding. */
19844 offsets
= arm_get_frame_offsets ();
19846 gcc_assert (!use_return_insn (FALSE
, NULL
)
19847 || (cfun
->machine
->return_used_this_function
!= 0)
19848 || offsets
->saved_regs
== offsets
->outgoing_args
19849 || frame_pointer_needed
);
19853 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19854 STR and STRD. If an even number of registers are being pushed, one
19855 or more STRD patterns are created for each register pair. If an
19856 odd number of registers are pushed, emit an initial STR followed by
19857 as many STRD instructions as are needed. This works best when the
19858 stack is initially 64-bit aligned (the normal case), since it
19859 ensures that each STRD is also 64-bit aligned. */
19861 thumb2_emit_strd_push (unsigned long saved_regs_mask
)
19866 rtx par
= NULL_RTX
;
19867 rtx dwarf
= NULL_RTX
;
19871 num_regs
= bit_count (saved_regs_mask
);
19873 /* Must be at least one register to save, and can't save SP or PC. */
19874 gcc_assert (num_regs
> 0 && num_regs
<= 14);
19875 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
19876 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
19878 /* Create sequence for DWARF info. All the frame-related data for
19879 debugging is held in this wrapper. */
19880 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
19882 /* Describe the stack adjustment. */
19883 tmp
= gen_rtx_SET (stack_pointer_rtx
,
19884 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
19885 RTX_FRAME_RELATED_P (tmp
) = 1;
19886 XVECEXP (dwarf
, 0, 0) = tmp
;
19888 /* Find the first register. */
19889 for (regno
= 0; (saved_regs_mask
& (1 << regno
)) == 0; regno
++)
19894 /* If there's an odd number of registers to push. Start off by
19895 pushing a single register. This ensures that subsequent strd
19896 operations are dword aligned (assuming that SP was originally
19897 64-bit aligned). */
19898 if ((num_regs
& 1) != 0)
19900 rtx reg
, mem
, insn
;
19902 reg
= gen_rtx_REG (SImode
, regno
);
19904 mem
= gen_frame_mem (Pmode
, gen_rtx_PRE_DEC (Pmode
,
19905 stack_pointer_rtx
));
19907 mem
= gen_frame_mem (Pmode
,
19909 (Pmode
, stack_pointer_rtx
,
19910 plus_constant (Pmode
, stack_pointer_rtx
,
19913 tmp
= gen_rtx_SET (mem
, reg
);
19914 RTX_FRAME_RELATED_P (tmp
) = 1;
19915 insn
= emit_insn (tmp
);
19916 RTX_FRAME_RELATED_P (insn
) = 1;
19917 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19918 tmp
= gen_rtx_SET (gen_frame_mem (Pmode
, stack_pointer_rtx
), reg
);
19919 RTX_FRAME_RELATED_P (tmp
) = 1;
19922 XVECEXP (dwarf
, 0, i
) = tmp
;
19926 while (i
< num_regs
)
19927 if (saved_regs_mask
& (1 << regno
))
19929 rtx reg1
, reg2
, mem1
, mem2
;
19930 rtx tmp0
, tmp1
, tmp2
;
19933 /* Find the register to pair with this one. */
19934 for (regno2
= regno
+ 1; (saved_regs_mask
& (1 << regno2
)) == 0;
19938 reg1
= gen_rtx_REG (SImode
, regno
);
19939 reg2
= gen_rtx_REG (SImode
, regno2
);
19946 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19949 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19951 -4 * (num_regs
- 1)));
19952 tmp0
= gen_rtx_SET (stack_pointer_rtx
,
19953 plus_constant (Pmode
, stack_pointer_rtx
,
19955 tmp1
= gen_rtx_SET (mem1
, reg1
);
19956 tmp2
= gen_rtx_SET (mem2
, reg2
);
19957 RTX_FRAME_RELATED_P (tmp0
) = 1;
19958 RTX_FRAME_RELATED_P (tmp1
) = 1;
19959 RTX_FRAME_RELATED_P (tmp2
) = 1;
19960 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (3));
19961 XVECEXP (par
, 0, 0) = tmp0
;
19962 XVECEXP (par
, 0, 1) = tmp1
;
19963 XVECEXP (par
, 0, 2) = tmp2
;
19964 insn
= emit_insn (par
);
19965 RTX_FRAME_RELATED_P (insn
) = 1;
19966 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
19970 mem1
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19973 mem2
= gen_frame_mem (Pmode
, plus_constant (Pmode
,
19976 tmp1
= gen_rtx_SET (mem1
, reg1
);
19977 tmp2
= gen_rtx_SET (mem2
, reg2
);
19978 RTX_FRAME_RELATED_P (tmp1
) = 1;
19979 RTX_FRAME_RELATED_P (tmp2
) = 1;
19980 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
19981 XVECEXP (par
, 0, 0) = tmp1
;
19982 XVECEXP (par
, 0, 1) = tmp2
;
19986 /* Create unwind information. This is an approximation. */
19987 tmp1
= gen_rtx_SET (gen_frame_mem (Pmode
,
19988 plus_constant (Pmode
,
19992 tmp2
= gen_rtx_SET (gen_frame_mem (Pmode
,
19993 plus_constant (Pmode
,
19998 RTX_FRAME_RELATED_P (tmp1
) = 1;
19999 RTX_FRAME_RELATED_P (tmp2
) = 1;
20000 XVECEXP (dwarf
, 0, i
+ 1) = tmp1
;
20001 XVECEXP (dwarf
, 0, i
+ 2) = tmp2
;
20003 regno
= regno2
+ 1;
20011 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20012 whenever possible, otherwise it emits single-word stores. The first store
20013 also allocates stack space for all saved registers, using writeback with
20014 post-addressing mode. All other stores use offset addressing. If no STRD
20015 can be emitted, this function emits a sequence of single-word stores,
20016 and not an STM as before, because single-word stores provide more freedom
20017 scheduling and can be turned into an STM by peephole optimizations. */
20019 arm_emit_strd_push (unsigned long saved_regs_mask
)
20022 int i
, j
, dwarf_index
= 0;
20024 rtx dwarf
= NULL_RTX
;
20025 rtx insn
= NULL_RTX
;
20028 /* TODO: A more efficient code can be emitted by changing the
20029 layout, e.g., first push all pairs that can use STRD to keep the
20030 stack aligned, and then push all other registers. */
20031 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20032 if (saved_regs_mask
& (1 << i
))
20035 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20036 gcc_assert (!(saved_regs_mask
& (1 << PC_REGNUM
)));
20037 gcc_assert (num_regs
> 0);
20039 /* Create sequence for DWARF info. */
20040 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20042 /* For dwarf info, we generate explicit stack update. */
20043 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20044 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20045 RTX_FRAME_RELATED_P (tmp
) = 1;
20046 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20048 /* Save registers. */
20049 offset
= - 4 * num_regs
;
20051 while (j
<= LAST_ARM_REGNUM
)
20052 if (saved_regs_mask
& (1 << j
))
20055 && (saved_regs_mask
& (1 << (j
+ 1))))
20057 /* Current register and previous register form register pair for
20058 which STRD can be generated. */
20061 /* Allocate stack space for all saved registers. */
20062 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20063 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20064 mem
= gen_frame_mem (DImode
, tmp
);
20067 else if (offset
> 0)
20068 mem
= gen_frame_mem (DImode
,
20069 plus_constant (Pmode
,
20073 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20075 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (DImode
, j
));
20076 RTX_FRAME_RELATED_P (tmp
) = 1;
20077 tmp
= emit_insn (tmp
);
20079 /* Record the first store insn. */
20080 if (dwarf_index
== 1)
20083 /* Generate dwarf info. */
20084 mem
= gen_frame_mem (SImode
,
20085 plus_constant (Pmode
,
20088 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20089 RTX_FRAME_RELATED_P (tmp
) = 1;
20090 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20092 mem
= gen_frame_mem (SImode
,
20093 plus_constant (Pmode
,
20096 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
+ 1));
20097 RTX_FRAME_RELATED_P (tmp
) = 1;
20098 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20105 /* Emit a single word store. */
20108 /* Allocate stack space for all saved registers. */
20109 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
20110 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20111 mem
= gen_frame_mem (SImode
, tmp
);
20114 else if (offset
> 0)
20115 mem
= gen_frame_mem (SImode
,
20116 plus_constant (Pmode
,
20120 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20122 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20123 RTX_FRAME_RELATED_P (tmp
) = 1;
20124 tmp
= emit_insn (tmp
);
20126 /* Record the first store insn. */
20127 if (dwarf_index
== 1)
20130 /* Generate dwarf info. */
20131 mem
= gen_frame_mem (SImode
,
20132 plus_constant(Pmode
,
20135 tmp
= gen_rtx_SET (mem
, gen_rtx_REG (SImode
, j
));
20136 RTX_FRAME_RELATED_P (tmp
) = 1;
20137 XVECEXP (dwarf
, 0, dwarf_index
++) = tmp
;
20146 /* Attach dwarf info to the first insn we generate. */
20147 gcc_assert (insn
!= NULL_RTX
);
20148 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20149 RTX_FRAME_RELATED_P (insn
) = 1;
20152 /* Generate and emit an insn that we will recognize as a push_multi.
20153 Unfortunately, since this insn does not reflect very well the actual
20154 semantics of the operation, we need to annotate the insn for the benefit
20155 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20156 MASK for registers that should be annotated for DWARF2 frame unwind
20159 emit_multi_reg_push (unsigned long mask
, unsigned long dwarf_regs_mask
)
20162 int num_dwarf_regs
= 0;
20166 int dwarf_par_index
;
20169 /* We don't record the PC in the dwarf frame information. */
20170 dwarf_regs_mask
&= ~(1 << PC_REGNUM
);
20172 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20174 if (mask
& (1 << i
))
20176 if (dwarf_regs_mask
& (1 << i
))
20180 gcc_assert (num_regs
&& num_regs
<= 16);
20181 gcc_assert ((dwarf_regs_mask
& ~mask
) == 0);
20183 /* For the body of the insn we are going to generate an UNSPEC in
20184 parallel with several USEs. This allows the insn to be recognized
20185 by the push_multi pattern in the arm.md file.
20187 The body of the insn looks something like this:
20190 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20191 (const_int:SI <num>)))
20192 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20198 For the frame note however, we try to be more explicit and actually
20199 show each register being stored into the stack frame, plus a (single)
20200 decrement of the stack pointer. We do it this way in order to be
20201 friendly to the stack unwinding code, which only wants to see a single
20202 stack decrement per instruction. The RTL we generate for the note looks
20203 something like this:
20206 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20207 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20208 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20209 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20213 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20214 instead we'd have a parallel expression detailing all
20215 the stores to the various memory addresses so that debug
20216 information is more up-to-date. Remember however while writing
20217 this to take care of the constraints with the push instruction.
20219 Note also that this has to be taken care of for the VFP registers.
20221 For more see PR43399. */
20223 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
20224 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
20225 dwarf_par_index
= 1;
20227 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20229 if (mask
& (1 << i
))
20231 reg
= gen_rtx_REG (SImode
, i
);
20233 XVECEXP (par
, 0, 0)
20234 = gen_rtx_SET (gen_frame_mem
20236 gen_rtx_PRE_MODIFY (Pmode
,
20239 (Pmode
, stack_pointer_rtx
,
20242 gen_rtx_UNSPEC (BLKmode
,
20243 gen_rtvec (1, reg
),
20244 UNSPEC_PUSH_MULT
));
20246 if (dwarf_regs_mask
& (1 << i
))
20248 tmp
= gen_rtx_SET (gen_frame_mem (SImode
, stack_pointer_rtx
),
20250 RTX_FRAME_RELATED_P (tmp
) = 1;
20251 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20258 for (j
= 1, i
++; j
< num_regs
; i
++)
20260 if (mask
& (1 << i
))
20262 reg
= gen_rtx_REG (SImode
, i
);
20264 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
20266 if (dwarf_regs_mask
& (1 << i
))
20269 = gen_rtx_SET (gen_frame_mem
20271 plus_constant (Pmode
, stack_pointer_rtx
,
20274 RTX_FRAME_RELATED_P (tmp
) = 1;
20275 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
20282 par
= emit_insn (par
);
20284 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20285 plus_constant (Pmode
, stack_pointer_rtx
, -4 * num_regs
));
20286 RTX_FRAME_RELATED_P (tmp
) = 1;
20287 XVECEXP (dwarf
, 0, 0) = tmp
;
20289 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
20294 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20295 SIZE is the offset to be adjusted.
20296 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20298 arm_add_cfa_adjust_cfa_note (rtx insn
, int size
, rtx dest
, rtx src
)
20302 RTX_FRAME_RELATED_P (insn
) = 1;
20303 dwarf
= gen_rtx_SET (dest
, plus_constant (Pmode
, src
, size
));
20304 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, dwarf
);
20307 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20308 SAVED_REGS_MASK shows which registers need to be restored.
20310 Unfortunately, since this insn does not reflect very well the actual
20311 semantics of the operation, we need to annotate the insn for the benefit
20312 of DWARF2 frame unwind information. */
20314 arm_emit_multi_reg_pop (unsigned long saved_regs_mask
)
20319 rtx dwarf
= NULL_RTX
;
20321 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20325 offset_adj
= return_in_pc
? 1 : 0;
20326 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20327 if (saved_regs_mask
& (1 << i
))
20330 gcc_assert (num_regs
&& num_regs
<= 16);
20332 /* If SP is in reglist, then we don't emit SP update insn. */
20333 emit_update
= (saved_regs_mask
& (1 << SP_REGNUM
)) ? 0 : 1;
20335 /* The parallel needs to hold num_regs SETs
20336 and one SET for the stack update. */
20337 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ emit_update
+ offset_adj
));
20340 XVECEXP (par
, 0, 0) = ret_rtx
;
20344 /* Increment the stack pointer, based on there being
20345 num_regs 4-byte registers to restore. */
20346 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20347 plus_constant (Pmode
,
20350 RTX_FRAME_RELATED_P (tmp
) = 1;
20351 XVECEXP (par
, 0, offset_adj
) = tmp
;
20354 /* Now restore every reg, which may include PC. */
20355 for (j
= 0, i
= 0; j
< num_regs
; i
++)
20356 if (saved_regs_mask
& (1 << i
))
20358 reg
= gen_rtx_REG (SImode
, i
);
20359 if ((num_regs
== 1) && emit_update
&& !return_in_pc
)
20361 /* Emit single load with writeback. */
20362 tmp
= gen_frame_mem (SImode
,
20363 gen_rtx_POST_INC (Pmode
,
20364 stack_pointer_rtx
));
20365 tmp
= emit_insn (gen_rtx_SET (reg
, tmp
));
20366 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20370 tmp
= gen_rtx_SET (reg
,
20373 plus_constant (Pmode
, stack_pointer_rtx
, 4 * j
)));
20374 RTX_FRAME_RELATED_P (tmp
) = 1;
20375 XVECEXP (par
, 0, j
+ emit_update
+ offset_adj
) = tmp
;
20377 /* We need to maintain a sequence for DWARF info too. As dwarf info
20378 should not have PC, skip PC. */
20379 if (i
!= PC_REGNUM
)
20380 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20386 par
= emit_jump_insn (par
);
20388 par
= emit_insn (par
);
20390 REG_NOTES (par
) = dwarf
;
20392 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
* num_regs
,
20393 stack_pointer_rtx
, stack_pointer_rtx
);
20396 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20397 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20399 Unfortunately, since this insn does not reflect very well the actual
20400 semantics of the operation, we need to annotate the insn for the benefit
20401 of DWARF2 frame unwind information. */
20403 arm_emit_vfp_multi_reg_pop (int first_reg
, int num_regs
, rtx base_reg
)
20407 rtx dwarf
= NULL_RTX
;
20410 gcc_assert (num_regs
&& num_regs
<= 32);
20412 /* Workaround ARM10 VFPr1 bug. */
20413 if (num_regs
== 2 && !arm_arch6
)
20415 if (first_reg
== 15)
20421 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20422 there could be up to 32 D-registers to restore.
20423 If there are more than 16 D-registers, make two recursive calls,
20424 each of which emits one pop_multi instruction. */
20427 arm_emit_vfp_multi_reg_pop (first_reg
, 16, base_reg
);
20428 arm_emit_vfp_multi_reg_pop (first_reg
+ 16, num_regs
- 16, base_reg
);
20432 /* The parallel needs to hold num_regs SETs
20433 and one SET for the stack update. */
20434 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
+ 1));
20436 /* Increment the stack pointer, based on there being
20437 num_regs 8-byte registers to restore. */
20438 tmp
= gen_rtx_SET (base_reg
, plus_constant (Pmode
, base_reg
, 8 * num_regs
));
20439 RTX_FRAME_RELATED_P (tmp
) = 1;
20440 XVECEXP (par
, 0, 0) = tmp
;
20442 /* Now show every reg that will be restored, using a SET for each. */
20443 for (j
= 0, i
=first_reg
; j
< num_regs
; i
+= 2)
20445 reg
= gen_rtx_REG (DFmode
, i
);
20447 tmp
= gen_rtx_SET (reg
,
20450 plus_constant (Pmode
, base_reg
, 8 * j
)));
20451 RTX_FRAME_RELATED_P (tmp
) = 1;
20452 XVECEXP (par
, 0, j
+ 1) = tmp
;
20454 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20459 par
= emit_insn (par
);
20460 REG_NOTES (par
) = dwarf
;
20462 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20463 if (REGNO (base_reg
) == IP_REGNUM
)
20465 RTX_FRAME_RELATED_P (par
) = 1;
20466 add_reg_note (par
, REG_CFA_DEF_CFA
, hard_frame_pointer_rtx
);
20469 arm_add_cfa_adjust_cfa_note (par
, 2 * UNITS_PER_WORD
* num_regs
,
20470 base_reg
, base_reg
);
20473 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20474 number of registers are being popped, multiple LDRD patterns are created for
20475 all register pairs. If odd number of registers are popped, last register is
20476 loaded by using LDR pattern. */
20478 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask
)
20482 rtx par
= NULL_RTX
;
20483 rtx dwarf
= NULL_RTX
;
20484 rtx tmp
, reg
, tmp1
;
20485 bool return_in_pc
= saved_regs_mask
& (1 << PC_REGNUM
);
20487 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
20488 if (saved_regs_mask
& (1 << i
))
20491 gcc_assert (num_regs
&& num_regs
<= 16);
20493 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20494 to be popped. So, if num_regs is even, now it will become odd,
20495 and we can generate pop with PC. If num_regs is odd, it will be
20496 even now, and ldr with return can be generated for PC. */
20500 gcc_assert (!(saved_regs_mask
& (1 << SP_REGNUM
)));
20502 /* Var j iterates over all the registers to gather all the registers in
20503 saved_regs_mask. Var i gives index of saved registers in stack frame.
20504 A PARALLEL RTX of register-pair is created here, so that pattern for
20505 LDRD can be matched. As PC is always last register to be popped, and
20506 we have already decremented num_regs if PC, we don't have to worry
20507 about PC in this loop. */
20508 for (i
= 0, j
= 0; i
< (num_regs
- (num_regs
% 2)); j
++)
20509 if (saved_regs_mask
& (1 << j
))
20511 /* Create RTX for memory load. */
20512 reg
= gen_rtx_REG (SImode
, j
);
20513 tmp
= gen_rtx_SET (reg
,
20514 gen_frame_mem (SImode
,
20515 plus_constant (Pmode
,
20516 stack_pointer_rtx
, 4 * i
)));
20517 RTX_FRAME_RELATED_P (tmp
) = 1;
20521 /* When saved-register index (i) is even, the RTX to be emitted is
20522 yet to be created. Hence create it first. The LDRD pattern we
20523 are generating is :
20524 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20525 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20526 where target registers need not be consecutive. */
20527 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20531 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20532 added as 0th element and if i is odd, reg_i is added as 1st element
20533 of LDRD pattern shown above. */
20534 XVECEXP (par
, 0, (i
% 2)) = tmp
;
20535 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20539 /* When saved-register index (i) is odd, RTXs for both the registers
20540 to be loaded are generated in above given LDRD pattern, and the
20541 pattern can be emitted now. */
20542 par
= emit_insn (par
);
20543 REG_NOTES (par
) = dwarf
;
20544 RTX_FRAME_RELATED_P (par
) = 1;
20550 /* If the number of registers pushed is odd AND return_in_pc is false OR
20551 number of registers are even AND return_in_pc is true, last register is
20552 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20553 then LDR with post increment. */
20555 /* Increment the stack pointer, based on there being
20556 num_regs 4-byte registers to restore. */
20557 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20558 plus_constant (Pmode
, stack_pointer_rtx
, 4 * i
));
20559 RTX_FRAME_RELATED_P (tmp
) = 1;
20560 tmp
= emit_insn (tmp
);
20563 arm_add_cfa_adjust_cfa_note (tmp
, UNITS_PER_WORD
* i
,
20564 stack_pointer_rtx
, stack_pointer_rtx
);
20569 if (((num_regs
% 2) == 1 && !return_in_pc
)
20570 || ((num_regs
% 2) == 0 && return_in_pc
))
20572 /* Scan for the single register to be popped. Skip until the saved
20573 register is found. */
20574 for (; (saved_regs_mask
& (1 << j
)) == 0; j
++);
20576 /* Gen LDR with post increment here. */
20577 tmp1
= gen_rtx_MEM (SImode
,
20578 gen_rtx_POST_INC (SImode
,
20579 stack_pointer_rtx
));
20580 set_mem_alias_set (tmp1
, get_frame_alias_set ());
20582 reg
= gen_rtx_REG (SImode
, j
);
20583 tmp
= gen_rtx_SET (reg
, tmp1
);
20584 RTX_FRAME_RELATED_P (tmp
) = 1;
20585 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
20589 /* If return_in_pc, j must be PC_REGNUM. */
20590 gcc_assert (j
== PC_REGNUM
);
20591 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20592 XVECEXP (par
, 0, 0) = ret_rtx
;
20593 XVECEXP (par
, 0, 1) = tmp
;
20594 par
= emit_jump_insn (par
);
20598 par
= emit_insn (tmp
);
20599 REG_NOTES (par
) = dwarf
;
20600 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20601 stack_pointer_rtx
, stack_pointer_rtx
);
20605 else if ((num_regs
% 2) == 1 && return_in_pc
)
20607 /* There are 2 registers to be popped. So, generate the pattern
20608 pop_multiple_with_stack_update_and_return to pop in PC. */
20609 arm_emit_multi_reg_pop (saved_regs_mask
& (~((1 << j
) - 1)));
20615 /* LDRD in ARM mode needs consecutive registers as operands. This function
20616 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20617 offset addressing and then generates one separate stack udpate. This provides
20618 more scheduling freedom, compared to writeback on every load. However,
20619 if the function returns using load into PC directly
20620 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20621 before the last load. TODO: Add a peephole optimization to recognize
20622 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20623 peephole optimization to merge the load at stack-offset zero
20624 with the stack update instruction using load with writeback
20625 in post-index addressing mode. */
20627 arm_emit_ldrd_pop (unsigned long saved_regs_mask
)
20631 rtx par
= NULL_RTX
;
20632 rtx dwarf
= NULL_RTX
;
20635 /* Restore saved registers. */
20636 gcc_assert (!((saved_regs_mask
& (1 << SP_REGNUM
))));
20638 while (j
<= LAST_ARM_REGNUM
)
20639 if (saved_regs_mask
& (1 << j
))
20642 && (saved_regs_mask
& (1 << (j
+ 1)))
20643 && (j
+ 1) != PC_REGNUM
)
20645 /* Current register and next register form register pair for which
20646 LDRD can be generated. PC is always the last register popped, and
20647 we handle it separately. */
20649 mem
= gen_frame_mem (DImode
,
20650 plus_constant (Pmode
,
20654 mem
= gen_frame_mem (DImode
, stack_pointer_rtx
);
20656 tmp
= gen_rtx_SET (gen_rtx_REG (DImode
, j
), mem
);
20657 tmp
= emit_insn (tmp
);
20658 RTX_FRAME_RELATED_P (tmp
) = 1;
20660 /* Generate dwarf info. */
20662 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20663 gen_rtx_REG (SImode
, j
),
20665 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20666 gen_rtx_REG (SImode
, j
+ 1),
20669 REG_NOTES (tmp
) = dwarf
;
20674 else if (j
!= PC_REGNUM
)
20676 /* Emit a single word load. */
20678 mem
= gen_frame_mem (SImode
,
20679 plus_constant (Pmode
,
20683 mem
= gen_frame_mem (SImode
, stack_pointer_rtx
);
20685 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, j
), mem
);
20686 tmp
= emit_insn (tmp
);
20687 RTX_FRAME_RELATED_P (tmp
) = 1;
20689 /* Generate dwarf info. */
20690 REG_NOTES (tmp
) = alloc_reg_note (REG_CFA_RESTORE
,
20691 gen_rtx_REG (SImode
, j
),
20697 else /* j == PC_REGNUM */
20703 /* Update the stack. */
20706 tmp
= gen_rtx_SET (stack_pointer_rtx
,
20707 plus_constant (Pmode
,
20710 tmp
= emit_insn (tmp
);
20711 arm_add_cfa_adjust_cfa_note (tmp
, offset
,
20712 stack_pointer_rtx
, stack_pointer_rtx
);
20716 if (saved_regs_mask
& (1 << PC_REGNUM
))
20718 /* Only PC is to be popped. */
20719 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
20720 XVECEXP (par
, 0, 0) = ret_rtx
;
20721 tmp
= gen_rtx_SET (gen_rtx_REG (SImode
, PC_REGNUM
),
20722 gen_frame_mem (SImode
,
20723 gen_rtx_POST_INC (SImode
,
20724 stack_pointer_rtx
)));
20725 RTX_FRAME_RELATED_P (tmp
) = 1;
20726 XVECEXP (par
, 0, 1) = tmp
;
20727 par
= emit_jump_insn (par
);
20729 /* Generate dwarf info. */
20730 dwarf
= alloc_reg_note (REG_CFA_RESTORE
,
20731 gen_rtx_REG (SImode
, PC_REGNUM
),
20733 REG_NOTES (par
) = dwarf
;
20734 arm_add_cfa_adjust_cfa_note (par
, UNITS_PER_WORD
,
20735 stack_pointer_rtx
, stack_pointer_rtx
);
20739 /* Calculate the size of the return value that is passed in registers. */
20741 arm_size_return_regs (void)
20745 if (crtl
->return_rtx
!= 0)
20746 mode
= GET_MODE (crtl
->return_rtx
);
20748 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20750 return GET_MODE_SIZE (mode
);
20753 /* Return true if the current function needs to save/restore LR. */
20755 thumb_force_lr_save (void)
20757 return !cfun
->machine
->lr_save_eliminated
20759 || thumb_far_jump_used_p ()
20760 || df_regs_ever_live_p (LR_REGNUM
));
20763 /* We do not know if r3 will be available because
20764 we do have an indirect tailcall happening in this
20765 particular case. */
20767 is_indirect_tailcall_p (rtx call
)
20769 rtx pat
= PATTERN (call
);
20771 /* Indirect tail call. */
20772 pat
= XVECEXP (pat
, 0, 0);
20773 if (GET_CODE (pat
) == SET
)
20774 pat
= SET_SRC (pat
);
20776 pat
= XEXP (XEXP (pat
, 0), 0);
20777 return REG_P (pat
);
20780 /* Return true if r3 is used by any of the tail call insns in the
20781 current function. */
20783 any_sibcall_could_use_r3 (void)
20788 if (!crtl
->tail_call_emit
)
20790 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20791 if (e
->flags
& EDGE_SIBCALL
)
20793 rtx_insn
*call
= BB_END (e
->src
);
20794 if (!CALL_P (call
))
20795 call
= prev_nonnote_nondebug_insn (call
);
20796 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
20797 if (find_regno_fusage (call
, USE
, 3)
20798 || is_indirect_tailcall_p (call
))
20805 /* Compute the distance from register FROM to register TO.
20806 These can be the arg pointer (26), the soft frame pointer (25),
20807 the stack pointer (13) or the hard frame pointer (11).
20808 In thumb mode r7 is used as the soft frame pointer, if needed.
20809 Typical stack layout looks like this:
20811 old stack pointer -> | |
20814 | | saved arguments for
20815 | | vararg functions
20818 hard FP & arg pointer -> | | \
20826 soft frame pointer -> | | /
20831 locals base pointer -> | | /
20836 current stack pointer -> | | /
20839 For a given function some or all of these stack components
20840 may not be needed, giving rise to the possibility of
20841 eliminating some of the registers.
20843 The values returned by this function must reflect the behavior
20844 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20846 The sign of the number returned reflects the direction of stack
20847 growth, so the values are positive for all eliminations except
20848 from the soft frame pointer to the hard frame pointer.
20850 SFP may point just inside the local variables block to ensure correct
20854 /* Return cached stack offsets. */
20856 static arm_stack_offsets
*
20857 arm_get_frame_offsets (void)
20859 struct arm_stack_offsets
*offsets
;
20861 offsets
= &cfun
->machine
->stack_offsets
;
20867 /* Calculate stack offsets. These are used to calculate register elimination
20868 offsets and in prologue/epilogue code. Also calculates which registers
20869 should be saved. */
20872 arm_compute_frame_layout (void)
20874 struct arm_stack_offsets
*offsets
;
20875 unsigned long func_type
;
20878 HOST_WIDE_INT frame_size
;
20881 offsets
= &cfun
->machine
->stack_offsets
;
20883 /* Initially this is the size of the local variables. It will translated
20884 into an offset once we have determined the size of preceding data. */
20885 frame_size
= ROUND_UP_WORD (get_frame_size ());
20887 /* Space for variadic functions. */
20888 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
20890 /* In Thumb mode this is incorrect, but never used. */
20892 = (offsets
->saved_args
20893 + arm_compute_static_chain_stack_bytes ()
20894 + (frame_pointer_needed
? 4 : 0));
20898 unsigned int regno
;
20900 offsets
->saved_regs_mask
= arm_compute_save_core_reg_mask ();
20901 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20902 saved
= core_saved
;
20904 /* We know that SP will be doubleword aligned on entry, and we must
20905 preserve that condition at any subroutine call. We also require the
20906 soft frame pointer to be doubleword aligned. */
20908 if (TARGET_REALLY_IWMMXT
)
20910 /* Check for the call-saved iWMMXt registers. */
20911 for (regno
= FIRST_IWMMXT_REGNUM
;
20912 regno
<= LAST_IWMMXT_REGNUM
;
20914 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
20918 func_type
= arm_current_func_type ();
20919 /* Space for saved VFP registers. */
20920 if (! IS_VOLATILE (func_type
)
20921 && TARGET_HARD_FLOAT
)
20922 saved
+= arm_get_vfp_saved_size ();
20924 else /* TARGET_THUMB1 */
20926 offsets
->saved_regs_mask
= thumb1_compute_save_core_reg_mask ();
20927 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
20928 saved
= core_saved
;
20929 if (TARGET_BACKTRACE
)
20933 /* Saved registers include the stack frame. */
20934 offsets
->saved_regs
20935 = offsets
->saved_args
+ arm_compute_static_chain_stack_bytes () + saved
;
20936 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
20938 /* A leaf function does not need any stack alignment if it has nothing
20940 if (crtl
->is_leaf
&& frame_size
== 0
20941 /* However if it calls alloca(), we have a dynamically allocated
20942 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20943 && ! cfun
->calls_alloca
)
20945 offsets
->outgoing_args
= offsets
->soft_frame
;
20946 offsets
->locals_base
= offsets
->soft_frame
;
20950 /* Ensure SFP has the correct alignment. */
20951 if (ARM_DOUBLEWORD_ALIGN
20952 && (offsets
->soft_frame
& 7))
20954 offsets
->soft_frame
+= 4;
20955 /* Try to align stack by pushing an extra reg. Don't bother doing this
20956 when there is a stack frame as the alignment will be rolled into
20957 the normal stack adjustment. */
20958 if (frame_size
+ crtl
->outgoing_args_size
== 0)
20962 /* Register r3 is caller-saved. Normally it does not need to be
20963 saved on entry by the prologue. However if we choose to save
20964 it for padding then we may confuse the compiler into thinking
20965 a prologue sequence is required when in fact it is not. This
20966 will occur when shrink-wrapping if r3 is used as a scratch
20967 register and there are no other callee-saved writes.
20969 This situation can be avoided when other callee-saved registers
20970 are available and r3 is not mandatory if we choose a callee-saved
20971 register for padding. */
20972 bool prefer_callee_reg_p
= false;
20974 /* If it is safe to use r3, then do so. This sometimes
20975 generates better code on Thumb-2 by avoiding the need to
20976 use 32-bit push/pop instructions. */
20977 if (! any_sibcall_could_use_r3 ()
20978 && arm_size_return_regs () <= 12
20979 && (offsets
->saved_regs_mask
& (1 << 3)) == 0
20981 || !(TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
)))
20984 if (!TARGET_THUMB2
)
20985 prefer_callee_reg_p
= true;
20988 || prefer_callee_reg_p
)
20990 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
20992 /* Avoid fixed registers; they may be changed at
20993 arbitrary times so it's unsafe to restore them
20994 during the epilogue. */
20996 && (offsets
->saved_regs_mask
& (1 << i
)) == 0)
21006 offsets
->saved_regs
+= 4;
21007 offsets
->saved_regs_mask
|= (1 << reg
);
21012 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
21013 offsets
->outgoing_args
= (offsets
->locals_base
21014 + crtl
->outgoing_args_size
);
21016 if (ARM_DOUBLEWORD_ALIGN
)
21018 /* Ensure SP remains doubleword aligned. */
21019 if (offsets
->outgoing_args
& 7)
21020 offsets
->outgoing_args
+= 4;
21021 gcc_assert (!(offsets
->outgoing_args
& 7));
21026 /* Calculate the relative offsets for the different stack pointers. Positive
21027 offsets are in the direction of stack growth. */
21030 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
21032 arm_stack_offsets
*offsets
;
21034 offsets
= arm_get_frame_offsets ();
21036 /* OK, now we have enough information to compute the distances.
21037 There must be an entry in these switch tables for each pair
21038 of registers in ELIMINABLE_REGS, even if some of the entries
21039 seem to be redundant or useless. */
21042 case ARG_POINTER_REGNUM
:
21045 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21048 case FRAME_POINTER_REGNUM
:
21049 /* This is the reverse of the soft frame pointer
21050 to hard frame pointer elimination below. */
21051 return offsets
->soft_frame
- offsets
->saved_args
;
21053 case ARM_HARD_FRAME_POINTER_REGNUM
:
21054 /* This is only non-zero in the case where the static chain register
21055 is stored above the frame. */
21056 return offsets
->frame
- offsets
->saved_args
- 4;
21058 case STACK_POINTER_REGNUM
:
21059 /* If nothing has been pushed on the stack at all
21060 then this will return -4. This *is* correct! */
21061 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
21064 gcc_unreachable ();
21066 gcc_unreachable ();
21068 case FRAME_POINTER_REGNUM
:
21071 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21074 case ARM_HARD_FRAME_POINTER_REGNUM
:
21075 /* The hard frame pointer points to the top entry in the
21076 stack frame. The soft frame pointer to the bottom entry
21077 in the stack frame. If there is no stack frame at all,
21078 then they are identical. */
21080 return offsets
->frame
- offsets
->soft_frame
;
21082 case STACK_POINTER_REGNUM
:
21083 return offsets
->outgoing_args
- offsets
->soft_frame
;
21086 gcc_unreachable ();
21088 gcc_unreachable ();
21091 /* You cannot eliminate from the stack pointer.
21092 In theory you could eliminate from the hard frame
21093 pointer to the stack pointer, but this will never
21094 happen, since if a stack frame is not needed the
21095 hard frame pointer will never be used. */
21096 gcc_unreachable ();
21100 /* Given FROM and TO register numbers, say whether this elimination is
21101 allowed. Frame pointer elimination is automatically handled.
21103 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21104 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21105 pointer, we must eliminate FRAME_POINTER_REGNUM into
21106 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21107 ARG_POINTER_REGNUM. */
21110 arm_can_eliminate (const int from
, const int to
)
21112 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
21113 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
21114 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
21115 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
21119 /* Emit RTL to save coprocessor registers on function entry. Returns the
21120 number of bytes pushed. */
21123 arm_save_coproc_regs(void)
21125 int saved_size
= 0;
21127 unsigned start_reg
;
21130 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
21131 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
21133 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21134 insn
= gen_rtx_MEM (V2SImode
, insn
);
21135 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
21136 RTX_FRAME_RELATED_P (insn
) = 1;
21140 if (TARGET_HARD_FLOAT
)
21142 start_reg
= FIRST_VFP_REGNUM
;
21144 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
21146 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
21147 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
21149 if (start_reg
!= reg
)
21150 saved_size
+= vfp_emit_fstmd (start_reg
,
21151 (reg
- start_reg
) / 2);
21152 start_reg
= reg
+ 2;
21155 if (start_reg
!= reg
)
21156 saved_size
+= vfp_emit_fstmd (start_reg
,
21157 (reg
- start_reg
) / 2);
21163 /* Set the Thumb frame pointer from the stack pointer. */
21166 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
21168 HOST_WIDE_INT amount
;
21171 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
21173 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21174 stack_pointer_rtx
, GEN_INT (amount
)));
21177 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
21178 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21179 expects the first two operands to be the same. */
21182 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21184 hard_frame_pointer_rtx
));
21188 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21189 hard_frame_pointer_rtx
,
21190 stack_pointer_rtx
));
21192 dwarf
= gen_rtx_SET (hard_frame_pointer_rtx
,
21193 plus_constant (Pmode
, stack_pointer_rtx
, amount
));
21194 RTX_FRAME_RELATED_P (dwarf
) = 1;
21195 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21198 RTX_FRAME_RELATED_P (insn
) = 1;
21201 struct scratch_reg
{
21206 /* Return a short-lived scratch register for use as a 2nd scratch register on
21207 function entry after the registers are saved in the prologue. This register
21208 must be released by means of release_scratch_register_on_entry. IP is not
21209 considered since it is always used as the 1st scratch register if available.
21211 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21212 mask of live registers. */
21215 get_scratch_register_on_entry (struct scratch_reg
*sr
, unsigned int regno1
,
21216 unsigned long live_regs
)
21222 if (regno1
!= LR_REGNUM
&& (live_regs
& (1 << LR_REGNUM
)) != 0)
21228 for (i
= 4; i
< 11; i
++)
21229 if (regno1
!= i
&& (live_regs
& (1 << i
)) != 0)
21237 /* If IP is used as the 1st scratch register for a nested function,
21238 then either r3 wasn't available or is used to preserve IP. */
21239 if (regno1
== IP_REGNUM
&& IS_NESTED (arm_current_func_type ()))
21241 regno
= (regno1
== 3 ? 2 : 3);
21243 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)),
21248 sr
->reg
= gen_rtx_REG (SImode
, regno
);
21251 rtx addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21252 rtx insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), sr
->reg
);
21253 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21254 plus_constant (Pmode
, stack_pointer_rtx
, -4));
21255 RTX_FRAME_RELATED_P (insn
) = 1;
21256 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21260 /* Release a scratch register obtained from the preceding function. */
21263 release_scratch_register_on_entry (struct scratch_reg
*sr
)
21267 rtx addr
= gen_rtx_POST_INC (Pmode
, stack_pointer_rtx
);
21268 rtx insn
= emit_set_insn (sr
->reg
, gen_frame_mem (SImode
, addr
));
21269 rtx x
= gen_rtx_SET (stack_pointer_rtx
,
21270 plus_constant (Pmode
, stack_pointer_rtx
, 4));
21271 RTX_FRAME_RELATED_P (insn
) = 1;
21272 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
21276 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21278 #if PROBE_INTERVAL > 4096
21279 #error Cannot use indexed addressing mode for stack probing
21282 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21283 inclusive. These are offsets from the current stack pointer. REGNO1
21284 is the index number of the 1st scratch register and LIVE_REGS is the
21285 mask of live registers. */
21288 arm_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
21289 unsigned int regno1
, unsigned long live_regs
)
21291 rtx reg1
= gen_rtx_REG (Pmode
, regno1
);
21293 /* See if we have a constant small number of probes to generate. If so,
21294 that's the easy case. */
21295 if (size
<= PROBE_INTERVAL
)
21297 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21298 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21299 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- size
));
21302 /* The run-time loop is made up of 10 insns in the generic case while the
21303 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21304 else if (size
<= 5 * PROBE_INTERVAL
)
21306 HOST_WIDE_INT i
, rem
;
21308 emit_move_insn (reg1
, GEN_INT (first
+ PROBE_INTERVAL
));
21309 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21310 emit_stack_probe (reg1
);
21312 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21313 it exceeds SIZE. If only two probes are needed, this will not
21314 generate any code. Then probe at FIRST + SIZE. */
21315 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
21317 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21318 emit_stack_probe (reg1
);
21321 rem
= size
- (i
- PROBE_INTERVAL
);
21322 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21324 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
21325 emit_stack_probe (plus_constant (Pmode
, reg1
, PROBE_INTERVAL
- rem
));
21328 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
21331 /* Otherwise, do the same as above, but in a loop. Note that we must be
21332 extra careful with variables wrapping around because we might be at
21333 the very top (or the very bottom) of the address space and we have
21334 to be able to handle this case properly; in particular, we use an
21335 equality test for the loop condition. */
21338 HOST_WIDE_INT rounded_size
;
21339 struct scratch_reg sr
;
21341 get_scratch_register_on_entry (&sr
, regno1
, live_regs
);
21343 emit_move_insn (reg1
, GEN_INT (first
));
21346 /* Step 1: round SIZE to the previous multiple of the interval. */
21348 rounded_size
= size
& -PROBE_INTERVAL
;
21349 emit_move_insn (sr
.reg
, GEN_INT (rounded_size
));
21352 /* Step 2: compute initial and final value of the loop counter. */
21354 /* TEST_ADDR = SP + FIRST. */
21355 emit_set_insn (reg1
, gen_rtx_MINUS (Pmode
, stack_pointer_rtx
, reg1
));
21357 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21358 emit_set_insn (sr
.reg
, gen_rtx_MINUS (Pmode
, reg1
, sr
.reg
));
21361 /* Step 3: the loop
21365 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21368 while (TEST_ADDR != LAST_ADDR)
21370 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21371 until it is equal to ROUNDED_SIZE. */
21373 emit_insn (gen_probe_stack_range (reg1
, reg1
, sr
.reg
));
21376 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21377 that SIZE is equal to ROUNDED_SIZE. */
21379 if (size
!= rounded_size
)
21381 HOST_WIDE_INT rem
= size
- rounded_size
;
21383 if (rem
> 4095 || (TARGET_THUMB2
&& rem
> 255))
21385 emit_set_insn (sr
.reg
,
21386 plus_constant (Pmode
, sr
.reg
, -PROBE_INTERVAL
));
21387 emit_stack_probe (plus_constant (Pmode
, sr
.reg
,
21388 PROBE_INTERVAL
- rem
));
21391 emit_stack_probe (plus_constant (Pmode
, sr
.reg
, -rem
));
21394 release_scratch_register_on_entry (&sr
);
21397 /* Make sure nothing is scheduled before we are done. */
21398 emit_insn (gen_blockage ());
21401 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21402 absolute addresses. */
21405 output_probe_stack_range (rtx reg1
, rtx reg2
)
21407 static int labelno
= 0;
21411 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
21414 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
21416 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21418 xops
[1] = GEN_INT (PROBE_INTERVAL
);
21419 output_asm_insn ("sub\t%0, %0, %1", xops
);
21421 /* Probe at TEST_ADDR. */
21422 output_asm_insn ("str\tr0, [%0, #0]", xops
);
21424 /* Test if TEST_ADDR == LAST_ADDR. */
21426 output_asm_insn ("cmp\t%0, %1", xops
);
21429 fputs ("\tbne\t", asm_out_file
);
21430 assemble_name_raw (asm_out_file
, loop_lab
);
21431 fputc ('\n', asm_out_file
);
21436 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21439 arm_expand_prologue (void)
21444 unsigned long live_regs_mask
;
21445 unsigned long func_type
;
21447 int saved_pretend_args
= 0;
21448 int saved_regs
= 0;
21449 unsigned HOST_WIDE_INT args_to_push
;
21450 HOST_WIDE_INT size
;
21451 arm_stack_offsets
*offsets
;
21454 func_type
= arm_current_func_type ();
21456 /* Naked functions don't have prologues. */
21457 if (IS_NAKED (func_type
))
21459 if (flag_stack_usage_info
)
21460 current_function_static_stack_size
= 0;
21464 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21465 args_to_push
= crtl
->args
.pretend_args_size
;
21467 /* Compute which register we will have to save onto the stack. */
21468 offsets
= arm_get_frame_offsets ();
21469 live_regs_mask
= offsets
->saved_regs_mask
;
21471 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
21473 if (IS_STACKALIGN (func_type
))
21477 /* Handle a word-aligned stack pointer. We generate the following:
21482 <save and restore r0 in normal prologue/epilogue>
21486 The unwinder doesn't need to know about the stack realignment.
21487 Just tell it we saved SP in r0. */
21488 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
21490 r0
= gen_rtx_REG (SImode
, R0_REGNUM
);
21491 r1
= gen_rtx_REG (SImode
, R1_REGNUM
);
21493 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
21494 RTX_FRAME_RELATED_P (insn
) = 1;
21495 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
21497 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
21499 /* ??? The CFA changes here, which may cause GDB to conclude that it
21500 has entered a different function. That said, the unwind info is
21501 correct, individually, before and after this instruction because
21502 we've described the save of SP, which will override the default
21503 handling of SP as restoring from the CFA. */
21504 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
21507 /* The static chain register is the same as the IP register. If it is
21508 clobbered when creating the frame, we need to save and restore it. */
21509 clobber_ip
= IS_NESTED (func_type
)
21510 && ((TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21511 || ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21512 || flag_stack_clash_protection
)
21513 && !df_regs_ever_live_p (LR_REGNUM
)
21514 && arm_r3_live_at_start_p ()));
21516 /* Find somewhere to store IP whilst the frame is being created.
21517 We try the following places in order:
21519 1. The last argument register r3 if it is available.
21520 2. A slot on the stack above the frame if there are no
21521 arguments to push onto the stack.
21522 3. Register r3 again, after pushing the argument registers
21523 onto the stack, if this is a varargs function.
21524 4. The last slot on the stack created for the arguments to
21525 push, if this isn't a varargs function.
21527 Note - we only need to tell the dwarf2 backend about the SP
21528 adjustment in the second variant; the static chain register
21529 doesn't need to be unwound, as it doesn't contain a value
21530 inherited from the caller. */
21533 if (!arm_r3_live_at_start_p ())
21534 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21535 else if (args_to_push
== 0)
21539 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21542 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21543 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21546 /* Just tell the dwarf backend that we adjusted SP. */
21547 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21548 plus_constant (Pmode
, stack_pointer_rtx
,
21550 RTX_FRAME_RELATED_P (insn
) = 1;
21551 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21555 /* Store the args on the stack. */
21556 if (cfun
->machine
->uses_anonymous_args
)
21558 insn
= emit_multi_reg_push ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21559 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21560 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
21561 saved_pretend_args
= 1;
21567 if (args_to_push
== 4)
21568 addr
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
21570 addr
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
,
21571 plus_constant (Pmode
,
21575 insn
= emit_set_insn (gen_frame_mem (SImode
, addr
), ip_rtx
);
21577 /* Just tell the dwarf backend that we adjusted SP. */
21578 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
21579 plus_constant (Pmode
, stack_pointer_rtx
,
21581 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21584 RTX_FRAME_RELATED_P (insn
) = 1;
21585 fp_offset
= args_to_push
;
21590 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
21592 if (IS_INTERRUPT (func_type
))
21594 /* Interrupt functions must not corrupt any registers.
21595 Creating a frame pointer however, corrupts the IP
21596 register, so we must push it first. */
21597 emit_multi_reg_push (1 << IP_REGNUM
, 1 << IP_REGNUM
);
21599 /* Do not set RTX_FRAME_RELATED_P on this insn.
21600 The dwarf stack unwinding code only wants to see one
21601 stack decrement per function, and this is not it. If
21602 this instruction is labeled as being part of the frame
21603 creation sequence then dwarf2out_frame_debug_expr will
21604 die when it encounters the assignment of IP to FP
21605 later on, since the use of SP here establishes SP as
21606 the CFA register and not IP.
21608 Anyway this instruction is not really part of the stack
21609 frame creation although it is part of the prologue. */
21612 insn
= emit_set_insn (ip_rtx
,
21613 plus_constant (Pmode
, stack_pointer_rtx
,
21615 RTX_FRAME_RELATED_P (insn
) = 1;
21620 /* Push the argument registers, or reserve space for them. */
21621 if (cfun
->machine
->uses_anonymous_args
)
21622 insn
= emit_multi_reg_push
21623 ((0xf0 >> (args_to_push
/ 4)) & 0xf,
21624 (0xf0 >> (args_to_push
/ 4)) & 0xf);
21627 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21628 GEN_INT (- args_to_push
)));
21629 RTX_FRAME_RELATED_P (insn
) = 1;
21632 /* If this is an interrupt service routine, and the link register
21633 is going to be pushed, and we're not generating extra
21634 push of IP (needed when frame is needed and frame layout if apcs),
21635 subtracting four from LR now will mean that the function return
21636 can be done with a single instruction. */
21637 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
21638 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
21639 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
21642 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
21644 emit_set_insn (lr
, plus_constant (SImode
, lr
, -4));
21647 if (live_regs_mask
)
21649 unsigned long dwarf_regs_mask
= live_regs_mask
;
21651 saved_regs
+= bit_count (live_regs_mask
) * 4;
21652 if (optimize_size
&& !frame_pointer_needed
21653 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
21655 /* If no coprocessor registers are being pushed and we don't have
21656 to worry about a frame pointer then push extra registers to
21657 create the stack frame. This is done in a way that does not
21658 alter the frame layout, so is independent of the epilogue. */
21662 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
21664 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
21665 if (frame
&& n
* 4 >= frame
)
21668 live_regs_mask
|= (1 << n
) - 1;
21669 saved_regs
+= frame
;
21674 && current_tune
->prefer_ldrd_strd
21675 && !optimize_function_for_size_p (cfun
))
21677 gcc_checking_assert (live_regs_mask
== dwarf_regs_mask
);
21679 thumb2_emit_strd_push (live_regs_mask
);
21680 else if (TARGET_ARM
21681 && !TARGET_APCS_FRAME
21682 && !IS_INTERRUPT (func_type
))
21683 arm_emit_strd_push (live_regs_mask
);
21686 insn
= emit_multi_reg_push (live_regs_mask
, live_regs_mask
);
21687 RTX_FRAME_RELATED_P (insn
) = 1;
21692 insn
= emit_multi_reg_push (live_regs_mask
, dwarf_regs_mask
);
21693 RTX_FRAME_RELATED_P (insn
) = 1;
21697 if (! IS_VOLATILE (func_type
))
21698 saved_regs
+= arm_save_coproc_regs ();
21700 if (frame_pointer_needed
&& TARGET_ARM
)
21702 /* Create the new frame pointer. */
21703 if (TARGET_APCS_FRAME
)
21705 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
21706 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
21707 RTX_FRAME_RELATED_P (insn
) = 1;
21711 insn
= GEN_INT (saved_regs
- (4 + fp_offset
));
21712 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
21713 stack_pointer_rtx
, insn
));
21714 RTX_FRAME_RELATED_P (insn
) = 1;
21718 size
= offsets
->outgoing_args
- offsets
->saved_args
;
21719 if (flag_stack_usage_info
)
21720 current_function_static_stack_size
= size
;
21722 /* If this isn't an interrupt service routine and we have a frame, then do
21723 stack checking. We use IP as the first scratch register, except for the
21724 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21725 if (!IS_INTERRUPT (func_type
)
21726 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
21727 || flag_stack_clash_protection
))
21729 unsigned int regno
;
21731 if (!IS_NESTED (func_type
) || clobber_ip
)
21733 else if (df_regs_ever_live_p (LR_REGNUM
))
21738 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
21740 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
21741 arm_emit_probe_stack_range (get_stack_check_protect (),
21742 size
- get_stack_check_protect (),
21743 regno
, live_regs_mask
);
21746 arm_emit_probe_stack_range (get_stack_check_protect (), size
,
21747 regno
, live_regs_mask
);
21750 /* Recover the static chain register. */
21753 if (!arm_r3_live_at_start_p () || saved_pretend_args
)
21754 insn
= gen_rtx_REG (SImode
, 3);
21757 insn
= plus_constant (Pmode
, hard_frame_pointer_rtx
, 4);
21758 insn
= gen_frame_mem (SImode
, insn
);
21760 emit_set_insn (ip_rtx
, insn
);
21761 emit_insn (gen_force_register_use (ip_rtx
));
21764 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
21766 /* This add can produce multiple insns for a large constant, so we
21767 need to get tricky. */
21768 rtx_insn
*last
= get_last_insn ();
21770 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
21771 - offsets
->outgoing_args
);
21773 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21777 last
= last
? NEXT_INSN (last
) : get_insns ();
21778 RTX_FRAME_RELATED_P (last
) = 1;
21780 while (last
!= insn
);
21782 /* If the frame pointer is needed, emit a special barrier that
21783 will prevent the scheduler from moving stores to the frame
21784 before the stack adjustment. */
21785 if (frame_pointer_needed
)
21786 emit_insn (gen_stack_tie (stack_pointer_rtx
,
21787 hard_frame_pointer_rtx
));
21791 if (frame_pointer_needed
&& TARGET_THUMB2
)
21792 thumb_set_frame_pointer (offsets
);
21794 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21796 unsigned long mask
;
21798 mask
= live_regs_mask
;
21799 mask
&= THUMB2_WORK_REGS
;
21800 if (!IS_NESTED (func_type
))
21801 mask
|= (1 << IP_REGNUM
);
21802 arm_load_pic_register (mask
);
21805 /* If we are profiling, make sure no instructions are scheduled before
21806 the call to mcount. Similarly if the user has requested no
21807 scheduling in the prolog. Similarly if we want non-call exceptions
21808 using the EABI unwinder, to prevent faulting instructions from being
21809 swapped with a stack adjustment. */
21810 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21811 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21812 && cfun
->can_throw_non_call_exceptions
))
21813 emit_insn (gen_blockage ());
21815 /* If the link register is being kept alive, with the return address in it,
21816 then make sure that it does not get reused by the ce2 pass. */
21817 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
21818 cfun
->machine
->lr_save_eliminated
= 1;
21821 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21823 arm_print_condition (FILE *stream
)
21825 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
21827 /* Branch conversion is not implemented for Thumb-2. */
21830 output_operand_lossage ("predicated Thumb instruction");
21833 if (current_insn_predicate
!= NULL
)
21835 output_operand_lossage
21836 ("predicated instruction in conditional sequence");
21840 fputs (arm_condition_codes
[arm_current_cc
], stream
);
21842 else if (current_insn_predicate
)
21844 enum arm_cond_code code
;
21848 output_operand_lossage ("predicated Thumb instruction");
21852 code
= get_arm_condition_code (current_insn_predicate
);
21853 fputs (arm_condition_codes
[code
], stream
);
21858 /* Globally reserved letters: acln
21859 Punctuation letters currently used: @_|?().!#
21860 Lower case letters currently used: bcdefhimpqtvwxyz
21861 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21862 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21864 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21866 If CODE is 'd', then the X is a condition operand and the instruction
21867 should only be executed if the condition is true.
21868 if CODE is 'D', then the X is a condition operand and the instruction
21869 should only be executed if the condition is false: however, if the mode
21870 of the comparison is CCFPEmode, then always execute the instruction -- we
21871 do this because in these circumstances !GE does not necessarily imply LT;
21872 in these cases the instruction pattern will take care to make sure that
21873 an instruction containing %d will follow, thereby undoing the effects of
21874 doing this instruction unconditionally.
21875 If CODE is 'N' then X is a floating point operand that must be negated
21877 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21878 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21880 arm_print_operand (FILE *stream
, rtx x
, int code
)
21885 fputs (ASM_COMMENT_START
, stream
);
21889 fputs (user_label_prefix
, stream
);
21893 fputs (REGISTER_PREFIX
, stream
);
21897 arm_print_condition (stream
);
21901 /* The current condition code for a condition code setting instruction.
21902 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21903 fputc('s', stream
);
21904 arm_print_condition (stream
);
21908 /* If the instruction is conditionally executed then print
21909 the current condition code, otherwise print 's'. */
21910 gcc_assert (TARGET_THUMB2
);
21911 if (current_insn_predicate
)
21912 arm_print_condition (stream
);
21914 fputc('s', stream
);
21917 /* %# is a "break" sequence. It doesn't output anything, but is used to
21918 separate e.g. operand numbers from following text, if that text consists
21919 of further digits which we don't want to be part of the operand
21927 r
= real_value_negate (CONST_DOUBLE_REAL_VALUE (x
));
21928 fprintf (stream
, "%s", fp_const_from_val (&r
));
21932 /* An integer or symbol address without a preceding # sign. */
21934 switch (GET_CODE (x
))
21937 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
21941 output_addr_const (stream
, x
);
21945 if (GET_CODE (XEXP (x
, 0)) == PLUS
21946 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
21948 output_addr_const (stream
, x
);
21951 /* Fall through. */
21954 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21958 /* An integer that we want to print in HEX. */
21960 switch (GET_CODE (x
))
21963 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
21967 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21972 if (CONST_INT_P (x
))
21975 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
21976 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
21980 putc ('~', stream
);
21981 output_addr_const (stream
, x
);
21986 /* Print the log2 of a CONST_INT. */
21990 if (!CONST_INT_P (x
)
21991 || (val
= exact_log2 (INTVAL (x
) & 0xffffffff)) < 0)
21992 output_operand_lossage ("Unsupported operand for code '%c'", code
);
21994 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
21999 /* The low 16 bits of an immediate constant. */
22000 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
22004 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
22008 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
22016 shift
= shift_op (x
, &val
);
22020 fprintf (stream
, ", %s ", shift
);
22022 arm_print_operand (stream
, XEXP (x
, 1), 0);
22024 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
22029 /* An explanation of the 'Q', 'R' and 'H' register operands:
22031 In a pair of registers containing a DI or DF value the 'Q'
22032 operand returns the register number of the register containing
22033 the least significant part of the value. The 'R' operand returns
22034 the register number of the register containing the most
22035 significant part of the value.
22037 The 'H' operand returns the higher of the two register numbers.
22038 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22039 same as the 'Q' operand, since the most significant part of the
22040 value is held in the lower number register. The reverse is true
22041 on systems where WORDS_BIG_ENDIAN is false.
22043 The purpose of these operands is to distinguish between cases
22044 where the endian-ness of the values is important (for example
22045 when they are added together), and cases where the endian-ness
22046 is irrelevant, but the order of register operations is important.
22047 For example when loading a value from memory into a register
22048 pair, the endian-ness does not matter. Provided that the value
22049 from the lower memory address is put into the lower numbered
22050 register, and the value from the higher address is put into the
22051 higher numbered register, the load will work regardless of whether
22052 the value being loaded is big-wordian or little-wordian. The
22053 order of the two register loads can matter however, if the address
22054 of the memory location is actually held in one of the registers
22055 being overwritten by the load.
22057 The 'Q' and 'R' constraints are also available for 64-bit
22060 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22062 rtx part
= gen_lowpart (SImode
, x
);
22063 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22067 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22069 output_operand_lossage ("invalid operand for code '%c'", code
);
22073 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
22077 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
22079 machine_mode mode
= GET_MODE (x
);
22082 if (mode
== VOIDmode
)
22084 part
= gen_highpart_mode (SImode
, mode
, x
);
22085 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
22089 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22091 output_operand_lossage ("invalid operand for code '%c'", code
);
22095 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
22099 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22101 output_operand_lossage ("invalid operand for code '%c'", code
);
22105 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
22109 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22111 output_operand_lossage ("invalid operand for code '%c'", code
);
22115 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
22119 if (!REG_P (x
) || REGNO (x
) > LAST_ARM_REGNUM
)
22121 output_operand_lossage ("invalid operand for code '%c'", code
);
22125 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
22129 asm_fprintf (stream
, "%r",
22130 REG_P (XEXP (x
, 0))
22131 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
22135 asm_fprintf (stream
, "{%r-%r}",
22137 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
22140 /* Like 'M', but writing doubleword vector registers, for use by Neon
22144 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
22145 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
22147 asm_fprintf (stream
, "{d%d}", regno
);
22149 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
22154 /* CONST_TRUE_RTX means always -- that's the default. */
22155 if (x
== const_true_rtx
)
22158 if (!COMPARISON_P (x
))
22160 output_operand_lossage ("invalid operand for code '%c'", code
);
22164 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
22169 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22170 want to do that. */
22171 if (x
== const_true_rtx
)
22173 output_operand_lossage ("instruction never executed");
22176 if (!COMPARISON_P (x
))
22178 output_operand_lossage ("invalid operand for code '%c'", code
);
22182 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
22183 (get_arm_condition_code (x
))],
22193 /* Former Maverick support, removed after GCC-4.7. */
22194 output_operand_lossage ("obsolete Maverick format code '%c'", code
);
22199 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
22200 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
22201 /* Bad value for wCG register number. */
22203 output_operand_lossage ("invalid operand for code '%c'", code
);
22208 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
22211 /* Print an iWMMXt control register name. */
22213 if (!CONST_INT_P (x
)
22215 || INTVAL (x
) >= 16)
22216 /* Bad value for wC register number. */
22218 output_operand_lossage ("invalid operand for code '%c'", code
);
22224 static const char * wc_reg_names
[16] =
22226 "wCID", "wCon", "wCSSF", "wCASF",
22227 "wC4", "wC5", "wC6", "wC7",
22228 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22229 "wC12", "wC13", "wC14", "wC15"
22232 fputs (wc_reg_names
[INTVAL (x
)], stream
);
22236 /* Print the high single-precision register of a VFP double-precision
22240 machine_mode mode
= GET_MODE (x
);
22243 if (GET_MODE_SIZE (mode
) != 8 || !REG_P (x
))
22245 output_operand_lossage ("invalid operand for code '%c'", code
);
22250 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
22252 output_operand_lossage ("invalid operand for code '%c'", code
);
22256 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
22260 /* Print a VFP/Neon double precision or quad precision register name. */
22264 machine_mode mode
= GET_MODE (x
);
22265 int is_quad
= (code
== 'q');
22268 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
22270 output_operand_lossage ("invalid operand for code '%c'", code
);
22275 || !IS_VFP_REGNUM (REGNO (x
)))
22277 output_operand_lossage ("invalid operand for code '%c'", code
);
22282 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
22283 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
22285 output_operand_lossage ("invalid operand for code '%c'", code
);
22289 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
22290 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
22294 /* These two codes print the low/high doubleword register of a Neon quad
22295 register, respectively. For pair-structure types, can also print
22296 low/high quadword registers. */
22300 machine_mode mode
= GET_MODE (x
);
22303 if ((GET_MODE_SIZE (mode
) != 16
22304 && GET_MODE_SIZE (mode
) != 32) || !REG_P (x
))
22306 output_operand_lossage ("invalid operand for code '%c'", code
);
22311 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
22313 output_operand_lossage ("invalid operand for code '%c'", code
);
22317 if (GET_MODE_SIZE (mode
) == 16)
22318 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
22319 + (code
== 'f' ? 1 : 0));
22321 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
22322 + (code
== 'f' ? 1 : 0));
22326 /* Print a VFPv3 floating-point constant, represented as an integer
22330 int index
= vfp3_const_double_index (x
);
22331 gcc_assert (index
!= -1);
22332 fprintf (stream
, "%d", index
);
22336 /* Print bits representing opcode features for Neon.
22338 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22339 and polynomials as unsigned.
22341 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22343 Bit 2 is 1 for rounding functions, 0 otherwise. */
22345 /* Identify the type as 's', 'u', 'p' or 'f'. */
22348 HOST_WIDE_INT bits
= INTVAL (x
);
22349 fputc ("uspf"[bits
& 3], stream
);
22353 /* Likewise, but signed and unsigned integers are both 'i'. */
22356 HOST_WIDE_INT bits
= INTVAL (x
);
22357 fputc ("iipf"[bits
& 3], stream
);
22361 /* As for 'T', but emit 'u' instead of 'p'. */
22364 HOST_WIDE_INT bits
= INTVAL (x
);
22365 fputc ("usuf"[bits
& 3], stream
);
22369 /* Bit 2: rounding (vs none). */
22372 HOST_WIDE_INT bits
= INTVAL (x
);
22373 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
22377 /* Memory operand for vld1/vst1 instruction. */
22381 bool postinc
= FALSE
;
22382 rtx postinc_reg
= NULL
;
22383 unsigned align
, memsize
, align_bits
;
22385 gcc_assert (MEM_P (x
));
22386 addr
= XEXP (x
, 0);
22387 if (GET_CODE (addr
) == POST_INC
)
22390 addr
= XEXP (addr
, 0);
22392 if (GET_CODE (addr
) == POST_MODIFY
)
22394 postinc_reg
= XEXP( XEXP (addr
, 1), 1);
22395 addr
= XEXP (addr
, 0);
22397 asm_fprintf (stream
, "[%r", REGNO (addr
));
22399 /* We know the alignment of this access, so we can emit a hint in the
22400 instruction (for some alignments) as an aid to the memory subsystem
22402 align
= MEM_ALIGN (x
) >> 3;
22403 memsize
= MEM_SIZE (x
);
22405 /* Only certain alignment specifiers are supported by the hardware. */
22406 if (memsize
== 32 && (align
% 32) == 0)
22408 else if ((memsize
== 16 || memsize
== 32) && (align
% 16) == 0)
22410 else if (memsize
>= 8 && (align
% 8) == 0)
22415 if (align_bits
!= 0)
22416 asm_fprintf (stream
, ":%d", align_bits
);
22418 asm_fprintf (stream
, "]");
22421 fputs("!", stream
);
22423 asm_fprintf (stream
, ", %r", REGNO (postinc_reg
));
22431 gcc_assert (MEM_P (x
));
22432 addr
= XEXP (x
, 0);
22433 gcc_assert (REG_P (addr
));
22434 asm_fprintf (stream
, "[%r]", REGNO (addr
));
22438 /* Translate an S register number into a D register number and element index. */
22441 machine_mode mode
= GET_MODE (x
);
22444 if (GET_MODE_SIZE (mode
) != 4 || !REG_P (x
))
22446 output_operand_lossage ("invalid operand for code '%c'", code
);
22451 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22453 output_operand_lossage ("invalid operand for code '%c'", code
);
22457 regno
= regno
- FIRST_VFP_REGNUM
;
22458 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
22463 gcc_assert (CONST_DOUBLE_P (x
));
22465 result
= vfp3_const_double_for_fract_bits (x
);
22467 result
= vfp3_const_double_for_bits (x
);
22468 fprintf (stream
, "#%d", result
);
22471 /* Register specifier for vld1.16/vst1.16. Translate the S register
22472 number into a D register number and element index. */
22475 machine_mode mode
= GET_MODE (x
);
22478 if (GET_MODE_SIZE (mode
) != 2 || !REG_P (x
))
22480 output_operand_lossage ("invalid operand for code '%c'", code
);
22485 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
22487 output_operand_lossage ("invalid operand for code '%c'", code
);
22491 regno
= regno
- FIRST_VFP_REGNUM
;
22492 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
22499 output_operand_lossage ("missing operand");
22503 switch (GET_CODE (x
))
22506 asm_fprintf (stream
, "%r", REGNO (x
));
22510 output_address (GET_MODE (x
), XEXP (x
, 0));
22516 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
22517 sizeof (fpstr
), 0, 1);
22518 fprintf (stream
, "#%s", fpstr
);
22523 gcc_assert (GET_CODE (x
) != NEG
);
22524 fputc ('#', stream
);
22525 if (GET_CODE (x
) == HIGH
)
22527 fputs (":lower16:", stream
);
22531 output_addr_const (stream
, x
);
22537 /* Target hook for printing a memory address. */
22539 arm_print_operand_address (FILE *stream
, machine_mode mode
, rtx x
)
22543 int is_minus
= GET_CODE (x
) == MINUS
;
22546 asm_fprintf (stream
, "[%r]", REGNO (x
));
22547 else if (GET_CODE (x
) == PLUS
|| is_minus
)
22549 rtx base
= XEXP (x
, 0);
22550 rtx index
= XEXP (x
, 1);
22551 HOST_WIDE_INT offset
= 0;
22553 || (REG_P (index
) && REGNO (index
) == SP_REGNUM
))
22555 /* Ensure that BASE is a register. */
22556 /* (one of them must be). */
22557 /* Also ensure the SP is not used as in index register. */
22558 std::swap (base
, index
);
22560 switch (GET_CODE (index
))
22563 offset
= INTVAL (index
);
22566 asm_fprintf (stream
, "[%r, #%wd]",
22567 REGNO (base
), offset
);
22571 asm_fprintf (stream
, "[%r, %s%r]",
22572 REGNO (base
), is_minus
? "-" : "",
22582 asm_fprintf (stream
, "[%r, %s%r",
22583 REGNO (base
), is_minus
? "-" : "",
22584 REGNO (XEXP (index
, 0)));
22585 arm_print_operand (stream
, index
, 'S');
22586 fputs ("]", stream
);
22591 gcc_unreachable ();
22594 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
22595 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
22597 gcc_assert (REG_P (XEXP (x
, 0)));
22599 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
22600 asm_fprintf (stream
, "[%r, #%s%d]!",
22601 REGNO (XEXP (x
, 0)),
22602 GET_CODE (x
) == PRE_DEC
? "-" : "",
22603 GET_MODE_SIZE (mode
));
22605 asm_fprintf (stream
, "[%r], #%s%d",
22606 REGNO (XEXP (x
, 0)),
22607 GET_CODE (x
) == POST_DEC
? "-" : "",
22608 GET_MODE_SIZE (mode
));
22610 else if (GET_CODE (x
) == PRE_MODIFY
)
22612 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
22613 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22614 asm_fprintf (stream
, "#%wd]!",
22615 INTVAL (XEXP (XEXP (x
, 1), 1)));
22617 asm_fprintf (stream
, "%r]!",
22618 REGNO (XEXP (XEXP (x
, 1), 1)));
22620 else if (GET_CODE (x
) == POST_MODIFY
)
22622 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
22623 if (CONST_INT_P (XEXP (XEXP (x
, 1), 1)))
22624 asm_fprintf (stream
, "#%wd",
22625 INTVAL (XEXP (XEXP (x
, 1), 1)));
22627 asm_fprintf (stream
, "%r",
22628 REGNO (XEXP (XEXP (x
, 1), 1)));
22630 else output_addr_const (stream
, x
);
22635 asm_fprintf (stream
, "[%r]", REGNO (x
));
22636 else if (GET_CODE (x
) == POST_INC
)
22637 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
22638 else if (GET_CODE (x
) == PLUS
)
22640 gcc_assert (REG_P (XEXP (x
, 0)));
22641 if (CONST_INT_P (XEXP (x
, 1)))
22642 asm_fprintf (stream
, "[%r, #%wd]",
22643 REGNO (XEXP (x
, 0)),
22644 INTVAL (XEXP (x
, 1)));
22646 asm_fprintf (stream
, "[%r, %r]",
22647 REGNO (XEXP (x
, 0)),
22648 REGNO (XEXP (x
, 1)));
22651 output_addr_const (stream
, x
);
22655 /* Target hook for indicating whether a punctuation character for
22656 TARGET_PRINT_OPERAND is valid. */
22658 arm_print_operand_punct_valid_p (unsigned char code
)
22660 return (code
== '@' || code
== '|' || code
== '.'
22661 || code
== '(' || code
== ')' || code
== '#'
22662 || (TARGET_32BIT
&& (code
== '?'))
22663 || (TARGET_THUMB2
&& (code
== '!'))
22664 || (TARGET_THUMB
&& (code
== '_')));
22667 /* Target hook for assembling integer objects. The ARM version needs to
22668 handle word-sized values specially. */
22670 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
22674 if (size
== UNITS_PER_WORD
&& aligned_p
)
22676 fputs ("\t.word\t", asm_out_file
);
22677 output_addr_const (asm_out_file
, x
);
22679 /* Mark symbols as position independent. We only do this in the
22680 .text segment, not in the .data segment. */
22681 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
22682 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
22684 /* See legitimize_pic_address for an explanation of the
22685 TARGET_VXWORKS_RTP check. */
22686 /* References to weak symbols cannot be resolved locally:
22687 they may be overridden by a non-weak definition at link
22689 if (!arm_pic_data_is_text_relative
22690 || (GET_CODE (x
) == SYMBOL_REF
22691 && (!SYMBOL_REF_LOCAL_P (x
)
22692 || (SYMBOL_REF_DECL (x
)
22693 ? DECL_WEAK (SYMBOL_REF_DECL (x
)) : 0))))
22694 fputs ("(GOT)", asm_out_file
);
22696 fputs ("(GOTOFF)", asm_out_file
);
22698 fputc ('\n', asm_out_file
);
22702 mode
= GET_MODE (x
);
22704 if (arm_vector_mode_supported_p (mode
))
22708 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22710 units
= CONST_VECTOR_NUNITS (x
);
22711 size
= GET_MODE_UNIT_SIZE (mode
);
22713 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
22714 for (i
= 0; i
< units
; i
++)
22716 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22718 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
22721 for (i
= 0; i
< units
; i
++)
22723 rtx elt
= CONST_VECTOR_ELT (x
, i
);
22725 (*CONST_DOUBLE_REAL_VALUE (elt
),
22726 as_a
<scalar_float_mode
> (GET_MODE_INNER (mode
)),
22727 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
22733 return default_assemble_integer (x
, size
, aligned_p
);
22737 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
22741 if (!TARGET_AAPCS_BASED
)
22744 default_named_section_asm_out_constructor
22745 : default_named_section_asm_out_destructor
) (symbol
, priority
);
22749 /* Put these in the .init_array section, using a special relocation. */
22750 if (priority
!= DEFAULT_INIT_PRIORITY
)
22753 sprintf (buf
, "%s.%.5u",
22754 is_ctor
? ".init_array" : ".fini_array",
22756 s
= get_section (buf
, SECTION_WRITE
| SECTION_NOTYPE
, NULL_TREE
);
22763 switch_to_section (s
);
22764 assemble_align (POINTER_SIZE
);
22765 fputs ("\t.word\t", asm_out_file
);
22766 output_addr_const (asm_out_file
, symbol
);
22767 fputs ("(target1)\n", asm_out_file
);
22770 /* Add a function to the list of static constructors. */
22773 arm_elf_asm_constructor (rtx symbol
, int priority
)
22775 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
22778 /* Add a function to the list of static destructors. */
22781 arm_elf_asm_destructor (rtx symbol
, int priority
)
22783 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
22786 /* A finite state machine takes care of noticing whether or not instructions
22787 can be conditionally executed, and thus decrease execution time and code
22788 size by deleting branch instructions. The fsm is controlled by
22789 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22791 /* The state of the fsm controlling condition codes are:
22792 0: normal, do nothing special
22793 1: make ASM_OUTPUT_OPCODE not output this instruction
22794 2: make ASM_OUTPUT_OPCODE not output this instruction
22795 3: make instructions conditional
22796 4: make instructions conditional
22798 State transitions (state->state by whom under condition):
22799 0 -> 1 final_prescan_insn if the `target' is a label
22800 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22801 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22802 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22803 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22804 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22805 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22806 (the target insn is arm_target_insn).
22808 If the jump clobbers the conditions then we use states 2 and 4.
22810 A similar thing can be done with conditional return insns.
22812 XXX In case the `target' is an unconditional branch, this conditionalising
22813 of the instructions always reduces code size, but not always execution
22814 time. But then, I want to reduce the code size to somewhere near what
22815 /bin/cc produces. */
22817 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22818 instructions. When a COND_EXEC instruction is seen the subsequent
22819 instructions are scanned so that multiple conditional instructions can be
22820 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22821 specify the length and true/false mask for the IT block. These will be
22822 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22824 /* Returns the index of the ARM condition code string in
22825 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22826 COMPARISON should be an rtx like `(eq (...) (...))'. */
22829 maybe_get_arm_condition_code (rtx comparison
)
22831 machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
22832 enum arm_cond_code code
;
22833 enum rtx_code comp_code
= GET_CODE (comparison
);
22835 if (GET_MODE_CLASS (mode
) != MODE_CC
)
22836 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
22837 XEXP (comparison
, 1));
22841 case E_CC_DNEmode
: code
= ARM_NE
; goto dominance
;
22842 case E_CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
22843 case E_CC_DGEmode
: code
= ARM_GE
; goto dominance
;
22844 case E_CC_DGTmode
: code
= ARM_GT
; goto dominance
;
22845 case E_CC_DLEmode
: code
= ARM_LE
; goto dominance
;
22846 case E_CC_DLTmode
: code
= ARM_LT
; goto dominance
;
22847 case E_CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
22848 case E_CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
22849 case E_CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
22850 case E_CC_DLTUmode
: code
= ARM_CC
;
22853 if (comp_code
== EQ
)
22854 return ARM_INVERSE_CONDITION_CODE (code
);
22855 if (comp_code
== NE
)
22859 case E_CC_NOOVmode
:
22862 case NE
: return ARM_NE
;
22863 case EQ
: return ARM_EQ
;
22864 case GE
: return ARM_PL
;
22865 case LT
: return ARM_MI
;
22866 default: return ARM_NV
;
22872 case NE
: return ARM_NE
;
22873 case EQ
: return ARM_EQ
;
22874 default: return ARM_NV
;
22880 case NE
: return ARM_MI
;
22881 case EQ
: return ARM_PL
;
22882 default: return ARM_NV
;
22887 /* We can handle all cases except UNEQ and LTGT. */
22890 case GE
: return ARM_GE
;
22891 case GT
: return ARM_GT
;
22892 case LE
: return ARM_LS
;
22893 case LT
: return ARM_MI
;
22894 case NE
: return ARM_NE
;
22895 case EQ
: return ARM_EQ
;
22896 case ORDERED
: return ARM_VC
;
22897 case UNORDERED
: return ARM_VS
;
22898 case UNLT
: return ARM_LT
;
22899 case UNLE
: return ARM_LE
;
22900 case UNGT
: return ARM_HI
;
22901 case UNGE
: return ARM_PL
;
22902 /* UNEQ and LTGT do not have a representation. */
22903 case UNEQ
: /* Fall through. */
22904 case LTGT
: /* Fall through. */
22905 default: return ARM_NV
;
22911 case NE
: return ARM_NE
;
22912 case EQ
: return ARM_EQ
;
22913 case GE
: return ARM_LE
;
22914 case GT
: return ARM_LT
;
22915 case LE
: return ARM_GE
;
22916 case LT
: return ARM_GT
;
22917 case GEU
: return ARM_LS
;
22918 case GTU
: return ARM_CC
;
22919 case LEU
: return ARM_CS
;
22920 case LTU
: return ARM_HI
;
22921 default: return ARM_NV
;
22927 case LTU
: return ARM_CS
;
22928 case GEU
: return ARM_CC
;
22929 case NE
: return ARM_CS
;
22930 case EQ
: return ARM_CC
;
22931 default: return ARM_NV
;
22937 case NE
: return ARM_NE
;
22938 case EQ
: return ARM_EQ
;
22939 case GEU
: return ARM_CS
;
22940 case GTU
: return ARM_HI
;
22941 case LEU
: return ARM_LS
;
22942 case LTU
: return ARM_CC
;
22943 default: return ARM_NV
;
22949 case GE
: return ARM_GE
;
22950 case LT
: return ARM_LT
;
22951 case GEU
: return ARM_CS
;
22952 case LTU
: return ARM_CC
;
22953 default: return ARM_NV
;
22959 case NE
: return ARM_VS
;
22960 case EQ
: return ARM_VC
;
22961 default: return ARM_NV
;
22967 case NE
: return ARM_NE
;
22968 case EQ
: return ARM_EQ
;
22969 case GE
: return ARM_GE
;
22970 case GT
: return ARM_GT
;
22971 case LE
: return ARM_LE
;
22972 case LT
: return ARM_LT
;
22973 case GEU
: return ARM_CS
;
22974 case GTU
: return ARM_HI
;
22975 case LEU
: return ARM_LS
;
22976 case LTU
: return ARM_CC
;
22977 default: return ARM_NV
;
22980 default: gcc_unreachable ();
22984 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22985 static enum arm_cond_code
22986 get_arm_condition_code (rtx comparison
)
22988 enum arm_cond_code code
= maybe_get_arm_condition_code (comparison
);
22989 gcc_assert (code
!= ARM_NV
);
22993 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22994 code registers when not targetting Thumb1. The VFP condition register
22995 only exists when generating hard-float code. */
22997 arm_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
23003 *p2
= TARGET_HARD_FLOAT
? VFPCC_REGNUM
: INVALID_REGNUM
;
23007 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23010 thumb2_final_prescan_insn (rtx_insn
*insn
)
23012 rtx_insn
*first_insn
= insn
;
23013 rtx body
= PATTERN (insn
);
23015 enum arm_cond_code code
;
23020 /* max_insns_skipped in the tune was already taken into account in the
23021 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23022 just emit the IT blocks as we can. It does not make sense to split
23024 max
= MAX_INSN_PER_IT_BLOCK
;
23026 /* Remove the previous insn from the count of insns to be output. */
23027 if (arm_condexec_count
)
23028 arm_condexec_count
--;
23030 /* Nothing to do if we are already inside a conditional block. */
23031 if (arm_condexec_count
)
23034 if (GET_CODE (body
) != COND_EXEC
)
23037 /* Conditional jumps are implemented directly. */
23041 predicate
= COND_EXEC_TEST (body
);
23042 arm_current_cc
= get_arm_condition_code (predicate
);
23044 n
= get_attr_ce_count (insn
);
23045 arm_condexec_count
= 1;
23046 arm_condexec_mask
= (1 << n
) - 1;
23047 arm_condexec_masklen
= n
;
23048 /* See if subsequent instructions can be combined into the same block. */
23051 insn
= next_nonnote_insn (insn
);
23053 /* Jumping into the middle of an IT block is illegal, so a label or
23054 barrier terminates the block. */
23055 if (!NONJUMP_INSN_P (insn
) && !JUMP_P (insn
))
23058 body
= PATTERN (insn
);
23059 /* USE and CLOBBER aren't really insns, so just skip them. */
23060 if (GET_CODE (body
) == USE
23061 || GET_CODE (body
) == CLOBBER
)
23064 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23065 if (GET_CODE (body
) != COND_EXEC
)
23067 /* Maximum number of conditionally executed instructions in a block. */
23068 n
= get_attr_ce_count (insn
);
23069 if (arm_condexec_masklen
+ n
> max
)
23072 predicate
= COND_EXEC_TEST (body
);
23073 code
= get_arm_condition_code (predicate
);
23074 mask
= (1 << n
) - 1;
23075 if (arm_current_cc
== code
)
23076 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
23077 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
23080 arm_condexec_count
++;
23081 arm_condexec_masklen
+= n
;
23083 /* A jump must be the last instruction in a conditional block. */
23087 /* Restore recog_data (getting the attributes of other insns can
23088 destroy this array, but final.c assumes that it remains intact
23089 across this call). */
23090 extract_constrain_insn_cached (first_insn
);
23094 arm_final_prescan_insn (rtx_insn
*insn
)
23096 /* BODY will hold the body of INSN. */
23097 rtx body
= PATTERN (insn
);
23099 /* This will be 1 if trying to repeat the trick, and things need to be
23100 reversed if it appears to fail. */
23103 /* If we start with a return insn, we only succeed if we find another one. */
23104 int seeking_return
= 0;
23105 enum rtx_code return_code
= UNKNOWN
;
23107 /* START_INSN will hold the insn from where we start looking. This is the
23108 first insn after the following code_label if REVERSE is true. */
23109 rtx_insn
*start_insn
= insn
;
23111 /* If in state 4, check if the target branch is reached, in order to
23112 change back to state 0. */
23113 if (arm_ccfsm_state
== 4)
23115 if (insn
== arm_target_insn
)
23117 arm_target_insn
= NULL
;
23118 arm_ccfsm_state
= 0;
23123 /* If in state 3, it is possible to repeat the trick, if this insn is an
23124 unconditional branch to a label, and immediately following this branch
23125 is the previous target label which is only used once, and the label this
23126 branch jumps to is not too far off. */
23127 if (arm_ccfsm_state
== 3)
23129 if (simplejump_p (insn
))
23131 start_insn
= next_nonnote_insn (start_insn
);
23132 if (BARRIER_P (start_insn
))
23134 /* XXX Isn't this always a barrier? */
23135 start_insn
= next_nonnote_insn (start_insn
);
23137 if (LABEL_P (start_insn
)
23138 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23139 && LABEL_NUSES (start_insn
) == 1)
23144 else if (ANY_RETURN_P (body
))
23146 start_insn
= next_nonnote_insn (start_insn
);
23147 if (BARRIER_P (start_insn
))
23148 start_insn
= next_nonnote_insn (start_insn
);
23149 if (LABEL_P (start_insn
)
23150 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
23151 && LABEL_NUSES (start_insn
) == 1)
23154 seeking_return
= 1;
23155 return_code
= GET_CODE (body
);
23164 gcc_assert (!arm_ccfsm_state
|| reverse
);
23165 if (!JUMP_P (insn
))
23168 /* This jump might be paralleled with a clobber of the condition codes
23169 the jump should always come first */
23170 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
23171 body
= XVECEXP (body
, 0, 0);
23174 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
23175 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
23178 int fail
= FALSE
, succeed
= FALSE
;
23179 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23180 int then_not_else
= TRUE
;
23181 rtx_insn
*this_insn
= start_insn
;
23184 /* Register the insn jumped to. */
23187 if (!seeking_return
)
23188 label
= XEXP (SET_SRC (body
), 0);
23190 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
23191 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
23192 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
23194 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
23195 then_not_else
= FALSE
;
23197 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 1)))
23199 seeking_return
= 1;
23200 return_code
= GET_CODE (XEXP (SET_SRC (body
), 1));
23202 else if (ANY_RETURN_P (XEXP (SET_SRC (body
), 2)))
23204 seeking_return
= 1;
23205 return_code
= GET_CODE (XEXP (SET_SRC (body
), 2));
23206 then_not_else
= FALSE
;
23209 gcc_unreachable ();
23211 /* See how many insns this branch skips, and what kind of insns. If all
23212 insns are okay, and the label or unconditional branch to the same
23213 label is not too far away, succeed. */
23214 for (insns_skipped
= 0;
23215 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
23219 this_insn
= next_nonnote_insn (this_insn
);
23223 switch (GET_CODE (this_insn
))
23226 /* Succeed if it is the target label, otherwise fail since
23227 control falls in from somewhere else. */
23228 if (this_insn
== label
)
23230 arm_ccfsm_state
= 1;
23238 /* Succeed if the following insn is the target label.
23240 If return insns are used then the last insn in a function
23241 will be a barrier. */
23242 this_insn
= next_nonnote_insn (this_insn
);
23243 if (this_insn
&& this_insn
== label
)
23245 arm_ccfsm_state
= 1;
23253 /* The AAPCS says that conditional calls should not be
23254 used since they make interworking inefficient (the
23255 linker can't transform BL<cond> into BLX). That's
23256 only a problem if the machine has BLX. */
23263 /* Succeed if the following insn is the target label, or
23264 if the following two insns are a barrier and the
23266 this_insn
= next_nonnote_insn (this_insn
);
23267 if (this_insn
&& BARRIER_P (this_insn
))
23268 this_insn
= next_nonnote_insn (this_insn
);
23270 if (this_insn
&& this_insn
== label
23271 && insns_skipped
< max_insns_skipped
)
23273 arm_ccfsm_state
= 1;
23281 /* If this is an unconditional branch to the same label, succeed.
23282 If it is to another label, do nothing. If it is conditional,
23284 /* XXX Probably, the tests for SET and the PC are
23287 scanbody
= PATTERN (this_insn
);
23288 if (GET_CODE (scanbody
) == SET
23289 && GET_CODE (SET_DEST (scanbody
)) == PC
)
23291 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
23292 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
23294 arm_ccfsm_state
= 2;
23297 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
23300 /* Fail if a conditional return is undesirable (e.g. on a
23301 StrongARM), but still allow this if optimizing for size. */
23302 else if (GET_CODE (scanbody
) == return_code
23303 && !use_return_insn (TRUE
, NULL
)
23306 else if (GET_CODE (scanbody
) == return_code
)
23308 arm_ccfsm_state
= 2;
23311 else if (GET_CODE (scanbody
) == PARALLEL
)
23313 switch (get_attr_conds (this_insn
))
23323 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
23328 /* Instructions using or affecting the condition codes make it
23330 scanbody
= PATTERN (this_insn
);
23331 if (!(GET_CODE (scanbody
) == SET
23332 || GET_CODE (scanbody
) == PARALLEL
)
23333 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
23343 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
23344 arm_target_label
= CODE_LABEL_NUMBER (label
);
23347 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
23349 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
23351 this_insn
= next_nonnote_insn (this_insn
);
23352 gcc_assert (!this_insn
23353 || (!BARRIER_P (this_insn
)
23354 && !LABEL_P (this_insn
)));
23358 /* Oh, dear! we ran off the end.. give up. */
23359 extract_constrain_insn_cached (insn
);
23360 arm_ccfsm_state
= 0;
23361 arm_target_insn
= NULL
;
23364 arm_target_insn
= this_insn
;
23367 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23370 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
23372 if (reverse
|| then_not_else
)
23373 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
23376 /* Restore recog_data (getting the attributes of other insns can
23377 destroy this array, but final.c assumes that it remains intact
23378 across this call. */
23379 extract_constrain_insn_cached (insn
);
23383 /* Output IT instructions. */
23385 thumb2_asm_output_opcode (FILE * stream
)
23390 if (arm_condexec_mask
)
23392 for (n
= 0; n
< arm_condexec_masklen
; n
++)
23393 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
23395 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
23396 arm_condition_codes
[arm_current_cc
]);
23397 arm_condexec_mask
= 0;
23401 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23402 UNITS_PER_WORD bytes wide. */
23403 static unsigned int
23404 arm_hard_regno_nregs (unsigned int regno
, machine_mode mode
)
23407 && regno
> PC_REGNUM
23408 && regno
!= FRAME_POINTER_REGNUM
23409 && regno
!= ARG_POINTER_REGNUM
23410 && !IS_VFP_REGNUM (regno
))
23413 return ARM_NUM_REGS (mode
);
23416 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23418 arm_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
23420 if (GET_MODE_CLASS (mode
) == MODE_CC
)
23421 return (regno
== CC_REGNUM
23422 || (TARGET_HARD_FLOAT
23423 && regno
== VFPCC_REGNUM
));
23425 if (regno
== CC_REGNUM
&& GET_MODE_CLASS (mode
) != MODE_CC
)
23429 /* For the Thumb we only allow values bigger than SImode in
23430 registers 0 - 6, so that there is always a second low
23431 register available to hold the upper part of the value.
23432 We probably we ought to ensure that the register is the
23433 start of an even numbered register pair. */
23434 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
23436 if (TARGET_HARD_FLOAT
&& IS_VFP_REGNUM (regno
))
23438 if (mode
== SFmode
|| mode
== SImode
)
23439 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23441 if (mode
== DFmode
)
23442 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
23444 if (mode
== HFmode
)
23445 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23447 /* VFP registers can hold HImode values. */
23448 if (mode
== HImode
)
23449 return VFP_REGNO_OK_FOR_SINGLE (regno
);
23452 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
23453 || (VALID_NEON_QREG_MODE (mode
)
23454 && NEON_REGNO_OK_FOR_QUAD (regno
))
23455 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
23456 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
23457 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
23458 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
23459 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
23464 if (TARGET_REALLY_IWMMXT
)
23466 if (IS_IWMMXT_GR_REGNUM (regno
))
23467 return mode
== SImode
;
23469 if (IS_IWMMXT_REGNUM (regno
))
23470 return VALID_IWMMXT_REG_MODE (mode
);
23473 /* We allow almost any value to be stored in the general registers.
23474 Restrict doubleword quantities to even register pairs in ARM state
23475 so that we can use ldrd. Do not allow very large Neon structure
23476 opaque modes in general registers; they would use too many. */
23477 if (regno
<= LAST_ARM_REGNUM
)
23479 if (ARM_NUM_REGS (mode
) > 4)
23485 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0);
23488 if (regno
== FRAME_POINTER_REGNUM
23489 || regno
== ARG_POINTER_REGNUM
)
23490 /* We only allow integers in the fake hard registers. */
23491 return GET_MODE_CLASS (mode
) == MODE_INT
;
23496 /* Implement TARGET_MODES_TIEABLE_P. */
23499 arm_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
23501 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
23504 /* We specifically want to allow elements of "structure" modes to
23505 be tieable to the structure. This more general condition allows
23506 other rarer situations too. */
23508 && (VALID_NEON_DREG_MODE (mode1
)
23509 || VALID_NEON_QREG_MODE (mode1
)
23510 || VALID_NEON_STRUCT_MODE (mode1
))
23511 && (VALID_NEON_DREG_MODE (mode2
)
23512 || VALID_NEON_QREG_MODE (mode2
)
23513 || VALID_NEON_STRUCT_MODE (mode2
)))
23519 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23520 not used in arm mode. */
23523 arm_regno_class (int regno
)
23525 if (regno
== PC_REGNUM
)
23530 if (regno
== STACK_POINTER_REGNUM
)
23532 if (regno
== CC_REGNUM
)
23539 if (TARGET_THUMB2
&& regno
< 8)
23542 if ( regno
<= LAST_ARM_REGNUM
23543 || regno
== FRAME_POINTER_REGNUM
23544 || regno
== ARG_POINTER_REGNUM
)
23545 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
23547 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
23548 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
23550 if (IS_VFP_REGNUM (regno
))
23552 if (regno
<= D7_VFP_REGNUM
)
23553 return VFP_D0_D7_REGS
;
23554 else if (regno
<= LAST_LO_VFP_REGNUM
)
23555 return VFP_LO_REGS
;
23557 return VFP_HI_REGS
;
23560 if (IS_IWMMXT_REGNUM (regno
))
23561 return IWMMXT_REGS
;
23563 if (IS_IWMMXT_GR_REGNUM (regno
))
23564 return IWMMXT_GR_REGS
;
23569 /* Handle a special case when computing the offset
23570 of an argument from the frame pointer. */
23572 arm_debugger_arg_offset (int value
, rtx addr
)
23576 /* We are only interested if dbxout_parms() failed to compute the offset. */
23580 /* We can only cope with the case where the address is held in a register. */
23584 /* If we are using the frame pointer to point at the argument, then
23585 an offset of 0 is correct. */
23586 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
23589 /* If we are using the stack pointer to point at the
23590 argument, then an offset of 0 is correct. */
23591 /* ??? Check this is consistent with thumb2 frame layout. */
23592 if ((TARGET_THUMB
|| !frame_pointer_needed
)
23593 && REGNO (addr
) == SP_REGNUM
)
23596 /* Oh dear. The argument is pointed to by a register rather
23597 than being held in a register, or being stored at a known
23598 offset from the frame pointer. Since GDB only understands
23599 those two kinds of argument we must translate the address
23600 held in the register into an offset from the frame pointer.
23601 We do this by searching through the insns for the function
23602 looking to see where this register gets its value. If the
23603 register is initialized from the frame pointer plus an offset
23604 then we are in luck and we can continue, otherwise we give up.
23606 This code is exercised by producing debugging information
23607 for a function with arguments like this:
23609 double func (double a, double b, int c, double d) {return d;}
23611 Without this code the stab for parameter 'd' will be set to
23612 an offset of 0 from the frame pointer, rather than 8. */
23614 /* The if() statement says:
23616 If the insn is a normal instruction
23617 and if the insn is setting the value in a register
23618 and if the register being set is the register holding the address of the argument
23619 and if the address is computing by an addition
23620 that involves adding to a register
23621 which is the frame pointer
23626 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
23628 if ( NONJUMP_INSN_P (insn
)
23629 && GET_CODE (PATTERN (insn
)) == SET
23630 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
23631 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
23632 && REG_P (XEXP (XEXP (PATTERN (insn
), 1), 0))
23633 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23634 && CONST_INT_P (XEXP (XEXP (PATTERN (insn
), 1), 1))
23637 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
23646 warning (0, "unable to compute real location of stacked parameter");
23647 value
= 8; /* XXX magic hack */
23653 /* Implement TARGET_PROMOTED_TYPE. */
23656 arm_promoted_type (const_tree t
)
23658 if (SCALAR_FLOAT_TYPE_P (t
)
23659 && TYPE_PRECISION (t
) == 16
23660 && TYPE_MAIN_VARIANT (t
) == arm_fp16_type_node
)
23661 return float_type_node
;
23665 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23666 This simply adds HFmode as a supported mode; even though we don't
23667 implement arithmetic on this type directly, it's supported by
23668 optabs conversions, much the way the double-word arithmetic is
23669 special-cased in the default hook. */
23672 arm_scalar_mode_supported_p (scalar_mode mode
)
23674 if (mode
== HFmode
)
23675 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
23676 else if (ALL_FIXED_POINT_MODE_P (mode
))
23679 return default_scalar_mode_supported_p (mode
);
23682 /* Set the value of FLT_EVAL_METHOD.
23683 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23685 0: evaluate all operations and constants, whose semantic type has at
23686 most the range and precision of type float, to the range and
23687 precision of float; evaluate all other operations and constants to
23688 the range and precision of the semantic type;
23690 N, where _FloatN is a supported interchange floating type
23691 evaluate all operations and constants, whose semantic type has at
23692 most the range and precision of _FloatN type, to the range and
23693 precision of the _FloatN type; evaluate all other operations and
23694 constants to the range and precision of the semantic type;
23696 If we have the ARMv8.2-A extensions then we support _Float16 in native
23697 precision, so we should set this to 16. Otherwise, we support the type,
23698 but want to evaluate expressions in float precision, so set this to
23701 static enum flt_eval_method
23702 arm_excess_precision (enum excess_precision_type type
)
23706 case EXCESS_PRECISION_TYPE_FAST
:
23707 case EXCESS_PRECISION_TYPE_STANDARD
:
23708 /* We can calculate either in 16-bit range and precision or
23709 32-bit range and precision. Make that decision based on whether
23710 we have native support for the ARMv8.2-A 16-bit floating-point
23711 instructions or not. */
23712 return (TARGET_VFP_FP16INST
23713 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23714 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
);
23715 case EXCESS_PRECISION_TYPE_IMPLICIT
:
23716 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
;
23718 gcc_unreachable ();
23720 return FLT_EVAL_METHOD_UNPREDICTABLE
;
23724 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23725 _Float16 if we are using anything other than ieee format for 16-bit
23726 floating point. Otherwise, punt to the default implementation. */
23727 static opt_scalar_float_mode
23728 arm_floatn_mode (int n
, bool extended
)
23730 if (!extended
&& n
== 16)
23732 if (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
)
23734 return opt_scalar_float_mode ();
23737 return default_floatn_mode (n
, extended
);
23741 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23742 not to early-clobber SRC registers in the process.
23744 We assume that the operands described by SRC and DEST represent a
23745 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23746 number of components into which the copy has been decomposed. */
23748 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
23752 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
23753 || REGNO (operands
[0]) < REGNO (operands
[1]))
23755 for (i
= 0; i
< count
; i
++)
23757 operands
[2 * i
] = dest
[i
];
23758 operands
[2 * i
+ 1] = src
[i
];
23763 for (i
= 0; i
< count
; i
++)
23765 operands
[2 * i
] = dest
[count
- i
- 1];
23766 operands
[2 * i
+ 1] = src
[count
- i
- 1];
23771 /* Split operands into moves from op[1] + op[2] into op[0]. */
23774 neon_split_vcombine (rtx operands
[3])
23776 unsigned int dest
= REGNO (operands
[0]);
23777 unsigned int src1
= REGNO (operands
[1]);
23778 unsigned int src2
= REGNO (operands
[2]);
23779 machine_mode halfmode
= GET_MODE (operands
[1]);
23780 unsigned int halfregs
= REG_NREGS (operands
[1]);
23781 rtx destlo
, desthi
;
23783 if (src1
== dest
&& src2
== dest
+ halfregs
)
23785 /* No-op move. Can't split to nothing; emit something. */
23786 emit_note (NOTE_INSN_DELETED
);
23790 /* Preserve register attributes for variable tracking. */
23791 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
23792 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
23793 GET_MODE_SIZE (halfmode
));
23795 /* Special case of reversed high/low parts. Use VSWP. */
23796 if (src2
== dest
&& src1
== dest
+ halfregs
)
23798 rtx x
= gen_rtx_SET (destlo
, operands
[1]);
23799 rtx y
= gen_rtx_SET (desthi
, operands
[2]);
23800 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, x
, y
)));
23804 if (!reg_overlap_mentioned_p (operands
[2], destlo
))
23806 /* Try to avoid unnecessary moves if part of the result
23807 is in the right place already. */
23809 emit_move_insn (destlo
, operands
[1]);
23810 if (src2
!= dest
+ halfregs
)
23811 emit_move_insn (desthi
, operands
[2]);
23815 if (src2
!= dest
+ halfregs
)
23816 emit_move_insn (desthi
, operands
[2]);
23818 emit_move_insn (destlo
, operands
[1]);
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
static inline int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
23831 /* Like emit_multi_reg_push, but allowing for a different set of
23832 registers to be described as saved. MASK is the set of registers
23833 to be saved; REAL_REGS is the set of registers to be described as
23834 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23837 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
23839 unsigned long regno
;
23840 rtx par
[10], tmp
, reg
;
23844 /* Build the parallel of the registers actually being stored. */
23845 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
23847 regno
= ctz_hwi (mask
);
23848 reg
= gen_rtx_REG (SImode
, regno
);
23851 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
23853 tmp
= gen_rtx_USE (VOIDmode
, reg
);
23858 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23859 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
23860 tmp
= gen_frame_mem (BLKmode
, tmp
);
23861 tmp
= gen_rtx_SET (tmp
, par
[0]);
23864 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
23865 insn
= emit_insn (tmp
);
23867 /* Always build the stack adjustment note for unwind info. */
23868 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, -4 * i
);
23869 tmp
= gen_rtx_SET (stack_pointer_rtx
, tmp
);
23872 /* Build the parallel of the registers recorded as saved for unwind. */
23873 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
23875 regno
= ctz_hwi (real_regs
);
23876 reg
= gen_rtx_REG (SImode
, regno
);
23878 tmp
= plus_constant (Pmode
, stack_pointer_rtx
, j
* 4);
23879 tmp
= gen_frame_mem (SImode
, tmp
);
23880 tmp
= gen_rtx_SET (tmp
, reg
);
23881 RTX_FRAME_RELATED_P (tmp
) = 1;
23889 RTX_FRAME_RELATED_P (par
[0]) = 1;
23890 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
23893 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
23898 /* Emit code to push or pop registers to or from the stack. F is the
23899 assembly file. MASK is the registers to pop. */
23901 thumb_pop (FILE *f
, unsigned long mask
)
23904 int lo_mask
= mask
& 0xFF;
23908 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
23910 /* Special case. Do not generate a POP PC statement here, do it in
23912 thumb_exit (f
, -1);
23916 fprintf (f
, "\tpop\t{");
23918 /* Look at the low registers first. */
23919 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
23923 asm_fprintf (f
, "%r", regno
);
23925 if ((lo_mask
& ~1) != 0)
23930 if (mask
& (1 << PC_REGNUM
))
23932 /* Catch popping the PC. */
23933 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
|| crtl
->calls_eh_return
23934 || IS_CMSE_ENTRY (arm_current_func_type ()))
23936 /* The PC is never poped directly, instead
23937 it is popped into r3 and then BX is used. */
23938 fprintf (f
, "}\n");
23940 thumb_exit (f
, -1);
23949 asm_fprintf (f
, "%r", PC_REGNUM
);
23953 fprintf (f
, "}\n");
23956 /* Generate code to return from a thumb function.
23957 If 'reg_containing_return_addr' is -1, then the return address is
23958 actually on the stack, at the stack pointer.
23960 Note: do not forget to update length attribute of corresponding insn pattern
23961 when changing assembly output (eg. length attribute of epilogue_insns when
23962 updating Armv8-M Baseline Security Extensions register clearing
23965 thumb_exit (FILE *f
, int reg_containing_return_addr
)
23967 unsigned regs_available_for_popping
;
23968 unsigned regs_to_pop
;
23970 unsigned available
;
23974 int restore_a4
= FALSE
;
23976 /* Compute the registers we need to pop. */
23980 if (reg_containing_return_addr
== -1)
23982 regs_to_pop
|= 1 << LR_REGNUM
;
23986 if (TARGET_BACKTRACE
)
23988 /* Restore the (ARM) frame pointer and stack pointer. */
23989 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
23993 /* If there is nothing to pop then just emit the BX instruction and
23995 if (pops_needed
== 0)
23997 if (crtl
->calls_eh_return
)
23998 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24000 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24002 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n",
24003 reg_containing_return_addr
);
24004 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
24007 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
24010 /* Otherwise if we are not supporting interworking and we have not created
24011 a backtrace structure and the function was not entered in ARM mode then
24012 just pop the return address straight into the PC. */
24013 else if (!TARGET_INTERWORK
24014 && !TARGET_BACKTRACE
24015 && !is_called_in_ARM_mode (current_function_decl
)
24016 && !crtl
->calls_eh_return
24017 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24019 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
24023 /* Find out how many of the (return) argument registers we can corrupt. */
24024 regs_available_for_popping
= 0;
24026 /* If returning via __builtin_eh_return, the bottom three registers
24027 all contain information needed for the return. */
24028 if (crtl
->calls_eh_return
)
24032 /* If we can deduce the registers used from the function's
24033 return value. This is more reliable that examining
24034 df_regs_ever_live_p () because that will be set if the register is
24035 ever used in the function, not just if the register is used
24036 to hold a return value. */
24038 if (crtl
->return_rtx
!= 0)
24039 mode
= GET_MODE (crtl
->return_rtx
);
24041 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
24043 size
= GET_MODE_SIZE (mode
);
24047 /* In a void function we can use any argument register.
24048 In a function that returns a structure on the stack
24049 we can use the second and third argument registers. */
24050 if (mode
== VOIDmode
)
24051 regs_available_for_popping
=
24052 (1 << ARG_REGISTER (1))
24053 | (1 << ARG_REGISTER (2))
24054 | (1 << ARG_REGISTER (3));
24056 regs_available_for_popping
=
24057 (1 << ARG_REGISTER (2))
24058 | (1 << ARG_REGISTER (3));
24060 else if (size
<= 4)
24061 regs_available_for_popping
=
24062 (1 << ARG_REGISTER (2))
24063 | (1 << ARG_REGISTER (3));
24064 else if (size
<= 8)
24065 regs_available_for_popping
=
24066 (1 << ARG_REGISTER (3));
24069 /* Match registers to be popped with registers into which we pop them. */
24070 for (available
= regs_available_for_popping
,
24071 required
= regs_to_pop
;
24072 required
!= 0 && available
!= 0;
24073 available
&= ~(available
& - available
),
24074 required
&= ~(required
& - required
))
24077 /* If we have any popping registers left over, remove them. */
24079 regs_available_for_popping
&= ~available
;
24081 /* Otherwise if we need another popping register we can use
24082 the fourth argument register. */
24083 else if (pops_needed
)
24085 /* If we have not found any free argument registers and
24086 reg a4 contains the return address, we must move it. */
24087 if (regs_available_for_popping
== 0
24088 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
24090 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24091 reg_containing_return_addr
= LR_REGNUM
;
24093 else if (size
> 12)
24095 /* Register a4 is being used to hold part of the return value,
24096 but we have dire need of a free, low register. */
24099 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
24102 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
24104 /* The fourth argument register is available. */
24105 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
24111 /* Pop as many registers as we can. */
24112 thumb_pop (f
, regs_available_for_popping
);
24114 /* Process the registers we popped. */
24115 if (reg_containing_return_addr
== -1)
24117 /* The return address was popped into the lowest numbered register. */
24118 regs_to_pop
&= ~(1 << LR_REGNUM
);
24120 reg_containing_return_addr
=
24121 number_of_first_bit_set (regs_available_for_popping
);
24123 /* Remove this register for the mask of available registers, so that
24124 the return address will not be corrupted by further pops. */
24125 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
24128 /* If we popped other registers then handle them here. */
24129 if (regs_available_for_popping
)
24133 /* Work out which register currently contains the frame pointer. */
24134 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24136 /* Move it into the correct place. */
24137 asm_fprintf (f
, "\tmov\t%r, %r\n",
24138 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
24140 /* (Temporarily) remove it from the mask of popped registers. */
24141 regs_available_for_popping
&= ~(1 << frame_pointer
);
24142 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
24144 if (regs_available_for_popping
)
24148 /* We popped the stack pointer as well,
24149 find the register that contains it. */
24150 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
24152 /* Move it into the stack register. */
24153 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
24155 /* At this point we have popped all necessary registers, so
24156 do not worry about restoring regs_available_for_popping
24157 to its correct value:
24159 assert (pops_needed == 0)
24160 assert (regs_available_for_popping == (1 << frame_pointer))
24161 assert (regs_to_pop == (1 << STACK_POINTER)) */
24165 /* Since we have just move the popped value into the frame
24166 pointer, the popping register is available for reuse, and
24167 we know that we still have the stack pointer left to pop. */
24168 regs_available_for_popping
|= (1 << frame_pointer
);
24172 /* If we still have registers left on the stack, but we no longer have
24173 any registers into which we can pop them, then we must move the return
24174 address into the link register and make available the register that
24176 if (regs_available_for_popping
== 0 && pops_needed
> 0)
24178 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
24180 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
24181 reg_containing_return_addr
);
24183 reg_containing_return_addr
= LR_REGNUM
;
24186 /* If we have registers left on the stack then pop some more.
24187 We know that at most we will want to pop FP and SP. */
24188 if (pops_needed
> 0)
24193 thumb_pop (f
, regs_available_for_popping
);
24195 /* We have popped either FP or SP.
24196 Move whichever one it is into the correct register. */
24197 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24198 move_to
= number_of_first_bit_set (regs_to_pop
);
24200 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
24204 /* If we still have not popped everything then we must have only
24205 had one register available to us and we are now popping the SP. */
24206 if (pops_needed
> 0)
24210 thumb_pop (f
, regs_available_for_popping
);
24212 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
24214 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
24216 assert (regs_to_pop == (1 << STACK_POINTER))
24217 assert (pops_needed == 1)
24221 /* If necessary restore the a4 register. */
24224 if (reg_containing_return_addr
!= LR_REGNUM
)
24226 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
24227 reg_containing_return_addr
= LR_REGNUM
;
24230 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
24233 if (crtl
->calls_eh_return
)
24234 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
24236 /* Return to caller. */
24237 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24239 /* This is for the cases where LR is not being used to contain the return
24240 address. It may therefore contain information that we might not want
24241 to leak, hence it must be cleared. The value in R0 will never be a
24242 secret at this point, so it is safe to use it, see the clearing code
24243 in 'cmse_nonsecure_entry_clear_before_return'. */
24244 if (reg_containing_return_addr
!= LR_REGNUM
)
24245 asm_fprintf (f
, "\tmov\tlr, r0\n");
24247 asm_fprintf (f
, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr
);
24248 asm_fprintf (f
, "\tbxns\t%r\n", reg_containing_return_addr
);
24251 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
24254 /* Scan INSN just before assembler is output for it.
24255 For Thumb-1, we track the status of the condition codes; this
24256 information is used in the cbranchsi4_insn pattern. */
24258 thumb1_final_prescan_insn (rtx_insn
*insn
)
24260 if (flag_print_asm_name
)
24261 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
24262 INSN_ADDRESSES (INSN_UID (insn
)));
24263 /* Don't overwrite the previous setter when we get to a cbranch. */
24264 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
24266 enum attr_conds conds
;
24268 if (cfun
->machine
->thumb1_cc_insn
)
24270 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
24271 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
24274 conds
= get_attr_conds (insn
);
24275 if (conds
== CONDS_SET
)
24277 rtx set
= single_set (insn
);
24278 cfun
->machine
->thumb1_cc_insn
= insn
;
24279 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
24280 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
24281 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
24282 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
24284 rtx src1
= XEXP (SET_SRC (set
), 1);
24285 if (src1
== const0_rtx
)
24286 cfun
->machine
->thumb1_cc_mode
= CCmode
;
24288 else if (REG_P (SET_DEST (set
)) && REG_P (SET_SRC (set
)))
24290 /* Record the src register operand instead of dest because
24291 cprop_hardreg pass propagates src. */
24292 cfun
->machine
->thumb1_cc_op0
= SET_SRC (set
);
24295 else if (conds
!= CONDS_NOCOND
)
24296 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
24299 /* Check if unexpected far jump is used. */
24300 if (cfun
->machine
->lr_save_eliminated
24301 && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24302 internal_error("Unexpected thumb1 far jump");
24306 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
24308 unsigned HOST_WIDE_INT mask
= 0xff;
24311 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
24312 if (val
== 0) /* XXX */
24315 for (i
= 0; i
< 25; i
++)
24316 if ((val
& (mask
<< i
)) == val
)
24322 /* Returns nonzero if the current function contains,
24323 or might contain a far jump. */
24325 thumb_far_jump_used_p (void)
24328 bool far_jump
= false;
24329 unsigned int func_size
= 0;
24331 /* If we have already decided that far jumps may be used,
24332 do not bother checking again, and always return true even if
24333 it turns out that they are not being used. Once we have made
24334 the decision that far jumps are present (and that hence the link
24335 register will be pushed onto the stack) we cannot go back on it. */
24336 if (cfun
->machine
->far_jump_used
)
24339 /* If this function is not being called from the prologue/epilogue
24340 generation code then it must be being called from the
24341 INITIAL_ELIMINATION_OFFSET macro. */
24342 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
24344 /* In this case we know that we are being asked about the elimination
24345 of the arg pointer register. If that register is not being used,
24346 then there are no arguments on the stack, and we do not have to
24347 worry that a far jump might force the prologue to push the link
24348 register, changing the stack offsets. In this case we can just
24349 return false, since the presence of far jumps in the function will
24350 not affect stack offsets.
24352 If the arg pointer is live (or if it was live, but has now been
24353 eliminated and so set to dead) then we do have to test to see if
24354 the function might contain a far jump. This test can lead to some
24355 false negatives, since before reload is completed, then length of
24356 branch instructions is not known, so gcc defaults to returning their
24357 longest length, which in turn sets the far jump attribute to true.
24359 A false negative will not result in bad code being generated, but it
24360 will result in a needless push and pop of the link register. We
24361 hope that this does not occur too often.
24363 If we need doubleword stack alignment this could affect the other
24364 elimination offsets so we can't risk getting it wrong. */
24365 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
24366 cfun
->machine
->arg_pointer_live
= 1;
24367 else if (!cfun
->machine
->arg_pointer_live
)
24371 /* We should not change far_jump_used during or after reload, as there is
24372 no chance to change stack frame layout. */
24373 if (reload_in_progress
|| reload_completed
)
24376 /* Check to see if the function contains a branch
24377 insn with the far jump attribute set. */
24378 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
24380 if (JUMP_P (insn
) && get_attr_far_jump (insn
) == FAR_JUMP_YES
)
24384 func_size
+= get_attr_length (insn
);
24387 /* Attribute far_jump will always be true for thumb1 before
24388 shorten_branch pass. So checking far_jump attribute before
24389 shorten_branch isn't much useful.
24391 Following heuristic tries to estimate more accurately if a far jump
24392 may finally be used. The heuristic is very conservative as there is
24393 no chance to roll-back the decision of not to use far jump.
24395 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24396 2-byte insn is associated with a 4 byte constant pool. Using
24397 function size 2048/3 as the threshold is conservative enough. */
24400 if ((func_size
* 3) >= 2048)
24402 /* Record the fact that we have decided that
24403 the function does use far jumps. */
24404 cfun
->machine
->far_jump_used
= 1;
24412 /* Return nonzero if FUNC must be entered in ARM mode. */
24414 is_called_in_ARM_mode (tree func
)
24416 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
24418 /* Ignore the problem about functions whose address is taken. */
24419 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
24423 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
24429 /* Given the stack offsets and register mask in OFFSETS, decide how
24430 many additional registers to push instead of subtracting a constant
24431 from SP. For epilogues the principle is the same except we use pop.
24432 FOR_PROLOGUE indicates which we're generating. */
24434 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
24436 HOST_WIDE_INT amount
;
24437 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
24438 /* Extract a mask of the ones we can give to the Thumb's push/pop
24440 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
24441 /* Then count how many other high registers will need to be pushed. */
24442 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24443 int n_free
, reg_base
, size
;
24445 if (!for_prologue
&& frame_pointer_needed
)
24446 amount
= offsets
->locals_base
- offsets
->saved_regs
;
24448 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
24450 /* If the stack frame size is 512 exactly, we can save one load
24451 instruction, which should make this a win even when optimizing
24453 if (!optimize_size
&& amount
!= 512)
24456 /* Can't do this if there are high registers to push. */
24457 if (high_regs_pushed
!= 0)
24460 /* Shouldn't do it in the prologue if no registers would normally
24461 be pushed at all. In the epilogue, also allow it if we'll have
24462 a pop insn for the PC. */
24465 || TARGET_BACKTRACE
24466 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
24467 || TARGET_INTERWORK
24468 || crtl
->args
.pretend_args_size
!= 0))
24471 /* Don't do this if thumb_expand_prologue wants to emit instructions
24472 between the push and the stack frame allocation. */
24474 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
24475 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
24482 size
= arm_size_return_regs ();
24483 reg_base
= ARM_NUM_INTS (size
);
24484 live_regs_mask
>>= reg_base
;
24487 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
24488 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
24490 live_regs_mask
>>= 1;
24496 gcc_assert (amount
/ 4 * 4 == amount
);
24498 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
24499 return (amount
- 508) / 4;
24500 if (amount
<= n_free
* 4)
24505 /* The bits which aren't usefully expanded as rtl. */
24507 thumb1_unexpanded_epilogue (void)
24509 arm_stack_offsets
*offsets
;
24511 unsigned long live_regs_mask
= 0;
24512 int high_regs_pushed
= 0;
24514 int had_to_push_lr
;
24517 if (cfun
->machine
->return_used_this_function
!= 0)
24520 if (IS_NAKED (arm_current_func_type ()))
24523 offsets
= arm_get_frame_offsets ();
24524 live_regs_mask
= offsets
->saved_regs_mask
;
24525 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24527 /* If we can deduce the registers used from the function's return value.
24528 This is more reliable that examining df_regs_ever_live_p () because that
24529 will be set if the register is ever used in the function, not just if
24530 the register is used to hold a return value. */
24531 size
= arm_size_return_regs ();
24533 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
24536 unsigned long extra_mask
= (1 << extra_pop
) - 1;
24537 live_regs_mask
|= extra_mask
<< ARM_NUM_INTS (size
);
24540 /* The prolog may have pushed some high registers to use as
24541 work registers. e.g. the testsuite file:
24542 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24543 compiles to produce:
24544 push {r4, r5, r6, r7, lr}
24548 as part of the prolog. We have to undo that pushing here. */
24550 if (high_regs_pushed
)
24552 unsigned long mask
= live_regs_mask
& 0xff;
24555 /* The available low registers depend on the size of the value we are
24563 /* Oh dear! We have no low registers into which we can pop
24566 ("no low registers available for popping high registers");
24568 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
24569 if (live_regs_mask
& (1 << next_hi_reg
))
24572 while (high_regs_pushed
)
24574 /* Find lo register(s) into which the high register(s) can
24576 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24578 if (mask
& (1 << regno
))
24579 high_regs_pushed
--;
24580 if (high_regs_pushed
== 0)
24584 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
24586 /* Pop the values into the low register(s). */
24587 thumb_pop (asm_out_file
, mask
);
24589 /* Move the value(s) into the high registers. */
24590 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
24592 if (mask
& (1 << regno
))
24594 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
24597 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
24598 if (live_regs_mask
& (1 << next_hi_reg
))
24603 live_regs_mask
&= ~0x0f00;
24606 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
24607 live_regs_mask
&= 0xff;
24609 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
24611 /* Pop the return address into the PC. */
24612 if (had_to_push_lr
)
24613 live_regs_mask
|= 1 << PC_REGNUM
;
24615 /* Either no argument registers were pushed or a backtrace
24616 structure was created which includes an adjusted stack
24617 pointer, so just pop everything. */
24618 if (live_regs_mask
)
24619 thumb_pop (asm_out_file
, live_regs_mask
);
24621 /* We have either just popped the return address into the
24622 PC or it is was kept in LR for the entire function.
24623 Note that thumb_pop has already called thumb_exit if the
24624 PC was in the list. */
24625 if (!had_to_push_lr
)
24626 thumb_exit (asm_out_file
, LR_REGNUM
);
24630 /* Pop everything but the return address. */
24631 if (live_regs_mask
)
24632 thumb_pop (asm_out_file
, live_regs_mask
);
24634 if (had_to_push_lr
)
24638 /* We have no free low regs, so save one. */
24639 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
24643 /* Get the return address into a temporary register. */
24644 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
24648 /* Move the return address to lr. */
24649 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
24651 /* Restore the low register. */
24652 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
24657 regno
= LAST_ARG_REGNUM
;
24662 /* Remove the argument registers that were pushed onto the stack. */
24663 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
24664 SP_REGNUM
, SP_REGNUM
,
24665 crtl
->args
.pretend_args_size
);
24667 thumb_exit (asm_out_file
, regno
);
24673 /* Functions to save and restore machine-specific function data. */
24674 static struct machine_function
*
24675 arm_init_machine_status (void)
24677 struct machine_function
*machine
;
24678 machine
= ggc_cleared_alloc
<machine_function
> ();
24680 #if ARM_FT_UNKNOWN != 0
24681 machine
->func_type
= ARM_FT_UNKNOWN
;
24686 /* Return an RTX indicating where the return address to the
24687 calling function can be found. */
24689 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
24694 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
24697 /* Do anything needed before RTL is emitted for each function. */
24699 arm_init_expanders (void)
24701 /* Arrange to initialize and mark the machine per-function status. */
24702 init_machine_status
= arm_init_machine_status
;
24704 /* This is to stop the combine pass optimizing away the alignment
24705 adjustment of va_arg. */
24706 /* ??? It is claimed that this should not be necessary. */
24708 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
24711 /* Check that FUNC is called with a different mode. */
24714 arm_change_mode_p (tree func
)
24716 if (TREE_CODE (func
) != FUNCTION_DECL
)
24719 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (func
);
24722 callee_tree
= target_option_default_node
;
24724 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
24725 int flags
= callee_opts
->x_target_flags
;
24727 return (TARGET_THUMB_P (flags
) != TARGET_THUMB
);
24730 /* Like arm_compute_initial_elimination offset. Simpler because there
24731 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24732 to point at the base of the local variables after static stack
24733 space for a function has been allocated. */
24736 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
24738 arm_stack_offsets
*offsets
;
24740 offsets
= arm_get_frame_offsets ();
24744 case ARG_POINTER_REGNUM
:
24747 case STACK_POINTER_REGNUM
:
24748 return offsets
->outgoing_args
- offsets
->saved_args
;
24750 case FRAME_POINTER_REGNUM
:
24751 return offsets
->soft_frame
- offsets
->saved_args
;
24753 case ARM_HARD_FRAME_POINTER_REGNUM
:
24754 return offsets
->saved_regs
- offsets
->saved_args
;
24756 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24757 return offsets
->locals_base
- offsets
->saved_args
;
24760 gcc_unreachable ();
24764 case FRAME_POINTER_REGNUM
:
24767 case STACK_POINTER_REGNUM
:
24768 return offsets
->outgoing_args
- offsets
->soft_frame
;
24770 case ARM_HARD_FRAME_POINTER_REGNUM
:
24771 return offsets
->saved_regs
- offsets
->soft_frame
;
24773 case THUMB_HARD_FRAME_POINTER_REGNUM
:
24774 return offsets
->locals_base
- offsets
->soft_frame
;
24777 gcc_unreachable ();
24782 gcc_unreachable ();
24786 /* Generate the function's prologue. */
24789 thumb1_expand_prologue (void)
24793 HOST_WIDE_INT amount
;
24794 HOST_WIDE_INT size
;
24795 arm_stack_offsets
*offsets
;
24796 unsigned long func_type
;
24798 unsigned long live_regs_mask
;
24799 unsigned long l_mask
;
24800 unsigned high_regs_pushed
= 0;
24801 bool lr_needs_saving
;
24803 func_type
= arm_current_func_type ();
24805 /* Naked functions don't have prologues. */
24806 if (IS_NAKED (func_type
))
24808 if (flag_stack_usage_info
)
24809 current_function_static_stack_size
= 0;
24813 if (IS_INTERRUPT (func_type
))
24815 error ("interrupt Service Routines cannot be coded in Thumb mode");
24819 if (is_called_in_ARM_mode (current_function_decl
))
24820 emit_insn (gen_prologue_thumb1_interwork ());
24822 offsets
= arm_get_frame_offsets ();
24823 live_regs_mask
= offsets
->saved_regs_mask
;
24824 lr_needs_saving
= live_regs_mask
& (1 << LR_REGNUM
);
24826 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24827 l_mask
= live_regs_mask
& 0x40ff;
24828 /* Then count how many other high registers will need to be pushed. */
24829 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
24831 if (crtl
->args
.pretend_args_size
)
24833 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
24835 if (cfun
->machine
->uses_anonymous_args
)
24837 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
24838 unsigned long mask
;
24840 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
24841 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
24843 insn
= thumb1_emit_multi_reg_push (mask
, 0);
24847 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24848 stack_pointer_rtx
, x
));
24850 RTX_FRAME_RELATED_P (insn
) = 1;
24853 if (TARGET_BACKTRACE
)
24855 HOST_WIDE_INT offset
= 0;
24856 unsigned work_register
;
24857 rtx work_reg
, x
, arm_hfp_rtx
;
24859 /* We have been asked to create a stack backtrace structure.
24860 The code looks like this:
24864 0 sub SP, #16 Reserve space for 4 registers.
24865 2 push {R7} Push low registers.
24866 4 add R7, SP, #20 Get the stack pointer before the push.
24867 6 str R7, [SP, #8] Store the stack pointer
24868 (before reserving the space).
24869 8 mov R7, PC Get hold of the start of this code + 12.
24870 10 str R7, [SP, #16] Store it.
24871 12 mov R7, FP Get hold of the current frame pointer.
24872 14 str R7, [SP, #4] Store it.
24873 16 mov R7, LR Get hold of the current return address.
24874 18 str R7, [SP, #12] Store it.
24875 20 add R7, SP, #16 Point at the start of the
24876 backtrace structure.
24877 22 mov FP, R7 Put this value into the frame pointer. */
24879 work_register
= thumb_find_work_register (live_regs_mask
);
24880 work_reg
= gen_rtx_REG (SImode
, work_register
);
24881 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
24883 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
24884 stack_pointer_rtx
, GEN_INT (-16)));
24885 RTX_FRAME_RELATED_P (insn
) = 1;
24889 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
24890 RTX_FRAME_RELATED_P (insn
) = 1;
24891 lr_needs_saving
= false;
24893 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
24896 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
24897 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24899 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 4);
24900 x
= gen_frame_mem (SImode
, x
);
24901 emit_move_insn (x
, work_reg
);
24903 /* Make sure that the instruction fetching the PC is in the right place
24904 to calculate "start of backtrace creation code + 12". */
24905 /* ??? The stores using the common WORK_REG ought to be enough to
24906 prevent the scheduler from doing anything weird. Failing that
24907 we could always move all of the following into an UNSPEC_VOLATILE. */
24910 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24911 emit_move_insn (work_reg
, x
);
24913 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24914 x
= gen_frame_mem (SImode
, x
);
24915 emit_move_insn (x
, work_reg
);
24917 emit_move_insn (work_reg
, arm_hfp_rtx
);
24919 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24920 x
= gen_frame_mem (SImode
, x
);
24921 emit_move_insn (x
, work_reg
);
24925 emit_move_insn (work_reg
, arm_hfp_rtx
);
24927 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
24928 x
= gen_frame_mem (SImode
, x
);
24929 emit_move_insn (x
, work_reg
);
24931 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
24932 emit_move_insn (work_reg
, x
);
24934 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 12);
24935 x
= gen_frame_mem (SImode
, x
);
24936 emit_move_insn (x
, work_reg
);
24939 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
24940 emit_move_insn (work_reg
, x
);
24942 x
= plus_constant (Pmode
, stack_pointer_rtx
, offset
+ 8);
24943 x
= gen_frame_mem (SImode
, x
);
24944 emit_move_insn (x
, work_reg
);
24946 x
= GEN_INT (offset
+ 12);
24947 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
24949 emit_move_insn (arm_hfp_rtx
, work_reg
);
24951 /* Optimization: If we are not pushing any low registers but we are going
24952 to push some high registers then delay our first push. This will just
24953 be a push of LR and we can combine it with the push of the first high
24955 else if ((l_mask
& 0xff) != 0
24956 || (high_regs_pushed
== 0 && lr_needs_saving
))
24958 unsigned long mask
= l_mask
;
24959 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
24960 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
24961 RTX_FRAME_RELATED_P (insn
) = 1;
24962 lr_needs_saving
= false;
24965 if (high_regs_pushed
)
24967 unsigned pushable_regs
;
24968 unsigned next_hi_reg
;
24969 unsigned arg_regs_num
= TARGET_AAPCS_BASED
? crtl
->args
.info
.aapcs_ncrn
24970 : crtl
->args
.info
.nregs
;
24971 unsigned arg_regs_mask
= (1 << arg_regs_num
) - 1;
24973 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
24974 if (live_regs_mask
& (1 << next_hi_reg
))
24977 /* Here we need to mask out registers used for passing arguments
24978 even if they can be pushed. This is to avoid using them to stash the high
24979 registers. Such kind of stash may clobber the use of arguments. */
24980 pushable_regs
= l_mask
& (~arg_regs_mask
);
24981 if (lr_needs_saving
)
24982 pushable_regs
&= ~(1 << LR_REGNUM
);
24984 if (pushable_regs
== 0)
24985 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
24987 while (high_regs_pushed
> 0)
24989 unsigned long real_regs_mask
= 0;
24990 unsigned long push_mask
= 0;
24992 for (regno
= LR_REGNUM
; regno
>= 0; regno
--)
24994 if (pushable_regs
& (1 << regno
))
24996 emit_move_insn (gen_rtx_REG (SImode
, regno
),
24997 gen_rtx_REG (SImode
, next_hi_reg
));
24999 high_regs_pushed
--;
25000 real_regs_mask
|= (1 << next_hi_reg
);
25001 push_mask
|= (1 << regno
);
25003 if (high_regs_pushed
)
25005 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
25007 if (live_regs_mask
& (1 << next_hi_reg
))
25015 /* If we had to find a work register and we have not yet
25016 saved the LR then add it to the list of regs to push. */
25017 if (lr_needs_saving
)
25019 push_mask
|= 1 << LR_REGNUM
;
25020 real_regs_mask
|= 1 << LR_REGNUM
;
25021 lr_needs_saving
= false;
25024 insn
= thumb1_emit_multi_reg_push (push_mask
, real_regs_mask
);
25025 RTX_FRAME_RELATED_P (insn
) = 1;
25029 /* Load the pic register before setting the frame pointer,
25030 so we can use r7 as a temporary work register. */
25031 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
25032 arm_load_pic_register (live_regs_mask
);
25034 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
25035 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
25036 stack_pointer_rtx
);
25038 size
= offsets
->outgoing_args
- offsets
->saved_args
;
25039 if (flag_stack_usage_info
)
25040 current_function_static_stack_size
= size
;
25042 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25043 if ((flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
25044 || flag_stack_clash_protection
)
25046 sorry ("-fstack-check=specific for Thumb-1");
25048 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25049 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
25054 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
25055 GEN_INT (- amount
)));
25056 RTX_FRAME_RELATED_P (insn
) = 1;
25062 /* The stack decrement is too big for an immediate value in a single
25063 insn. In theory we could issue multiple subtracts, but after
25064 three of them it becomes more space efficient to place the full
25065 value in the constant pool and load into a register. (Also the
25066 ARM debugger really likes to see only one stack decrement per
25067 function). So instead we look for a scratch register into which
25068 we can load the decrement, and then we subtract this from the
25069 stack pointer. Unfortunately on the thumb the only available
25070 scratch registers are the argument registers, and we cannot use
25071 these as they may hold arguments to the function. Instead we
25072 attempt to locate a call preserved register which is used by this
25073 function. If we can find one, then we know that it will have
25074 been pushed at the start of the prologue and so we can corrupt
25076 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
25077 if (live_regs_mask
& (1 << regno
))
25080 gcc_assert(regno
<= LAST_LO_REGNUM
);
25082 reg
= gen_rtx_REG (SImode
, regno
);
25084 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
25086 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25087 stack_pointer_rtx
, reg
));
25089 dwarf
= gen_rtx_SET (stack_pointer_rtx
,
25090 plus_constant (Pmode
, stack_pointer_rtx
,
25092 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
25093 RTX_FRAME_RELATED_P (insn
) = 1;
25097 if (frame_pointer_needed
)
25098 thumb_set_frame_pointer (offsets
);
25100 /* If we are profiling, make sure no instructions are scheduled before
25101 the call to mcount. Similarly if the user has requested no
25102 scheduling in the prolog. Similarly if we want non-call exceptions
25103 using the EABI unwinder, to prevent faulting instructions from being
25104 swapped with a stack adjustment. */
25105 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
25106 || (arm_except_unwind_info (&global_options
) == UI_TARGET
25107 && cfun
->can_throw_non_call_exceptions
))
25108 emit_insn (gen_blockage ());
25110 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
25111 if (live_regs_mask
& 0xff)
25112 cfun
->machine
->lr_save_eliminated
= 0;
25115 /* Clear caller saved registers not used to pass return values and leaked
25116 condition flags before exiting a cmse_nonsecure_entry function. */
25119 cmse_nonsecure_entry_clear_before_return (void)
25121 int regno
, maxregno
= TARGET_HARD_FLOAT
? LAST_VFP_REGNUM
: IP_REGNUM
;
25122 uint32_t padding_bits_to_clear
= 0;
25123 uint32_t * padding_bits_to_clear_ptr
= &padding_bits_to_clear
;
25124 auto_sbitmap
to_clear_bitmap (maxregno
+ 1);
25128 bitmap_clear (to_clear_bitmap
);
25129 bitmap_set_range (to_clear_bitmap
, R0_REGNUM
, NUM_ARG_REGS
);
25130 bitmap_set_bit (to_clear_bitmap
, IP_REGNUM
);
25132 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25134 if (TARGET_HARD_FLOAT
)
25136 int float_bits
= D7_VFP_REGNUM
- FIRST_VFP_REGNUM
+ 1;
25138 bitmap_set_range (to_clear_bitmap
, FIRST_VFP_REGNUM
, float_bits
);
25140 /* Make sure we don't clear the two scratch registers used to clear the
25141 relevant FPSCR bits in output_return_instruction. */
25142 emit_use (gen_rtx_REG (SImode
, IP_REGNUM
));
25143 bitmap_clear_bit (to_clear_bitmap
, IP_REGNUM
);
25144 emit_use (gen_rtx_REG (SImode
, 4));
25145 bitmap_clear_bit (to_clear_bitmap
, 4);
25148 /* If the user has defined registers to be caller saved, these are no longer
25149 restored by the function before returning and must thus be cleared for
25150 security purposes. */
25151 for (regno
= NUM_ARG_REGS
; regno
<= maxregno
; regno
++)
25153 /* We do not touch registers that can be used to pass arguments as per
25154 the AAPCS, since these should never be made callee-saved by user
25156 if (IN_RANGE (regno
, FIRST_VFP_REGNUM
, D7_VFP_REGNUM
))
25158 if (IN_RANGE (regno
, IP_REGNUM
, PC_REGNUM
))
25160 if (call_used_regs
[regno
])
25161 bitmap_set_bit (to_clear_bitmap
, regno
);
25164 /* Make sure we do not clear the registers used to return the result in. */
25165 result_type
= TREE_TYPE (DECL_RESULT (current_function_decl
));
25166 if (!VOID_TYPE_P (result_type
))
25168 uint64_t to_clear_return_mask
;
25169 result_rtl
= arm_function_value (result_type
, current_function_decl
, 0);
25171 /* No need to check that we return in registers, because we don't
25172 support returning on stack yet. */
25173 gcc_assert (REG_P (result_rtl
));
25174 to_clear_return_mask
25175 = compute_not_to_clear_mask (result_type
, result_rtl
, 0,
25176 padding_bits_to_clear_ptr
);
25177 if (to_clear_return_mask
)
25179 gcc_assert ((unsigned) maxregno
< sizeof (long long) * __CHAR_BIT__
);
25180 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
25182 if (to_clear_return_mask
& (1ULL << regno
))
25183 bitmap_clear_bit (to_clear_bitmap
, regno
);
25188 if (padding_bits_to_clear
!= 0)
25191 auto_sbitmap
to_clear_arg_regs_bitmap (R0_REGNUM
+ NUM_ARG_REGS
);
25193 /* Padding bits to clear is not 0 so we know we are dealing with
25194 returning a composite type, which only uses r0. Let's make sure that
25195 r1-r3 is cleared too, we will use r1 as a scratch register. */
25196 bitmap_clear (to_clear_arg_regs_bitmap
);
25197 bitmap_set_range (to_clear_arg_regs_bitmap
, R0_REGNUM
+ 1,
25199 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap
, to_clear_bitmap
));
25201 reg_rtx
= gen_rtx_REG (SImode
, R1_REGNUM
);
25203 /* Fill the lower half of the negated padding_bits_to_clear. */
25204 emit_move_insn (reg_rtx
,
25205 GEN_INT ((((~padding_bits_to_clear
) << 16u) >> 16u)));
25207 /* Also fill the top half of the negated padding_bits_to_clear. */
25208 if (((~padding_bits_to_clear
) >> 16) > 0)
25209 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode
, reg_rtx
,
25212 GEN_INT ((~padding_bits_to_clear
) >> 16)));
25214 emit_insn (gen_andsi3 (gen_rtx_REG (SImode
, R0_REGNUM
),
25215 gen_rtx_REG (SImode
, R0_REGNUM
),
25219 for (regno
= R0_REGNUM
; regno
<= maxregno
; regno
++)
25221 if (!bitmap_bit_p (to_clear_bitmap
, regno
))
25224 if (IS_VFP_REGNUM (regno
))
25226 /* If regno is an even vfp register and its successor is also to
25227 be cleared, use vmov. */
25228 if (TARGET_VFP_DOUBLE
25229 && VFP_REGNO_OK_FOR_DOUBLE (regno
)
25230 && bitmap_bit_p (to_clear_bitmap
, regno
+ 1))
25232 emit_move_insn (gen_rtx_REG (DFmode
, regno
),
25233 CONST1_RTX (DFmode
));
25234 emit_use (gen_rtx_REG (DFmode
, regno
));
25239 emit_move_insn (gen_rtx_REG (SFmode
, regno
),
25240 CONST1_RTX (SFmode
));
25241 emit_use (gen_rtx_REG (SFmode
, regno
));
25248 if (regno
== R0_REGNUM
)
25249 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25252 /* R0 has either been cleared before, see code above, or it
25253 holds a return value, either way it is not secret
25255 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25256 gen_rtx_REG (SImode
, R0_REGNUM
));
25257 emit_use (gen_rtx_REG (SImode
, regno
));
25261 emit_move_insn (gen_rtx_REG (SImode
, regno
),
25262 gen_rtx_REG (SImode
, LR_REGNUM
));
25263 emit_use (gen_rtx_REG (SImode
, regno
));
25269 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25270 POP instruction can be generated. LR should be replaced by PC. All
25271 the checks required are already done by USE_RETURN_INSN (). Hence,
25272 all we really need to check here is if single register is to be
25273 returned, or multiple register return. */
25275 thumb2_expand_return (bool simple_return
)
25278 unsigned long saved_regs_mask
;
25279 arm_stack_offsets
*offsets
;
25281 offsets
= arm_get_frame_offsets ();
25282 saved_regs_mask
= offsets
->saved_regs_mask
;
25284 for (i
= 0, num_regs
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25285 if (saved_regs_mask
& (1 << i
))
25288 if (!simple_return
&& saved_regs_mask
)
25290 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25291 functions or adapt code to handle according to ACLE. This path should
25292 not be reachable for cmse_nonsecure_entry functions though we prefer
25293 to assert it for now to ensure that future code changes do not silently
25294 change this behavior. */
25295 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25298 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25299 rtx reg
= gen_rtx_REG (SImode
, PC_REGNUM
);
25300 rtx addr
= gen_rtx_MEM (SImode
,
25301 gen_rtx_POST_INC (SImode
,
25302 stack_pointer_rtx
));
25303 set_mem_alias_set (addr
, get_frame_alias_set ());
25304 XVECEXP (par
, 0, 0) = ret_rtx
;
25305 XVECEXP (par
, 0, 1) = gen_rtx_SET (reg
, addr
);
25306 RTX_FRAME_RELATED_P (XVECEXP (par
, 0, 1)) = 1;
25307 emit_jump_insn (par
);
25311 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
25312 saved_regs_mask
|= (1 << PC_REGNUM
);
25313 arm_emit_multi_reg_pop (saved_regs_mask
);
25318 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25319 cmse_nonsecure_entry_clear_before_return ();
25320 emit_jump_insn (simple_return_rtx
);
25325 thumb1_expand_epilogue (void)
25327 HOST_WIDE_INT amount
;
25328 arm_stack_offsets
*offsets
;
25331 /* Naked functions don't have prologues. */
25332 if (IS_NAKED (arm_current_func_type ()))
25335 offsets
= arm_get_frame_offsets ();
25336 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25338 if (frame_pointer_needed
)
25340 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
25341 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25343 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
25345 gcc_assert (amount
>= 0);
25348 emit_insn (gen_blockage ());
25351 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
25352 GEN_INT (amount
)));
25355 /* r3 is always free in the epilogue. */
25356 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
25358 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
25359 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
25363 /* Emit a USE (stack_pointer_rtx), so that
25364 the stack adjustment will not be deleted. */
25365 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25367 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
25368 emit_insn (gen_blockage ());
25370 /* Emit a clobber for each insn that will be restored in the epilogue,
25371 so that flow2 will get register lifetimes correct. */
25372 for (regno
= 0; regno
< 13; regno
++)
25373 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
25374 emit_clobber (gen_rtx_REG (SImode
, regno
));
25376 if (! df_regs_ever_live_p (LR_REGNUM
))
25377 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
25379 /* Clear all caller-saved regs that are not used to return. */
25380 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25381 cmse_nonsecure_entry_clear_before_return ();
25384 /* Epilogue code for APCS frame. */
25386 arm_expand_epilogue_apcs_frame (bool really_return
)
25388 unsigned long func_type
;
25389 unsigned long saved_regs_mask
;
25392 int floats_from_frame
= 0;
25393 arm_stack_offsets
*offsets
;
25395 gcc_assert (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
);
25396 func_type
= arm_current_func_type ();
25398 /* Get frame offsets for ARM. */
25399 offsets
= arm_get_frame_offsets ();
25400 saved_regs_mask
= offsets
->saved_regs_mask
;
25402 /* Find the offset of the floating-point save area in the frame. */
25404 = (offsets
->saved_args
25405 + arm_compute_static_chain_stack_bytes ()
25408 /* Compute how many core registers saved and how far away the floats are. */
25409 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25410 if (saved_regs_mask
& (1 << i
))
25413 floats_from_frame
+= 4;
25416 if (TARGET_HARD_FLOAT
)
25419 rtx ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
25421 /* The offset is from IP_REGNUM. */
25422 int saved_size
= arm_get_vfp_saved_size ();
25423 if (saved_size
> 0)
25426 floats_from_frame
+= saved_size
;
25427 insn
= emit_insn (gen_addsi3 (ip_rtx
,
25428 hard_frame_pointer_rtx
,
25429 GEN_INT (-floats_from_frame
)));
25430 arm_add_cfa_adjust_cfa_note (insn
, -floats_from_frame
,
25431 ip_rtx
, hard_frame_pointer_rtx
);
25434 /* Generate VFP register multi-pop. */
25435 start_reg
= FIRST_VFP_REGNUM
;
25437 for (i
= FIRST_VFP_REGNUM
; i
< LAST_VFP_REGNUM
; i
+= 2)
25438 /* Look for a case where a reg does not need restoring. */
25439 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25440 && (!df_regs_ever_live_p (i
+ 1)
25441 || call_used_regs
[i
+ 1]))
25443 if (start_reg
!= i
)
25444 arm_emit_vfp_multi_reg_pop (start_reg
,
25445 (i
- start_reg
) / 2,
25446 gen_rtx_REG (SImode
,
25451 /* Restore the remaining regs that we have discovered (or possibly
25452 even all of them, if the conditional in the for loop never
25454 if (start_reg
!= i
)
25455 arm_emit_vfp_multi_reg_pop (start_reg
,
25456 (i
- start_reg
) / 2,
25457 gen_rtx_REG (SImode
, IP_REGNUM
));
25462 /* The frame pointer is guaranteed to be non-double-word aligned, as
25463 it is set to double-word-aligned old_stack_pointer - 4. */
25465 int lrm_count
= (num_regs
% 2) ? (num_regs
+ 2) : (num_regs
+ 1);
25467 for (i
= LAST_IWMMXT_REGNUM
; i
>= FIRST_IWMMXT_REGNUM
; i
--)
25468 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25470 rtx addr
= gen_frame_mem (V2SImode
,
25471 plus_constant (Pmode
, hard_frame_pointer_rtx
,
25473 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25474 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25475 gen_rtx_REG (V2SImode
, i
),
25481 /* saved_regs_mask should contain IP which contains old stack pointer
25482 at the time of activation creation. Since SP and IP are adjacent registers,
25483 we can restore the value directly into SP. */
25484 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
25485 saved_regs_mask
&= ~(1 << IP_REGNUM
);
25486 saved_regs_mask
|= (1 << SP_REGNUM
);
25488 /* There are two registers left in saved_regs_mask - LR and PC. We
25489 only need to restore LR (the return address), but to
25490 save time we can load it directly into PC, unless we need a
25491 special function exit sequence, or we are not really returning. */
25493 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
25494 && !crtl
->calls_eh_return
)
25495 /* Delete LR from the register mask, so that LR on
25496 the stack is loaded into the PC in the register mask. */
25497 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25499 saved_regs_mask
&= ~(1 << PC_REGNUM
);
25501 num_regs
= bit_count (saved_regs_mask
);
25502 if ((offsets
->outgoing_args
!= (1 + num_regs
)) || cfun
->calls_alloca
)
25505 emit_insn (gen_blockage ());
25506 /* Unwind the stack to just below the saved registers. */
25507 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25508 hard_frame_pointer_rtx
,
25509 GEN_INT (- 4 * num_regs
)));
25511 arm_add_cfa_adjust_cfa_note (insn
, - 4 * num_regs
,
25512 stack_pointer_rtx
, hard_frame_pointer_rtx
);
25515 arm_emit_multi_reg_pop (saved_regs_mask
);
25517 if (IS_INTERRUPT (func_type
))
25519 /* Interrupt handlers will have pushed the
25520 IP onto the stack, so restore it now. */
25522 rtx addr
= gen_rtx_MEM (SImode
,
25523 gen_rtx_POST_INC (SImode
,
25524 stack_pointer_rtx
));
25525 set_mem_alias_set (addr
, get_frame_alias_set ());
25526 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, IP_REGNUM
), addr
));
25527 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25528 gen_rtx_REG (SImode
, IP_REGNUM
),
25532 if (!really_return
|| (saved_regs_mask
& (1 << PC_REGNUM
)))
25535 if (crtl
->calls_eh_return
)
25536 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25538 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25540 if (IS_STACKALIGN (func_type
))
25541 /* Restore the original stack pointer. Before prologue, the stack was
25542 realigned and the original stack pointer saved in r0. For details,
25543 see comment in arm_expand_prologue. */
25544 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25546 emit_jump_insn (simple_return_rtx
);
25549 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25550 function is not a sibcall. */
25552 arm_expand_epilogue (bool really_return
)
25554 unsigned long func_type
;
25555 unsigned long saved_regs_mask
;
25559 arm_stack_offsets
*offsets
;
25561 func_type
= arm_current_func_type ();
25563 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25564 let output_return_instruction take care of instruction emission if any. */
25565 if (IS_NAKED (func_type
)
25566 || (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
))
25569 emit_jump_insn (simple_return_rtx
);
25573 /* If we are throwing an exception, then we really must be doing a
25574 return, so we can't tail-call. */
25575 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
25577 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
25579 arm_expand_epilogue_apcs_frame (really_return
);
25583 /* Get frame offsets for ARM. */
25584 offsets
= arm_get_frame_offsets ();
25585 saved_regs_mask
= offsets
->saved_regs_mask
;
25586 num_regs
= bit_count (saved_regs_mask
);
25588 if (frame_pointer_needed
)
25591 /* Restore stack pointer if necessary. */
25594 /* In ARM mode, frame pointer points to first saved register.
25595 Restore stack pointer to last saved register. */
25596 amount
= offsets
->frame
- offsets
->saved_regs
;
25598 /* Force out any pending memory operations that reference stacked data
25599 before stack de-allocation occurs. */
25600 emit_insn (gen_blockage ());
25601 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25602 hard_frame_pointer_rtx
,
25603 GEN_INT (amount
)));
25604 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25606 hard_frame_pointer_rtx
);
25608 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25610 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25614 /* In Thumb-2 mode, the frame pointer points to the last saved
25616 amount
= offsets
->locals_base
- offsets
->saved_regs
;
25619 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
25620 hard_frame_pointer_rtx
,
25621 GEN_INT (amount
)));
25622 arm_add_cfa_adjust_cfa_note (insn
, amount
,
25623 hard_frame_pointer_rtx
,
25624 hard_frame_pointer_rtx
);
25627 /* Force out any pending memory operations that reference stacked data
25628 before stack de-allocation occurs. */
25629 emit_insn (gen_blockage ());
25630 insn
= emit_insn (gen_movsi (stack_pointer_rtx
,
25631 hard_frame_pointer_rtx
));
25632 arm_add_cfa_adjust_cfa_note (insn
, 0,
25634 hard_frame_pointer_rtx
);
25635 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25637 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25642 /* Pop off outgoing args and local frame to adjust stack pointer to
25643 last saved register. */
25644 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
25648 /* Force out any pending memory operations that reference stacked data
25649 before stack de-allocation occurs. */
25650 emit_insn (gen_blockage ());
25651 tmp
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
25653 GEN_INT (amount
)));
25654 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25655 stack_pointer_rtx
, stack_pointer_rtx
);
25656 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25658 emit_insn (gen_force_register_use (stack_pointer_rtx
));
25662 if (TARGET_HARD_FLOAT
)
25664 /* Generate VFP register multi-pop. */
25665 int end_reg
= LAST_VFP_REGNUM
+ 1;
25667 /* Scan the registers in reverse order. We need to match
25668 any groupings made in the prologue and generate matching
25669 vldm operations. The need to match groups is because,
25670 unlike pop, vldm can only do consecutive regs. */
25671 for (i
= LAST_VFP_REGNUM
- 1; i
>= FIRST_VFP_REGNUM
; i
-= 2)
25672 /* Look for a case where a reg does not need restoring. */
25673 if ((!df_regs_ever_live_p (i
) || call_used_regs
[i
])
25674 && (!df_regs_ever_live_p (i
+ 1)
25675 || call_used_regs
[i
+ 1]))
25677 /* Restore the regs discovered so far (from reg+2 to
25679 if (end_reg
> i
+ 2)
25680 arm_emit_vfp_multi_reg_pop (i
+ 2,
25681 (end_reg
- (i
+ 2)) / 2,
25682 stack_pointer_rtx
);
25686 /* Restore the remaining regs that we have discovered (or possibly
25687 even all of them, if the conditional in the for loop never
25689 if (end_reg
> i
+ 2)
25690 arm_emit_vfp_multi_reg_pop (i
+ 2,
25691 (end_reg
- (i
+ 2)) / 2,
25692 stack_pointer_rtx
);
25696 for (i
= FIRST_IWMMXT_REGNUM
; i
<= LAST_IWMMXT_REGNUM
; i
++)
25697 if (df_regs_ever_live_p (i
) && !call_used_regs
[i
])
25700 rtx addr
= gen_rtx_MEM (V2SImode
,
25701 gen_rtx_POST_INC (SImode
,
25702 stack_pointer_rtx
));
25703 set_mem_alias_set (addr
, get_frame_alias_set ());
25704 insn
= emit_insn (gen_movsi (gen_rtx_REG (V2SImode
, i
), addr
));
25705 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25706 gen_rtx_REG (V2SImode
, i
),
25708 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25709 stack_pointer_rtx
, stack_pointer_rtx
);
25712 if (saved_regs_mask
)
25715 bool return_in_pc
= false;
25717 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
25718 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
25719 && !IS_CMSE_ENTRY (func_type
)
25720 && !IS_STACKALIGN (func_type
)
25722 && crtl
->args
.pretend_args_size
== 0
25723 && saved_regs_mask
& (1 << LR_REGNUM
)
25724 && !crtl
->calls_eh_return
)
25726 saved_regs_mask
&= ~(1 << LR_REGNUM
);
25727 saved_regs_mask
|= (1 << PC_REGNUM
);
25728 return_in_pc
= true;
25731 if (num_regs
== 1 && (!IS_INTERRUPT (func_type
) || !return_in_pc
))
25733 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
25734 if (saved_regs_mask
& (1 << i
))
25736 rtx addr
= gen_rtx_MEM (SImode
,
25737 gen_rtx_POST_INC (SImode
,
25738 stack_pointer_rtx
));
25739 set_mem_alias_set (addr
, get_frame_alias_set ());
25741 if (i
== PC_REGNUM
)
25743 insn
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (2));
25744 XVECEXP (insn
, 0, 0) = ret_rtx
;
25745 XVECEXP (insn
, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode
, i
),
25747 RTX_FRAME_RELATED_P (XVECEXP (insn
, 0, 1)) = 1;
25748 insn
= emit_jump_insn (insn
);
25752 insn
= emit_insn (gen_movsi (gen_rtx_REG (SImode
, i
),
25754 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_RESTORE
,
25755 gen_rtx_REG (SImode
, i
),
25757 arm_add_cfa_adjust_cfa_note (insn
, UNITS_PER_WORD
,
25759 stack_pointer_rtx
);
25766 && current_tune
->prefer_ldrd_strd
25767 && !optimize_function_for_size_p (cfun
))
25770 thumb2_emit_ldrd_pop (saved_regs_mask
);
25771 else if (TARGET_ARM
&& !IS_INTERRUPT (func_type
))
25772 arm_emit_ldrd_pop (saved_regs_mask
);
25774 arm_emit_multi_reg_pop (saved_regs_mask
);
25777 arm_emit_multi_reg_pop (saved_regs_mask
);
25785 = crtl
->args
.pretend_args_size
+ arm_compute_static_chain_stack_bytes();
25789 rtx dwarf
= NULL_RTX
;
25791 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25793 GEN_INT (amount
)));
25795 RTX_FRAME_RELATED_P (tmp
) = 1;
25797 if (cfun
->machine
->uses_anonymous_args
)
25799 /* Restore pretend args. Refer arm_expand_prologue on how to save
25800 pretend_args in stack. */
25801 int num_regs
= crtl
->args
.pretend_args_size
/ 4;
25802 saved_regs_mask
= (0xf0 >> num_regs
) & 0xf;
25803 for (j
= 0, i
= 0; j
< num_regs
; i
++)
25804 if (saved_regs_mask
& (1 << i
))
25806 rtx reg
= gen_rtx_REG (SImode
, i
);
25807 dwarf
= alloc_reg_note (REG_CFA_RESTORE
, reg
, dwarf
);
25810 REG_NOTES (tmp
) = dwarf
;
25812 arm_add_cfa_adjust_cfa_note (tmp
, amount
,
25813 stack_pointer_rtx
, stack_pointer_rtx
);
25816 /* Clear all caller-saved regs that are not used to return. */
25817 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25819 /* CMSE_ENTRY always returns. */
25820 gcc_assert (really_return
);
25821 cmse_nonsecure_entry_clear_before_return ();
25824 if (!really_return
)
25827 if (crtl
->calls_eh_return
)
25828 emit_insn (gen_addsi3 (stack_pointer_rtx
,
25830 gen_rtx_REG (SImode
, ARM_EH_STACKADJ_REGNUM
)));
25832 if (IS_STACKALIGN (func_type
))
25833 /* Restore the original stack pointer. Before prologue, the stack was
25834 realigned and the original stack pointer saved in r0. For details,
25835 see comment in arm_expand_prologue. */
25836 emit_insn (gen_movsi (stack_pointer_rtx
, gen_rtx_REG (SImode
, R0_REGNUM
)));
25838 emit_jump_insn (simple_return_rtx
);
25841 /* Implementation of insn prologue_thumb1_interwork. This is the first
25842 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25845 thumb1_output_interwork (void)
25848 FILE *f
= asm_out_file
;
25850 gcc_assert (MEM_P (DECL_RTL (current_function_decl
)));
25851 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
25853 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
25855 /* Generate code sequence to switch us into Thumb mode. */
25856 /* The .code 32 directive has already been emitted by
25857 ASM_DECLARE_FUNCTION_NAME. */
25858 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
25859 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
25861 /* Generate a label, so that the debugger will notice the
25862 change in instruction sets. This label is also used by
25863 the assembler to bypass the ARM code when this function
25864 is called from a Thumb encoded function elsewhere in the
25865 same file. Hence the definition of STUB_NAME here must
25866 agree with the definition in gas/config/tc-arm.c. */
25868 #define STUB_NAME ".real_start_of"
25870 fprintf (f
, "\t.code\t16\n");
25872 if (arm_dllexport_name_p (name
))
25873 name
= arm_strip_name_encoding (name
);
25875 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
25876 fprintf (f
, "\t.thumb_func\n");
25877 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
25882 /* Handle the case of a double word load into a low register from
25883 a computed memory address. The computed address may involve a
25884 register which is overwritten by the load. */
25886 thumb_load_double_from_address (rtx
*operands
)
25894 gcc_assert (REG_P (operands
[0]));
25895 gcc_assert (MEM_P (operands
[1]));
25897 /* Get the memory address. */
25898 addr
= XEXP (operands
[1], 0);
25900 /* Work out how the memory address is computed. */
25901 switch (GET_CODE (addr
))
25904 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25906 if (REGNO (operands
[0]) == REGNO (addr
))
25908 output_asm_insn ("ldr\t%H0, %2", operands
);
25909 output_asm_insn ("ldr\t%0, %1", operands
);
25913 output_asm_insn ("ldr\t%0, %1", operands
);
25914 output_asm_insn ("ldr\t%H0, %2", operands
);
25919 /* Compute <address> + 4 for the high order load. */
25920 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25922 output_asm_insn ("ldr\t%0, %1", operands
);
25923 output_asm_insn ("ldr\t%H0, %2", operands
);
25927 arg1
= XEXP (addr
, 0);
25928 arg2
= XEXP (addr
, 1);
25930 if (CONSTANT_P (arg1
))
25931 base
= arg2
, offset
= arg1
;
25933 base
= arg1
, offset
= arg2
;
25935 gcc_assert (REG_P (base
));
25937 /* Catch the case of <address> = <reg> + <reg> */
25938 if (REG_P (offset
))
25940 int reg_offset
= REGNO (offset
);
25941 int reg_base
= REGNO (base
);
25942 int reg_dest
= REGNO (operands
[0]);
25944 /* Add the base and offset registers together into the
25945 higher destination register. */
25946 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
25947 reg_dest
+ 1, reg_base
, reg_offset
);
25949 /* Load the lower destination register from the address in
25950 the higher destination register. */
25951 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
25952 reg_dest
, reg_dest
+ 1);
25954 /* Load the higher destination register from its own address
25956 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
25957 reg_dest
+ 1, reg_dest
+ 1);
25961 /* Compute <address> + 4 for the high order load. */
25962 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25964 /* If the computed address is held in the low order register
25965 then load the high order register first, otherwise always
25966 load the low order register first. */
25967 if (REGNO (operands
[0]) == REGNO (base
))
25969 output_asm_insn ("ldr\t%H0, %2", operands
);
25970 output_asm_insn ("ldr\t%0, %1", operands
);
25974 output_asm_insn ("ldr\t%0, %1", operands
);
25975 output_asm_insn ("ldr\t%H0, %2", operands
);
25981 /* With no registers to worry about we can just load the value
25983 operands
[2] = adjust_address (operands
[1], SImode
, 4);
25985 output_asm_insn ("ldr\t%H0, %2", operands
);
25986 output_asm_insn ("ldr\t%0, %1", operands
);
25990 gcc_unreachable ();
25997 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
26002 if (REGNO (operands
[4]) > REGNO (operands
[5]))
26003 std::swap (operands
[4], operands
[5]);
26005 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
26006 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
26010 if (REGNO (operands
[4]) > REGNO (operands
[5]))
26011 std::swap (operands
[4], operands
[5]);
26012 if (REGNO (operands
[5]) > REGNO (operands
[6]))
26013 std::swap (operands
[5], operands
[6]);
26014 if (REGNO (operands
[4]) > REGNO (operands
[5]))
26015 std::swap (operands
[4], operands
[5]);
26017 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
26018 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
26022 gcc_unreachable ();
26028 /* Output a call-via instruction for thumb state. */
26030 thumb_call_via_reg (rtx reg
)
26032 int regno
= REGNO (reg
);
26035 gcc_assert (regno
< LR_REGNUM
);
26037 /* If we are in the normal text section we can use a single instance
26038 per compilation unit. If we are doing function sections, then we need
26039 an entry per section, since we can't rely on reachability. */
26040 if (in_section
== text_section
)
26042 thumb_call_reg_needed
= 1;
26044 if (thumb_call_via_label
[regno
] == NULL
)
26045 thumb_call_via_label
[regno
] = gen_label_rtx ();
26046 labelp
= thumb_call_via_label
+ regno
;
26050 if (cfun
->machine
->call_via
[regno
] == NULL
)
26051 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
26052 labelp
= cfun
->machine
->call_via
+ regno
;
26055 output_asm_insn ("bl\t%a0", labelp
);
26059 /* Routines for generating rtl. */
26061 thumb_expand_movmemqi (rtx
*operands
)
26063 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
26064 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
26065 HOST_WIDE_INT len
= INTVAL (operands
[2]);
26066 HOST_WIDE_INT offset
= 0;
26070 emit_insn (gen_movmem12b (out
, in
, out
, in
));
26076 emit_insn (gen_movmem8b (out
, in
, out
, in
));
26082 rtx reg
= gen_reg_rtx (SImode
);
26083 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
26084 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
26091 rtx reg
= gen_reg_rtx (HImode
);
26092 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
26093 plus_constant (Pmode
, in
,
26095 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (Pmode
, out
,
26104 rtx reg
= gen_reg_rtx (QImode
);
26105 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
26106 plus_constant (Pmode
, in
,
26108 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (Pmode
, out
,
26115 thumb_reload_out_hi (rtx
*operands
)
26117 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
26120 /* Return the length of a function name prefix
26121 that starts with the character 'c'. */
26123 arm_get_strip_length (int c
)
26127 ARM_NAME_ENCODING_LENGTHS
26132 /* Return a pointer to a function's name with any
26133 and all prefix encodings stripped from it. */
26135 arm_strip_name_encoding (const char *name
)
26139 while ((skip
= arm_get_strip_length (* name
)))
26145 /* If there is a '*' anywhere in the name's prefix, then
26146 emit the stripped name verbatim, otherwise prepend an
26147 underscore if leading underscores are being used. */
26149 arm_asm_output_labelref (FILE *stream
, const char *name
)
26154 while ((skip
= arm_get_strip_length (* name
)))
26156 verbatim
|= (*name
== '*');
26161 fputs (name
, stream
);
26163 asm_fprintf (stream
, "%U%s", name
);
26166 /* This function is used to emit an EABI tag and its associated value.
26167 We emit the numerical value of the tag in case the assembler does not
26168 support textual tags. (Eg gas prior to 2.20). If requested we include
26169 the tag name in a comment so that anyone reading the assembler output
26170 will know which tag is being set.
26172 This function is not static because arm-c.c needs it too. */
26175 arm_emit_eabi_attribute (const char *name
, int num
, int val
)
26177 asm_fprintf (asm_out_file
, "\t.eabi_attribute %d, %d", num
, val
);
26178 if (flag_verbose_asm
|| flag_debug_asm
)
26179 asm_fprintf (asm_out_file
, "\t%s %s", ASM_COMMENT_START
, name
);
26180 asm_fprintf (asm_out_file
, "\n");
26183 /* This function is used to print CPU tuning information as comment
26184 in assembler file. Pointers are not printed for now. */
26187 arm_print_tune_info (void)
26189 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune parameters\n");
26190 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"constant_limit:\t%d\n",
26191 current_tune
->constant_limit
);
26192 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26193 "max_insns_skipped:\t%d\n", current_tune
->max_insns_skipped
);
26194 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26195 "prefetch.num_slots:\t%d\n", current_tune
->prefetch
.num_slots
);
26196 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26197 "prefetch.l1_cache_size:\t%d\n",
26198 current_tune
->prefetch
.l1_cache_size
);
26199 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26200 "prefetch.l1_cache_line_size:\t%d\n",
26201 current_tune
->prefetch
.l1_cache_line_size
);
26202 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26203 "prefer_constant_pool:\t%d\n",
26204 (int) current_tune
->prefer_constant_pool
);
26205 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26206 "branch_cost:\t(s:speed, p:predictable)\n");
26207 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\ts&p\tcost\n");
26208 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t00\t%d\n",
26209 current_tune
->branch_cost (false, false));
26210 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t01\t%d\n",
26211 current_tune
->branch_cost (false, true));
26212 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t10\t%d\n",
26213 current_tune
->branch_cost (true, false));
26214 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"\t\t11\t%d\n",
26215 current_tune
->branch_cost (true, true));
26216 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26217 "prefer_ldrd_strd:\t%d\n",
26218 (int) current_tune
->prefer_ldrd_strd
);
26219 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26220 "logical_op_non_short_circuit:\t[%d,%d]\n",
26221 (int) current_tune
->logical_op_non_short_circuit_thumb
,
26222 (int) current_tune
->logical_op_non_short_circuit_arm
);
26223 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26224 "prefer_neon_for_64bits:\t%d\n",
26225 (int) current_tune
->prefer_neon_for_64bits
);
26226 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26227 "disparage_flag_setting_t16_encodings:\t%d\n",
26228 (int) current_tune
->disparage_flag_setting_t16_encodings
);
26229 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26230 "string_ops_prefer_neon:\t%d\n",
26231 (int) current_tune
->string_ops_prefer_neon
);
26232 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
26233 "max_insns_inline_memset:\t%d\n",
26234 current_tune
->max_insns_inline_memset
);
26235 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"fusible_ops:\t%u\n",
26236 current_tune
->fusible_ops
);
26237 asm_fprintf (asm_out_file
, "\t\t" ASM_COMMENT_START
"sched_autopref:\t%d\n",
26238 (int) current_tune
->sched_autopref
);
26241 /* Print .arch and .arch_extension directives corresponding to the
26242 current architecture configuration. */
26244 arm_print_asm_arch_directives ()
26246 const arch_option
*arch
26247 = arm_parse_arch_option_name (all_architectures
, "-march",
26248 arm_active_target
.arch_name
);
26249 auto_sbitmap
opt_bits (isa_num_bits
);
26253 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_active_target
.arch_name
);
26254 if (!arch
->common
.extensions
)
26257 for (const struct cpu_arch_extension
*opt
= arch
->common
.extensions
;
26263 arm_initialize_isa (opt_bits
, opt
->isa_bits
);
26265 /* If every feature bit of this option is set in the target
26266 ISA specification, print out the option name. However,
26267 don't print anything if all the bits are part of the
26268 FPU specification. */
26269 if (bitmap_subset_p (opt_bits
, arm_active_target
.isa
)
26270 && !bitmap_subset_p (opt_bits
, isa_all_fpubits
))
26271 asm_fprintf (asm_out_file
, "\t.arch_extension %s\n", opt
->name
);
26277 arm_file_start (void)
26283 /* We don't have a specified CPU. Use the architecture to
26286 Note: it might be better to do this unconditionally, then the
26287 assembler would not need to know about all new CPU names as
26289 if (!arm_active_target
.core_name
)
26291 /* armv7ve doesn't support any extensions. */
26292 if (strcmp (arm_active_target
.arch_name
, "armv7ve") == 0)
26294 /* Keep backward compatability for assemblers
26295 which don't support armv7ve. */
26296 asm_fprintf (asm_out_file
, "\t.arch armv7-a\n");
26297 asm_fprintf (asm_out_file
, "\t.arch_extension virt\n");
26298 asm_fprintf (asm_out_file
, "\t.arch_extension idiv\n");
26299 asm_fprintf (asm_out_file
, "\t.arch_extension sec\n");
26300 asm_fprintf (asm_out_file
, "\t.arch_extension mp\n");
26303 arm_print_asm_arch_directives ();
26305 else if (strncmp (arm_active_target
.core_name
, "generic", 7) == 0)
26306 asm_fprintf (asm_out_file
, "\t.arch %s\n",
26307 arm_active_target
.core_name
+ 8);
26310 const char* truncated_name
26311 = arm_rewrite_selected_cpu (arm_active_target
.core_name
);
26312 asm_fprintf (asm_out_file
, "\t.cpu %s\n", truncated_name
);
26315 if (print_tune_info
)
26316 arm_print_tune_info ();
26318 if (! TARGET_SOFT_FLOAT
)
26320 if (TARGET_HARD_FLOAT
&& TARGET_VFP_SINGLE
)
26321 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26323 if (TARGET_HARD_FLOAT_ABI
)
26324 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26327 /* Some of these attributes only apply when the corresponding features
26328 are used. However we don't have any easy way of figuring this out.
26329 Conservatively record the setting that would have been used. */
26331 if (flag_rounding_math
)
26332 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26334 if (!flag_unsafe_math_optimizations
)
26336 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26337 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26339 if (flag_signaling_nans
)
26340 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26342 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26343 flag_finite_math_only
? 1 : 3);
26345 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26346 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26347 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26348 flag_short_enums
? 1 : 2);
26350 /* Tag_ABI_optimization_goals. */
26353 else if (optimize
>= 2)
26359 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val
);
26361 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26364 if (arm_fp16_format
)
26365 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26366 (int) arm_fp16_format
);
26368 if (arm_lang_output_object_attributes_hook
)
26369 arm_lang_output_object_attributes_hook();
26372 default_file_start ();
26376 arm_file_end (void)
26380 if (NEED_INDICATE_EXEC_STACK
)
26381 /* Add .note.GNU-stack. */
26382 file_end_indicate_exec_stack ();
26384 if (! thumb_call_reg_needed
)
26387 switch_to_section (text_section
);
26388 asm_fprintf (asm_out_file
, "\t.code 16\n");
26389 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
26391 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
26393 rtx label
= thumb_call_via_label
[regno
];
26397 targetm
.asm_out
.internal_label (asm_out_file
, "L",
26398 CODE_LABEL_NUMBER (label
));
26399 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
26405 /* Symbols in the text segment can be accessed without indirecting via the
26406 constant pool; it may take an extra binary operation, but this is still
26407 faster than indirecting via memory. Don't do this when not optimizing,
26408 since we won't be calculating al of the offsets necessary to do this
26412 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
26414 if (optimize
> 0 && TREE_CONSTANT (decl
))
26415 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
26417 default_encode_section_info (decl
, rtl
, first
);
26419 #endif /* !ARM_PE */
26422 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
26424 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
26425 && !strcmp (prefix
, "L"))
26427 arm_ccfsm_state
= 0;
26428 arm_target_insn
= NULL
;
26430 default_internal_label (stream
, prefix
, labelno
);
26433 /* Output code to add DELTA to the first argument, and then jump
26434 to FUNCTION. Used for C++ multiple inheritance. */
26437 arm_thumb1_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26438 HOST_WIDE_INT
, tree function
)
26440 static int thunk_label
= 0;
26443 int mi_delta
= delta
;
26444 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
26446 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
26449 mi_delta
= - mi_delta
;
26451 final_start_function (emit_barrier (), file
, 1);
26455 int labelno
= thunk_label
++;
26456 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
26457 /* Thunks are entered in arm mode when available. */
26458 if (TARGET_THUMB1_ONLY
)
26460 /* push r3 so we can use it as a temporary. */
26461 /* TODO: Omit this save if r3 is not used. */
26462 fputs ("\tpush {r3}\n", file
);
26463 fputs ("\tldr\tr3, ", file
);
26467 fputs ("\tldr\tr12, ", file
);
26469 assemble_name (file
, label
);
26470 fputc ('\n', file
);
26473 /* If we are generating PIC, the ldr instruction below loads
26474 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26475 the address of the add + 8, so we have:
26477 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26480 Note that we have "+ 1" because some versions of GNU ld
26481 don't set the low bit of the result for R_ARM_REL32
26482 relocations against thumb function symbols.
26483 On ARMv6M this is +4, not +8. */
26484 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
26485 assemble_name (file
, labelpc
);
26486 fputs (":\n", file
);
26487 if (TARGET_THUMB1_ONLY
)
26489 /* This is 2 insns after the start of the thunk, so we know it
26490 is 4-byte aligned. */
26491 fputs ("\tadd\tr3, pc, r3\n", file
);
26492 fputs ("\tmov r12, r3\n", file
);
26495 fputs ("\tadd\tr12, pc, r12\n", file
);
26497 else if (TARGET_THUMB1_ONLY
)
26498 fputs ("\tmov r12, r3\n", file
);
26500 if (TARGET_THUMB1_ONLY
)
26502 if (mi_delta
> 255)
26504 fputs ("\tldr\tr3, ", file
);
26505 assemble_name (file
, label
);
26506 fputs ("+4\n", file
);
26507 asm_fprintf (file
, "\t%ss\t%r, %r, r3\n",
26508 mi_op
, this_regno
, this_regno
);
26510 else if (mi_delta
!= 0)
26512 /* Thumb1 unified syntax requires s suffix in instruction name when
26513 one of the operands is immediate. */
26514 asm_fprintf (file
, "\t%ss\t%r, %r, #%d\n",
26515 mi_op
, this_regno
, this_regno
,
26521 /* TODO: Use movw/movt for large constants when available. */
26522 while (mi_delta
!= 0)
26524 if ((mi_delta
& (3 << shift
)) == 0)
26528 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
26529 mi_op
, this_regno
, this_regno
,
26530 mi_delta
& (0xff << shift
));
26531 mi_delta
&= ~(0xff << shift
);
26538 if (TARGET_THUMB1_ONLY
)
26539 fputs ("\tpop\t{r3}\n", file
);
26541 fprintf (file
, "\tbx\tr12\n");
26542 ASM_OUTPUT_ALIGN (file
, 2);
26543 assemble_name (file
, label
);
26544 fputs (":\n", file
);
26547 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26548 rtx tem
= XEXP (DECL_RTL (function
), 0);
26549 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26550 pipeline offset is four rather than eight. Adjust the offset
26552 tem
= plus_constant (GET_MODE (tem
), tem
,
26553 TARGET_THUMB1_ONLY
? -3 : -7);
26554 tem
= gen_rtx_MINUS (GET_MODE (tem
),
26556 gen_rtx_SYMBOL_REF (Pmode
,
26557 ggc_strdup (labelpc
)));
26558 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
26561 /* Output ".word .LTHUNKn". */
26562 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
26564 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
26565 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
26569 fputs ("\tb\t", file
);
26570 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
26571 if (NEED_PLT_RELOC
)
26572 fputs ("(PLT)", file
);
26573 fputc ('\n', file
);
26576 final_end_function ();
26579 /* MI thunk handling for TARGET_32BIT. */
26582 arm32_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
26583 HOST_WIDE_INT vcall_offset
, tree function
)
26585 /* On ARM, this_regno is R0 or R1 depending on
26586 whether the function returns an aggregate or not.
26588 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)),
26590 ? R1_REGNUM
: R0_REGNUM
);
26592 rtx temp
= gen_rtx_REG (Pmode
, IP_REGNUM
);
26593 rtx this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
26594 reload_completed
= 1;
26595 emit_note (NOTE_INSN_PROLOGUE_END
);
26597 /* Add DELTA to THIS_RTX. */
26599 arm_split_constant (PLUS
, Pmode
, NULL_RTX
,
26600 delta
, this_rtx
, this_rtx
, false);
26602 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26603 if (vcall_offset
!= 0)
26605 /* Load *THIS_RTX. */
26606 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, this_rtx
));
26607 /* Compute *THIS_RTX + VCALL_OFFSET. */
26608 arm_split_constant (PLUS
, Pmode
, NULL_RTX
, vcall_offset
, temp
, temp
,
26610 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26611 emit_move_insn (temp
, gen_rtx_MEM (Pmode
, temp
));
26612 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, temp
));
26615 /* Generate a tail call to the target function. */
26616 if (!TREE_USED (function
))
26618 assemble_external (function
);
26619 TREE_USED (function
) = 1;
26621 rtx funexp
= XEXP (DECL_RTL (function
), 0);
26622 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
26623 rtx_insn
* insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
26624 SIBLING_CALL_P (insn
) = 1;
26626 insn
= get_insns ();
26627 shorten_branches (insn
);
26628 final_start_function (insn
, file
, 1);
26629 final (insn
, file
, 1);
26630 final_end_function ();
26632 /* Stop pretending this is a post-reload pass. */
26633 reload_completed
= 0;
26636 /* Output code to add DELTA to the first argument, and then jump
26637 to FUNCTION. Used for C++ multiple inheritance. */
26640 arm_output_mi_thunk (FILE *file
, tree thunk
, HOST_WIDE_INT delta
,
26641 HOST_WIDE_INT vcall_offset
, tree function
)
26644 arm32_output_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26646 arm_thumb1_mi_thunk (file
, thunk
, delta
, vcall_offset
, function
);
26650 arm_emit_vector_const (FILE *file
, rtx x
)
26653 const char * pattern
;
26655 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
26657 switch (GET_MODE (x
))
26659 case E_V2SImode
: pattern
= "%08x"; break;
26660 case E_V4HImode
: pattern
= "%04x"; break;
26661 case E_V8QImode
: pattern
= "%02x"; break;
26662 default: gcc_unreachable ();
26665 fprintf (file
, "0x");
26666 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
26670 element
= CONST_VECTOR_ELT (x
, i
);
26671 fprintf (file
, pattern
, INTVAL (element
));
26677 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26678 HFmode constant pool entries are actually loaded with ldr. */
26680 arm_emit_fp16_const (rtx c
)
26684 bits
= real_to_target (NULL
, CONST_DOUBLE_REAL_VALUE (c
), HFmode
);
26685 if (WORDS_BIG_ENDIAN
)
26686 assemble_zeros (2);
26687 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
26688 if (!WORDS_BIG_ENDIAN
)
26689 assemble_zeros (2);
26693 arm_output_load_gr (rtx
*operands
)
26700 if (!MEM_P (operands
[1])
26701 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
26702 || !REG_P (reg
= XEXP (sum
, 0))
26703 || !CONST_INT_P (offset
= XEXP (sum
, 1))
26704 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
26705 return "wldrw%?\t%0, %1";
26707 /* Fix up an out-of-range load of a GR register. */
26708 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
26709 wcgr
= operands
[0];
26711 output_asm_insn ("ldr%?\t%0, %1", operands
);
26713 operands
[0] = wcgr
;
26715 output_asm_insn ("tmcr%?\t%0, %1", operands
);
26716 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
26721 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26723 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26724 named arg and all anonymous args onto the stack.
26725 XXX I know the prologue shouldn't be pushing registers, but it is faster
26729 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
26733 int second_time ATTRIBUTE_UNUSED
)
26735 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
26738 cfun
->machine
->uses_anonymous_args
= 1;
26739 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
26741 nregs
= pcum
->aapcs_ncrn
;
26744 int res
= arm_needs_doubleword_align (mode
, type
);
26745 if (res
< 0 && warn_psabi
)
26746 inform (input_location
, "parameter passing for argument of "
26747 "type %qT changed in GCC 7.1", type
);
26753 nregs
= pcum
->nregs
;
26755 if (nregs
< NUM_ARG_REGS
)
26756 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
26759 /* We can't rely on the caller doing the proper promotion when
26760 using APCS or ATPCS. */
26763 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
26765 return !TARGET_AAPCS_BASED
;
26768 static machine_mode
26769 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
26771 int *punsignedp ATTRIBUTE_UNUSED
,
26772 const_tree fntype ATTRIBUTE_UNUSED
,
26773 int for_return ATTRIBUTE_UNUSED
)
26775 if (GET_MODE_CLASS (mode
) == MODE_INT
26776 && GET_MODE_SIZE (mode
) < 4)
26784 arm_default_short_enums (void)
26786 return ARM_DEFAULT_SHORT_ENUMS
;
26790 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26793 arm_align_anon_bitfield (void)
26795 return TARGET_AAPCS_BASED
;
26799 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26802 arm_cxx_guard_type (void)
26804 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
26808 /* The EABI says test the least significant bit of a guard variable. */
26811 arm_cxx_guard_mask_bit (void)
26813 return TARGET_AAPCS_BASED
;
26817 /* The EABI specifies that all array cookies are 8 bytes long. */
26820 arm_get_cookie_size (tree type
)
26824 if (!TARGET_AAPCS_BASED
)
26825 return default_cxx_get_cookie_size (type
);
26827 size
= build_int_cst (sizetype
, 8);
26832 /* The EABI says that array cookies should also contain the element size. */
26835 arm_cookie_has_size (void)
26837 return TARGET_AAPCS_BASED
;
26841 /* The EABI says constructors and destructors should return a pointer to
26842 the object constructed/destroyed. */
26845 arm_cxx_cdtor_returns_this (void)
26847 return TARGET_AAPCS_BASED
;
26850 /* The EABI says that an inline function may never be the key
26854 arm_cxx_key_method_may_be_inline (void)
26856 return !TARGET_AAPCS_BASED
;
26860 arm_cxx_determine_class_data_visibility (tree decl
)
26862 if (!TARGET_AAPCS_BASED
26863 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
26866 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26867 is exported. However, on systems without dynamic vague linkage,
26868 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26869 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
26870 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
26872 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
26873 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
26877 arm_cxx_class_data_always_comdat (void)
26879 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26880 vague linkage if the class has no key function. */
26881 return !TARGET_AAPCS_BASED
;
26885 /* The EABI says __aeabi_atexit should be used to register static
26889 arm_cxx_use_aeabi_atexit (void)
26891 return TARGET_AAPCS_BASED
;
26896 arm_set_return_address (rtx source
, rtx scratch
)
26898 arm_stack_offsets
*offsets
;
26899 HOST_WIDE_INT delta
;
26901 unsigned long saved_regs
;
26903 offsets
= arm_get_frame_offsets ();
26904 saved_regs
= offsets
->saved_regs_mask
;
26906 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
26907 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26910 if (frame_pointer_needed
)
26911 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
, -4);
26914 /* LR will be the first saved register. */
26915 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
26920 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
26921 GEN_INT (delta
& ~4095)));
26926 addr
= stack_pointer_rtx
;
26928 addr
= plus_constant (Pmode
, addr
, delta
);
26931 /* The store needs to be marked to prevent DSE from deleting
26932 it as dead if it is based on fp. */
26933 mem
= gen_frame_mem (Pmode
, addr
);
26934 MEM_VOLATILE_P (mem
) = true;
26935 emit_move_insn (mem
, source
);
26941 thumb_set_return_address (rtx source
, rtx scratch
)
26943 arm_stack_offsets
*offsets
;
26944 HOST_WIDE_INT delta
;
26945 HOST_WIDE_INT limit
;
26948 unsigned long mask
;
26952 offsets
= arm_get_frame_offsets ();
26953 mask
= offsets
->saved_regs_mask
;
26954 if (mask
& (1 << LR_REGNUM
))
26957 /* Find the saved regs. */
26958 if (frame_pointer_needed
)
26960 delta
= offsets
->soft_frame
- offsets
->saved_args
;
26961 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
26967 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
26970 /* Allow for the stack frame. */
26971 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
26973 /* The link register is always the first saved register. */
26976 /* Construct the address. */
26977 addr
= gen_rtx_REG (SImode
, reg
);
26980 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
26981 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
26985 addr
= plus_constant (Pmode
, addr
, delta
);
26987 /* The store needs to be marked to prevent DSE from deleting
26988 it as dead if it is based on fp. */
26989 mem
= gen_frame_mem (Pmode
, addr
);
26990 MEM_VOLATILE_P (mem
) = true;
26991 emit_move_insn (mem
, source
);
26994 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
26997 /* Implements target hook vector_mode_supported_p. */
26999 arm_vector_mode_supported_p (machine_mode mode
)
27001 /* Neon also supports V2SImode, etc. listed in the clause below. */
27002 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
27003 || mode
== V4HFmode
|| mode
== V16QImode
|| mode
== V4SFmode
27004 || mode
== V2DImode
|| mode
== V8HFmode
))
27007 if ((TARGET_NEON
|| TARGET_IWMMXT
)
27008 && ((mode
== V2SImode
)
27009 || (mode
== V4HImode
)
27010 || (mode
== V8QImode
)))
27013 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
27014 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
27015 || mode
== V2HAmode
))
27021 /* Implements target hook array_mode_supported_p. */
27024 arm_array_mode_supported_p (machine_mode mode
,
27025 unsigned HOST_WIDE_INT nelems
)
27028 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
27029 && (nelems
>= 2 && nelems
<= 4))
27035 /* Use the option -mvectorize-with-neon-double to override the use of quardword
27036 registers when autovectorizing for Neon, at least until multiple vector
27037 widths are supported properly by the middle-end. */
27039 static machine_mode
27040 arm_preferred_simd_mode (scalar_mode mode
)
27046 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SFmode
: V4SFmode
;
27048 return TARGET_NEON_VECTORIZE_DOUBLE
? V2SImode
: V4SImode
;
27050 return TARGET_NEON_VECTORIZE_DOUBLE
? V4HImode
: V8HImode
;
27052 return TARGET_NEON_VECTORIZE_DOUBLE
? V8QImode
: V16QImode
;
27054 if (!TARGET_NEON_VECTORIZE_DOUBLE
)
27061 if (TARGET_REALLY_IWMMXT
)
27077 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27079 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27080 using r0-r4 for function arguments, r7 for the stack frame and don't have
27081 enough left over to do doubleword arithmetic. For Thumb-2 all the
27082 potentially problematic instructions accept high registers so this is not
27083 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27084 that require many low registers. */
27086 arm_class_likely_spilled_p (reg_class_t rclass
)
27088 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
27089 || rclass
== CC_REG
)
27095 /* Implements target hook small_register_classes_for_mode_p. */
27097 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED
)
27099 return TARGET_THUMB1
;
27102 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27103 ARM insns and therefore guarantee that the shift count is modulo 256.
27104 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27105 guarantee no particular behavior for out-of-range counts. */
27107 static unsigned HOST_WIDE_INT
27108 arm_shift_truncation_mask (machine_mode mode
)
27110 return mode
== SImode
? 255 : 0;
27114 /* Map internal gcc register numbers to DWARF2 register numbers. */
27117 arm_dbx_register_number (unsigned int regno
)
27122 if (IS_VFP_REGNUM (regno
))
27124 /* See comment in arm_dwarf_register_span. */
27125 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
27126 return 64 + regno
- FIRST_VFP_REGNUM
;
27128 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
27131 if (IS_IWMMXT_GR_REGNUM (regno
))
27132 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
27134 if (IS_IWMMXT_REGNUM (regno
))
27135 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
27137 return DWARF_FRAME_REGISTERS
;
27140 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27141 GCC models tham as 64 32-bit registers, so we need to describe this to
27142 the DWARF generation code. Other registers can use the default. */
27144 arm_dwarf_register_span (rtx rtl
)
27152 regno
= REGNO (rtl
);
27153 if (!IS_VFP_REGNUM (regno
))
27156 /* XXX FIXME: The EABI defines two VFP register ranges:
27157 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27159 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27160 corresponding D register. Until GDB supports this, we shall use the
27161 legacy encodings. We also use these encodings for D0-D15 for
27162 compatibility with older debuggers. */
27163 mode
= GET_MODE (rtl
);
27164 if (GET_MODE_SIZE (mode
) < 8)
27167 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
27169 nregs
= GET_MODE_SIZE (mode
) / 4;
27170 for (i
= 0; i
< nregs
; i
+= 2)
27171 if (TARGET_BIG_END
)
27173 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
27174 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
);
27178 parts
[i
] = gen_rtx_REG (SImode
, regno
+ i
);
27179 parts
[i
+ 1] = gen_rtx_REG (SImode
, regno
+ i
+ 1);
27184 nregs
= GET_MODE_SIZE (mode
) / 8;
27185 for (i
= 0; i
< nregs
; i
++)
27186 parts
[i
] = gen_rtx_REG (DImode
, regno
+ i
);
27189 return gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (nregs
, parts
));
27192 #if ARM_UNWIND_INFO
27193 /* Emit unwind directives for a store-multiple instruction or stack pointer
27194 push during alignment.
27195 These should only ever be generated by the function prologue code, so
27196 expect them to have a particular form.
27197 The store-multiple instruction sometimes pushes pc as the last register,
27198 although it should not be tracked into unwind information, or for -Os
27199 sometimes pushes some dummy registers before first register that needs
27200 to be tracked in unwind information; such dummy registers are there just
27201 to avoid separate stack adjustment, and will not be restored in the
27205 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
27208 HOST_WIDE_INT offset
;
27209 HOST_WIDE_INT nregs
;
27213 unsigned padfirst
= 0, padlast
= 0;
27216 e
= XVECEXP (p
, 0, 0);
27217 gcc_assert (GET_CODE (e
) == SET
);
27219 /* First insn will adjust the stack pointer. */
27220 gcc_assert (GET_CODE (e
) == SET
27221 && REG_P (SET_DEST (e
))
27222 && REGNO (SET_DEST (e
)) == SP_REGNUM
27223 && GET_CODE (SET_SRC (e
)) == PLUS
);
27225 offset
= -INTVAL (XEXP (SET_SRC (e
), 1));
27226 nregs
= XVECLEN (p
, 0) - 1;
27227 gcc_assert (nregs
);
27229 reg
= REGNO (SET_SRC (XVECEXP (p
, 0, 1)));
27232 /* For -Os dummy registers can be pushed at the beginning to
27233 avoid separate stack pointer adjustment. */
27234 e
= XVECEXP (p
, 0, 1);
27235 e
= XEXP (SET_DEST (e
), 0);
27236 if (GET_CODE (e
) == PLUS
)
27237 padfirst
= INTVAL (XEXP (e
, 1));
27238 gcc_assert (padfirst
== 0 || optimize_size
);
27239 /* The function prologue may also push pc, but not annotate it as it is
27240 never restored. We turn this into a stack pointer adjustment. */
27241 e
= XVECEXP (p
, 0, nregs
);
27242 e
= XEXP (SET_DEST (e
), 0);
27243 if (GET_CODE (e
) == PLUS
)
27244 padlast
= offset
- INTVAL (XEXP (e
, 1)) - 4;
27246 padlast
= offset
- 4;
27247 gcc_assert (padlast
== 0 || padlast
== 4);
27249 fprintf (asm_out_file
, "\t.pad #4\n");
27251 fprintf (asm_out_file
, "\t.save {");
27253 else if (IS_VFP_REGNUM (reg
))
27256 fprintf (asm_out_file
, "\t.vsave {");
27259 /* Unknown register type. */
27260 gcc_unreachable ();
27262 /* If the stack increment doesn't match the size of the saved registers,
27263 something has gone horribly wrong. */
27264 gcc_assert (offset
== padfirst
+ nregs
* reg_size
+ padlast
);
27268 /* The remaining insns will describe the stores. */
27269 for (i
= 1; i
<= nregs
; i
++)
27271 /* Expect (set (mem <addr>) (reg)).
27272 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27273 e
= XVECEXP (p
, 0, i
);
27274 gcc_assert (GET_CODE (e
) == SET
27275 && MEM_P (SET_DEST (e
))
27276 && REG_P (SET_SRC (e
)));
27278 reg
= REGNO (SET_SRC (e
));
27279 gcc_assert (reg
>= lastreg
);
27282 fprintf (asm_out_file
, ", ");
27283 /* We can't use %r for vfp because we need to use the
27284 double precision register names. */
27285 if (IS_VFP_REGNUM (reg
))
27286 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
27288 asm_fprintf (asm_out_file
, "%r", reg
);
27292 /* Check that the addresses are consecutive. */
27293 e
= XEXP (SET_DEST (e
), 0);
27294 if (GET_CODE (e
) == PLUS
)
27295 gcc_assert (REG_P (XEXP (e
, 0))
27296 && REGNO (XEXP (e
, 0)) == SP_REGNUM
27297 && CONST_INT_P (XEXP (e
, 1))
27298 && offset
== INTVAL (XEXP (e
, 1)));
27302 && REGNO (e
) == SP_REGNUM
);
27303 offset
+= reg_size
;
27306 fprintf (asm_out_file
, "}\n");
27308 fprintf (asm_out_file
, "\t.pad #%d\n", padfirst
);
27311 /* Emit unwind directives for a SET. */
27314 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
27322 switch (GET_CODE (e0
))
27325 /* Pushing a single register. */
27326 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
27327 || !REG_P (XEXP (XEXP (e0
, 0), 0))
27328 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
27331 asm_fprintf (asm_out_file
, "\t.save ");
27332 if (IS_VFP_REGNUM (REGNO (e1
)))
27333 asm_fprintf(asm_out_file
, "{d%d}\n",
27334 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
27336 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
27340 if (REGNO (e0
) == SP_REGNUM
)
27342 /* A stack increment. */
27343 if (GET_CODE (e1
) != PLUS
27344 || !REG_P (XEXP (e1
, 0))
27345 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
27346 || !CONST_INT_P (XEXP (e1
, 1)))
27349 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
27350 -INTVAL (XEXP (e1
, 1)));
27352 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
27354 HOST_WIDE_INT offset
;
27356 if (GET_CODE (e1
) == PLUS
)
27358 if (!REG_P (XEXP (e1
, 0))
27359 || !CONST_INT_P (XEXP (e1
, 1)))
27361 reg
= REGNO (XEXP (e1
, 0));
27362 offset
= INTVAL (XEXP (e1
, 1));
27363 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
27364 HARD_FRAME_POINTER_REGNUM
, reg
,
27367 else if (REG_P (e1
))
27370 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
27371 HARD_FRAME_POINTER_REGNUM
, reg
);
27376 else if (REG_P (e1
) && REGNO (e1
) == SP_REGNUM
)
27378 /* Move from sp to reg. */
27379 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
27381 else if (GET_CODE (e1
) == PLUS
27382 && REG_P (XEXP (e1
, 0))
27383 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
27384 && CONST_INT_P (XEXP (e1
, 1)))
27386 /* Set reg to offset from sp. */
27387 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
27388 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
27400 /* Emit unwind directives for the given insn. */
27403 arm_unwind_emit (FILE * asm_out_file
, rtx_insn
*insn
)
27406 bool handled_one
= false;
27408 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27411 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27412 && (TREE_NOTHROW (current_function_decl
)
27413 || crtl
->all_throwers_are_sibcalls
))
27416 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
27419 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
27421 switch (REG_NOTE_KIND (note
))
27423 case REG_FRAME_RELATED_EXPR
:
27424 pat
= XEXP (note
, 0);
27427 case REG_CFA_REGISTER
:
27428 pat
= XEXP (note
, 0);
27431 pat
= PATTERN (insn
);
27432 if (GET_CODE (pat
) == PARALLEL
)
27433 pat
= XVECEXP (pat
, 0, 0);
27436 /* Only emitted for IS_STACKALIGN re-alignment. */
27441 src
= SET_SRC (pat
);
27442 dest
= SET_DEST (pat
);
27444 gcc_assert (src
== stack_pointer_rtx
);
27445 reg
= REGNO (dest
);
27446 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27449 handled_one
= true;
27452 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27453 to get correct dwarf information for shrink-wrap. We should not
27454 emit unwind information for it because these are used either for
27455 pretend arguments or notes to adjust sp and restore registers from
27457 case REG_CFA_DEF_CFA
:
27458 case REG_CFA_ADJUST_CFA
:
27459 case REG_CFA_RESTORE
:
27462 case REG_CFA_EXPRESSION
:
27463 case REG_CFA_OFFSET
:
27464 /* ??? Only handling here what we actually emit. */
27465 gcc_unreachable ();
27473 pat
= PATTERN (insn
);
27476 switch (GET_CODE (pat
))
27479 arm_unwind_emit_set (asm_out_file
, pat
);
27483 /* Store multiple. */
27484 arm_unwind_emit_sequence (asm_out_file
, pat
);
27493 /* Output a reference from a function exception table to the type_info
27494 object X. The EABI specifies that the symbol should be relocated by
27495 an R_ARM_TARGET2 relocation. */
27498 arm_output_ttype (rtx x
)
27500 fputs ("\t.word\t", asm_out_file
);
27501 output_addr_const (asm_out_file
, x
);
27502 /* Use special relocations for symbol references. */
27503 if (!CONST_INT_P (x
))
27504 fputs ("(TARGET2)", asm_out_file
);
27505 fputc ('\n', asm_out_file
);
27510 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27513 arm_asm_emit_except_personality (rtx personality
)
27515 fputs ("\t.personality\t", asm_out_file
);
27516 output_addr_const (asm_out_file
, personality
);
27517 fputc ('\n', asm_out_file
);
27519 #endif /* ARM_UNWIND_INFO */
/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  Set up the EHABI exception
   section and, for pure-code ELF targets, the flagged .text section.  */

static void
arm_asm_init_sections (void)
{
#if ARM_UNWIND_INFO
  /* NOTE(review): the section-name string was lost in this copy and
     restored from upstream (".handlerdata") — confirm.  */
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
#endif /* ARM_UNWIND_INFO */

#ifdef OBJECT_FORMAT_ELF
  if (target_pure_code)
    text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
#endif
}
27537 /* Output unwind directives for the start/end of a function. */
27540 arm_output_fn_unwind (FILE * f
, bool prologue
)
27542 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
27546 fputs ("\t.fnstart\n", f
);
27549 /* If this function will never be unwound, then mark it as such.
27550 The came condition is used in arm_unwind_emit to suppress
27551 the frame annotations. */
27552 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
27553 && (TREE_NOTHROW (current_function_decl
)
27554 || crtl
->all_throwers_are_sibcalls
))
27555 fputs("\t.cantunwind\n", f
);
27557 fputs ("\t.fnend\n", f
);
27562 arm_emit_tls_decoration (FILE *fp
, rtx x
)
27564 enum tls_reloc reloc
;
27567 val
= XVECEXP (x
, 0, 0);
27568 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
27570 output_addr_const (fp
, val
);
27575 fputs ("(tlsgd)", fp
);
27578 fputs ("(tlsldm)", fp
);
27581 fputs ("(tlsldo)", fp
);
27584 fputs ("(gottpoff)", fp
);
27587 fputs ("(tpoff)", fp
);
27590 fputs ("(tlsdesc)", fp
);
27593 gcc_unreachable ();
27602 fputs (" + (. - ", fp
);
27603 output_addr_const (fp
, XVECEXP (x
, 0, 2));
27604 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27605 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
27606 output_addr_const (fp
, XVECEXP (x
, 0, 3));
27616 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27619 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
27621 gcc_assert (size
== 4);
27622 fputs ("\t.word\t", file
);
27623 output_addr_const (file
, x
);
27624 fputs ("(tlsldo)", file
);
27627 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27630 arm_output_addr_const_extra (FILE *fp
, rtx x
)
27632 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
27633 return arm_emit_tls_decoration (fp
, x
);
27634 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
27637 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
27639 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
27640 assemble_name_raw (fp
, label
);
27644 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
27646 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
27650 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27654 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
27656 output_addr_const (fp
, XVECEXP (x
, 0, 0));
27660 output_addr_const (fp
, XVECEXP (x
, 0, 1));
27664 else if (GET_CODE (x
) == CONST_VECTOR
)
27665 return arm_emit_vector_const (fp
, x
);
27670 /* Output assembly for a shift instruction.
27671 SET_FLAGS determines how the instruction modifies the condition codes.
27672 0 - Do not set condition codes.
27673 1 - Set condition codes.
27674 2 - Use smallest instruction. */
27676 arm_output_shift(rtx
* operands
, int set_flags
)
27679 static const char flag_chars
[3] = {'?', '.', '!'};
27684 c
= flag_chars
[set_flags
];
27685 shift
= shift_op(operands
[3], &val
);
27689 operands
[2] = GEN_INT(val
);
27690 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
27693 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
27695 output_asm_insn (pattern
, operands
);
27699 /* Output assembly for a WMMX immediate shift instruction. */
27701 arm_output_iwmmxt_shift_immediate (const char *insn_name
, rtx
*operands
, bool wror_or_wsra
)
27703 int shift
= INTVAL (operands
[2]);
27705 machine_mode opmode
= GET_MODE (operands
[0]);
27707 gcc_assert (shift
>= 0);
27709 /* If the shift value in the register versions is > 63 (for D qualifier),
27710 31 (for W qualifier) or 15 (for H qualifier). */
27711 if (((opmode
== V4HImode
) && (shift
> 15))
27712 || ((opmode
== V2SImode
) && (shift
> 31))
27713 || ((opmode
== DImode
) && (shift
> 63)))
27717 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27718 output_asm_insn (templ
, operands
);
27719 if (opmode
== DImode
)
27721 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, 32);
27722 output_asm_insn (templ
, operands
);
27727 /* The destination register will contain all zeros. */
27728 sprintf (templ
, "wzero\t%%0");
27729 output_asm_insn (templ
, operands
);
27734 if ((opmode
== DImode
) && (shift
> 32))
27736 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, 32);
27737 output_asm_insn (templ
, operands
);
27738 sprintf (templ
, "%s\t%%0, %%0, #%d", insn_name
, shift
- 32);
27739 output_asm_insn (templ
, operands
);
27743 sprintf (templ
, "%s\t%%0, %%1, #%d", insn_name
, shift
);
27744 output_asm_insn (templ
, operands
);
27749 /* Output assembly for a WMMX tinsr instruction. */
27751 arm_output_iwmmxt_tinsr (rtx
*operands
)
27753 int mask
= INTVAL (operands
[3]);
27756 int units
= mode_nunits
[GET_MODE (operands
[0])];
27757 gcc_assert ((mask
& (mask
- 1)) == 0);
27758 for (i
= 0; i
< units
; ++i
)
27760 if ((mask
& 0x01) == 1)
27766 gcc_assert (i
< units
);
27768 switch (GET_MODE (operands
[0]))
27771 sprintf (templ
, "tinsrb%%?\t%%0, %%2, #%d", i
);
27774 sprintf (templ
, "tinsrh%%?\t%%0, %%2, #%d", i
);
27777 sprintf (templ
, "tinsrw%%?\t%%0, %%2, #%d", i
);
27780 gcc_unreachable ();
27783 output_asm_insn (templ
, operands
);
27788 /* Output a Thumb-1 casesi dispatch sequence. */
27790 thumb1_output_casesi (rtx
*operands
)
27792 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[0])));
27794 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27796 switch (GET_MODE(diff_vec
))
27799 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27800 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27802 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
27803 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27805 return "bl\t%___gnu_thumb1_case_si";
27807 gcc_unreachable ();
27811 /* Output a Thumb-2 casesi instruction. */
27813 thumb2_output_casesi (rtx
*operands
)
27815 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
27817 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
27819 output_asm_insn ("cmp\t%0, %1", operands
);
27820 output_asm_insn ("bhi\t%l3", operands
);
27821 switch (GET_MODE(diff_vec
))
27824 return "tbb\t[%|pc, %0]";
27826 return "tbh\t[%|pc, %0, lsl #1]";
27830 output_asm_insn ("adr\t%4, %l2", operands
);
27831 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
27832 output_asm_insn ("add\t%4, %4, %5", operands
);
27837 output_asm_insn ("adr\t%4, %l2", operands
);
27838 return "ldr\t%|pc, [%4, %0, lsl #2]";
27841 gcc_unreachable ();
27845 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27846 per-core tuning structs. */
27848 arm_issue_rate (void)
27850 return current_tune
->issue_rate
;
27853 /* Return how many instructions should scheduler lookahead to choose the
27856 arm_first_cycle_multipass_dfa_lookahead (void)
27858 int issue_rate
= arm_issue_rate ();
27860 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
27863 /* Enable modeling of L2 auto-prefetcher. */
27865 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn
*insn
, int ready_index
)
27867 return autopref_multipass_dfa_lookahead_guard (insn
, ready_index
);
27871 arm_mangle_type (const_tree type
)
27873 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27874 has to be managled as if it is in the "std" namespace. */
27875 if (TARGET_AAPCS_BASED
27876 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
27877 return "St9__va_list";
27879 /* Half-precision float. */
27880 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
27883 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27885 if (TYPE_NAME (type
) != NULL
)
27886 return arm_mangle_builtin_type (type
);
27888 /* Use the default mangling. */
27892 /* Order of allocation of core registers for Thumb: this allocation is
27893 written over the corresponding initial entries of the array
27894 initialized with REG_ALLOC_ORDER. We allocate all low registers
27895 first. Saving and restoring a low register is usually cheaper than
27896 using a call-clobbered high register. */
27898 static const int thumb_core_reg_alloc_order
[] =
27900 3, 2, 1, 0, 4, 5, 6, 7,
27901 12, 14, 8, 9, 10, 11
27904 /* Adjust register allocation order when compiling for Thumb. */
27907 arm_order_regs_for_local_alloc (void)
27909 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
27910 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
27912 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
27913 sizeof (thumb_core_reg_alloc_order
));
27916 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27919 arm_frame_pointer_required (void)
27921 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
27924 /* If the function receives nonlocal gotos, it needs to save the frame
27925 pointer in the nonlocal_goto_save_area object. */
27926 if (cfun
->has_nonlocal_label
)
27929 /* The frame pointer is required for non-leaf APCS frames. */
27930 if (TARGET_ARM
&& TARGET_APCS_FRAME
&& !crtl
->is_leaf
)
27933 /* If we are probing the stack in the prologue, we will have a faulting
27934 instruction prior to the stack adjustment and this requires a frame
27935 pointer if we want to catch the exception using the EABI unwinder. */
27936 if (!IS_INTERRUPT (arm_current_func_type ())
27937 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
27938 || flag_stack_clash_protection
)
27939 && arm_except_unwind_info (&global_options
) == UI_TARGET
27940 && cfun
->can_throw_non_call_exceptions
)
27942 HOST_WIDE_INT size
= get_frame_size ();
27944 /* That's irrelevant if there is no stack adjustment. */
27948 /* That's relevant only if there is a stack probe. */
27949 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
27951 /* We don't have the final size of the frame so adjust. */
27952 size
+= 32 * UNITS_PER_WORD
;
27953 if (size
> PROBE_INTERVAL
&& size
> get_stack_check_protect ())
27963 /* Only thumb1 can't support conditional execution, so return true if
27964 the target is not thumb1. */
27966 arm_have_conditional_execution (void)
27968 return !TARGET_THUMB1
;
27971 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27972 static HOST_WIDE_INT
27973 arm_vector_alignment (const_tree type
)
27975 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
27977 if (TARGET_AAPCS_BASED
)
27978 align
= MIN (align
, 64);
27983 static unsigned int
27984 arm_autovectorize_vector_sizes (void)
27986 return TARGET_NEON_VECTORIZE_DOUBLE
? 0 : (16 | 8);
27990 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
27992 /* Vectors which aren't in packed structures will not be less aligned than
27993 the natural alignment of their element type, so this is safe. */
27994 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
27997 return default_builtin_vector_alignment_reachable (type
, is_packed
);
28001 arm_builtin_support_vector_misalignment (machine_mode mode
,
28002 const_tree type
, int misalignment
,
28005 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
&& unaligned_access
)
28007 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
28012 /* If the misalignment is unknown, we should be able to handle the access
28013 so long as it is not to a member of a packed data structure. */
28014 if (misalignment
== -1)
28017 /* Return true if the misalignment is a multiple of the natural alignment
28018 of the vector's element type. This is probably always going to be
28019 true in practice, since we've already established that this isn't a
28021 return ((misalignment
% align
) == 0);
28024 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
28029 arm_conditional_register_usage (void)
28033 if (TARGET_THUMB1
&& optimize_size
)
28035 /* When optimizing for size on Thumb-1, it's better not
28036 to use the HI regs, because of the overhead of
28038 for (regno
= FIRST_HI_REGNUM
; regno
<= LAST_HI_REGNUM
; ++regno
)
28039 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
28042 /* The link register can be clobbered by any branch insn,
28043 but we have no way to track that at present, so mark
28044 it as unavailable. */
28046 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
28048 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
28050 /* VFPv3 registers are disabled when earlier VFP
28051 versions are selected due to the definition of
28052 LAST_VFP_REGNUM. */
28053 for (regno
= FIRST_VFP_REGNUM
;
28054 regno
<= LAST_VFP_REGNUM
; ++ regno
)
28056 fixed_regs
[regno
] = 0;
28057 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
28058 || regno
>= FIRST_VFP_REGNUM
+ 32;
28062 if (TARGET_REALLY_IWMMXT
)
28064 regno
= FIRST_IWMMXT_GR_REGNUM
;
28065 /* The 2002/10/09 revision of the XScale ABI has wCG0
28066 and wCG1 as call-preserved registers. The 2002/11/21
28067 revision changed this so that all wCG registers are
28068 scratch registers. */
28069 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
28070 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
28071 fixed_regs
[regno
] = 0;
28072 /* The XScale ABI has wR0 - wR9 as scratch registers,
28073 the rest as call-preserved registers. */
28074 for (regno
= FIRST_IWMMXT_REGNUM
;
28075 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
28077 fixed_regs
[regno
] = 0;
28078 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
28082 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
28084 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
28085 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
28087 else if (TARGET_APCS_STACK
)
28089 fixed_regs
[10] = 1;
28090 call_used_regs
[10] = 1;
28092 /* -mcaller-super-interworking reserves r11 for calls to
28093 _interwork_r11_call_via_rN(). Making the register global
28094 is an easy way of ensuring that it remains valid for all
28096 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
28097 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
28099 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28100 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28101 if (TARGET_CALLER_INTERWORKING
)
28102 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
28104 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28108 arm_preferred_rename_class (reg_class_t rclass
)
28110 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28111 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
28112 and code size can be reduced. */
28113 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
28119 /* Compute the attribute "length" of insn "*push_multi".
28120 So this function MUST be kept in sync with that insn pattern. */
28122 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
28124 int i
, regno
, hi_reg
;
28125 int num_saves
= XVECLEN (parallel_op
, 0);
28135 regno
= REGNO (first_op
);
28136 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
28137 list is 8-bit. Normally this means all registers in the list must be
28138 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
28139 encodings. There is one exception for PUSH that LR in HI_REGS can be used
28140 with 16-bit encoding. */
28141 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
28142 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
28144 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
28145 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
28153 /* Compute the attribute "length" of insn. Currently, this function is used
28154 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28155 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28156 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
28157 true if OPERANDS contains insn which explicit updates base register. */
28160 arm_attr_length_pop_multi (rtx
*operands
, bool return_pc
, bool write_back_p
)
28169 rtx parallel_op
= operands
[0];
28170 /* Initialize to elements number of PARALLEL. */
28171 unsigned indx
= XVECLEN (parallel_op
, 0) - 1;
28172 /* Initialize the value to base register. */
28173 unsigned regno
= REGNO (operands
[1]);
28174 /* Skip return and write back pattern.
28175 We only need register pop pattern for later analysis. */
28176 unsigned first_indx
= 0;
28177 first_indx
+= return_pc
? 1 : 0;
28178 first_indx
+= write_back_p
? 1 : 0;
28180 /* A pop operation can be done through LDM or POP. If the base register is SP
28181 and if it's with write back, then a LDM will be alias of POP. */
28182 bool pop_p
= (regno
== SP_REGNUM
&& write_back_p
);
28183 bool ldm_p
= !pop_p
;
28185 /* Check base register for LDM. */
28186 if (ldm_p
&& REGNO_REG_CLASS (regno
) == HI_REGS
)
28189 /* Check each register in the list. */
28190 for (; indx
>= first_indx
; indx
--)
28192 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, indx
), 0));
28193 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28194 comment in arm_attr_length_push_multi. */
28195 if (REGNO_REG_CLASS (regno
) == HI_REGS
28196 && (regno
!= PC_REGNUM
|| ldm_p
))
28203 /* Compute the number of instructions emitted by output_move_double. */
28205 arm_count_output_move_double_insns (rtx
*operands
)
28209 /* output_move_double may modify the operands array, so call it
28210 here on a copy of the array. */
28211 ops
[0] = operands
[0];
28212 ops
[1] = operands
[1];
28213 output_move_double (ops
, false, &count
);
28218 vfp3_const_double_for_fract_bits (rtx operand
)
28220 REAL_VALUE_TYPE r0
;
28222 if (!CONST_DOUBLE_P (operand
))
28225 r0
= *CONST_DOUBLE_REAL_VALUE (operand
);
28226 if (exact_real_inverse (DFmode
, &r0
)
28227 && !REAL_VALUE_NEGATIVE (r0
))
28229 if (exact_real_truncate (DFmode
, &r0
))
28231 HOST_WIDE_INT value
= real_to_integer (&r0
);
28232 value
= value
& 0xffffffff;
28233 if ((value
!= 0) && ( (value
& (value
- 1)) == 0))
28235 int ret
= exact_log2 (value
);
28236 gcc_assert (IN_RANGE (ret
, 0, 31));
28244 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28245 log2 is in [1, 32], return that log2. Otherwise return -1.
28246 This is used in the patterns for vcvt.s32.f32 floating-point to
28247 fixed-point conversions. */
28250 vfp3_const_double_for_bits (rtx x
)
28252 const REAL_VALUE_TYPE
*r
;
28254 if (!CONST_DOUBLE_P (x
))
28257 r
= CONST_DOUBLE_REAL_VALUE (x
);
28259 if (REAL_VALUE_NEGATIVE (*r
)
28260 || REAL_VALUE_ISNAN (*r
)
28261 || REAL_VALUE_ISINF (*r
)
28262 || !real_isinteger (r
, SFmode
))
28265 HOST_WIDE_INT hwint
= exact_log2 (real_to_integer (r
));
28267 /* The exact_log2 above will have returned -1 if this is
28268 not an exact log2. */
28269 if (!IN_RANGE (hwint
, 1, 32))
28276 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28279 arm_pre_atomic_barrier (enum memmodel model
)
28281 if (need_atomic_barrier_p (model
, true))
28282 emit_insn (gen_memory_barrier ());
28286 arm_post_atomic_barrier (enum memmodel model
)
28288 if (need_atomic_barrier_p (model
, false))
28289 emit_insn (gen_memory_barrier ());
28292 /* Emit the load-exclusive and store-exclusive instructions.
28293 Use acquire and release versions if necessary. */
28296 arm_emit_load_exclusive (machine_mode mode
, rtx rval
, rtx mem
, bool acq
)
28298 rtx (*gen
) (rtx
, rtx
);
28304 case E_QImode
: gen
= gen_arm_load_acquire_exclusiveqi
; break;
28305 case E_HImode
: gen
= gen_arm_load_acquire_exclusivehi
; break;
28306 case E_SImode
: gen
= gen_arm_load_acquire_exclusivesi
; break;
28307 case E_DImode
: gen
= gen_arm_load_acquire_exclusivedi
; break;
28309 gcc_unreachable ();
28316 case E_QImode
: gen
= gen_arm_load_exclusiveqi
; break;
28317 case E_HImode
: gen
= gen_arm_load_exclusivehi
; break;
28318 case E_SImode
: gen
= gen_arm_load_exclusivesi
; break;
28319 case E_DImode
: gen
= gen_arm_load_exclusivedi
; break;
28321 gcc_unreachable ();
28325 emit_insn (gen (rval
, mem
));
28329 arm_emit_store_exclusive (machine_mode mode
, rtx bval
, rtx rval
,
28332 rtx (*gen
) (rtx
, rtx
, rtx
);
28338 case E_QImode
: gen
= gen_arm_store_release_exclusiveqi
; break;
28339 case E_HImode
: gen
= gen_arm_store_release_exclusivehi
; break;
28340 case E_SImode
: gen
= gen_arm_store_release_exclusivesi
; break;
28341 case E_DImode
: gen
= gen_arm_store_release_exclusivedi
; break;
28343 gcc_unreachable ();
28350 case E_QImode
: gen
= gen_arm_store_exclusiveqi
; break;
28351 case E_HImode
: gen
= gen_arm_store_exclusivehi
; break;
28352 case E_SImode
: gen
= gen_arm_store_exclusivesi
; break;
28353 case E_DImode
: gen
= gen_arm_store_exclusivedi
; break;
28355 gcc_unreachable ();
28359 emit_insn (gen (bval
, rval
, mem
));
28362 /* Mark the previous jump instruction as unlikely. */
28365 emit_unlikely_jump (rtx insn
)
28367 rtx_insn
*jump
= emit_jump_insn (insn
);
28368 add_reg_br_prob_note (jump
, profile_probability::very_unlikely ());
28371 /* Expand a compare and swap pattern. */
28374 arm_expand_compare_and_swap (rtx operands
[])
28376 rtx bval
, bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
28378 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
28380 bval
= operands
[0];
28381 rval
= operands
[1];
28383 oldval
= operands
[3];
28384 newval
= operands
[4];
28385 is_weak
= operands
[5];
28386 mod_s
= operands
[6];
28387 mod_f
= operands
[7];
28388 mode
= GET_MODE (mem
);
28390 /* Normally the succ memory model must be stronger than fail, but in the
28391 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28392 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28394 if (TARGET_HAVE_LDACQ
28395 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
28396 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
28397 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
28403 /* For narrow modes, we're going to perform the comparison in SImode,
28404 so do the zero-extension now. */
28405 rval
= gen_reg_rtx (SImode
);
28406 oldval
= convert_modes (SImode
, mode
, oldval
, true);
28410 /* Force the value into a register if needed. We waited until after
28411 the zero-extension above to do this properly. */
28412 if (!arm_add_operand (oldval
, SImode
))
28413 oldval
= force_reg (SImode
, oldval
);
28417 if (!cmpdi_operand (oldval
, mode
))
28418 oldval
= force_reg (mode
, oldval
);
28422 gcc_unreachable ();
28429 case E_QImode
: gen
= gen_atomic_compare_and_swapt1qi_1
; break;
28430 case E_HImode
: gen
= gen_atomic_compare_and_swapt1hi_1
; break;
28431 case E_SImode
: gen
= gen_atomic_compare_and_swapt1si_1
; break;
28432 case E_DImode
: gen
= gen_atomic_compare_and_swapt1di_1
; break;
28434 gcc_unreachable ();
28441 case E_QImode
: gen
= gen_atomic_compare_and_swap32qi_1
; break;
28442 case E_HImode
: gen
= gen_atomic_compare_and_swap32hi_1
; break;
28443 case E_SImode
: gen
= gen_atomic_compare_and_swap32si_1
; break;
28444 case E_DImode
: gen
= gen_atomic_compare_and_swap32di_1
; break;
28446 gcc_unreachable ();
28450 bdst
= TARGET_THUMB1
? bval
: gen_rtx_REG (CC_Zmode
, CC_REGNUM
);
28451 emit_insn (gen (bdst
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
28453 if (mode
== QImode
|| mode
== HImode
)
28454 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
28456 /* In all cases, we arrange for success to be signaled by Z set.
28457 This arrangement allows for the boolean result to be used directly
28458 in a subsequent branch, post optimization. For Thumb-1 targets, the
28459 boolean negation of the result is also stored in bval because Thumb-1
28460 backend lacks dependency tracking for CC flag due to flag-setting not
28461 being represented at RTL level. */
28463 emit_insn (gen_cstoresi_eq0_thumb1 (bval
, bdst
));
28466 x
= gen_rtx_EQ (SImode
, bdst
, const0_rtx
);
28467 emit_insn (gen_rtx_SET (bval
, x
));
28471 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28472 another memory store between the load-exclusive and store-exclusive can
28473 reset the monitor from Exclusive to Open state. This means we must wait
28474 until after reload to split the pattern, lest we get a register spill in
28475 the middle of the atomic sequence. Success of the compare and swap is
28476 indicated by the Z flag set for 32bit targets and by neg_bval being zero
28477 for Thumb-1 targets (ie. negation of the boolean value returned by
28478 atomic_compare_and_swapmode standard pattern in operand 0). */
28481 arm_split_compare_and_swap (rtx operands
[])
28483 rtx rval
, mem
, oldval
, newval
, neg_bval
;
28485 enum memmodel mod_s
, mod_f
;
28487 rtx_code_label
*label1
, *label2
;
28490 rval
= operands
[1];
28492 oldval
= operands
[3];
28493 newval
= operands
[4];
28494 is_weak
= (operands
[5] != const0_rtx
);
28495 mod_s
= memmodel_from_int (INTVAL (operands
[6]));
28496 mod_f
= memmodel_from_int (INTVAL (operands
[7]));
28497 neg_bval
= TARGET_THUMB1
? operands
[0] : operands
[8];
28498 mode
= GET_MODE (mem
);
28500 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (mod_s
);
28502 bool use_acquire
= TARGET_HAVE_LDACQ
28503 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28504 || is_mm_release (mod_s
));
28506 bool use_release
= TARGET_HAVE_LDACQ
28507 && !(is_mm_relaxed (mod_s
) || is_mm_consume (mod_s
)
28508 || is_mm_acquire (mod_s
));
28510 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28511 a full barrier is emitted after the store-release. */
28513 use_acquire
= false;
28515 /* Checks whether a barrier is needed and emits one accordingly. */
28516 if (!(use_acquire
|| use_release
))
28517 arm_pre_atomic_barrier (mod_s
);
28522 label1
= gen_label_rtx ();
28523 emit_label (label1
);
28525 label2
= gen_label_rtx ();
28527 arm_emit_load_exclusive (mode
, rval
, mem
, use_acquire
);
28529 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28530 as required to communicate with arm_expand_compare_and_swap. */
28533 cond
= arm_gen_compare_reg (NE
, rval
, oldval
, neg_bval
);
28534 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28535 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
28536 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
28537 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
28541 emit_move_insn (neg_bval
, const1_rtx
);
28542 cond
= gen_rtx_NE (VOIDmode
, rval
, oldval
);
28543 if (thumb1_cmpneg_operand (oldval
, SImode
))
28544 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval
, rval
, oldval
,
28547 emit_unlikely_jump (gen_cbranchsi4_insn (cond
, rval
, oldval
, label2
));
28550 arm_emit_store_exclusive (mode
, neg_bval
, mem
, newval
, use_release
);
28552 /* Weak or strong, we want EQ to be true for success, so that we
28553 match the flags that we got from the compare above. */
28556 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
28557 x
= gen_rtx_COMPARE (CCmode
, neg_bval
, const0_rtx
);
28558 emit_insn (gen_rtx_SET (cond
, x
));
28563 /* Z is set to boolean value of !neg_bval, as required to communicate
28564 with arm_expand_compare_and_swap. */
28565 x
= gen_rtx_NE (VOIDmode
, neg_bval
, const0_rtx
);
28566 emit_unlikely_jump (gen_cbranchsi4 (x
, neg_bval
, const0_rtx
, label1
));
28569 if (!is_mm_relaxed (mod_f
))
28570 emit_label (label2
);
28572 /* Checks whether a barrier is needed and emits one accordingly. */
28574 || !(use_acquire
|| use_release
))
28575 arm_post_atomic_barrier (mod_s
);
28577 if (is_mm_relaxed (mod_f
))
28578 emit_label (label2
);
28581 /* Split an atomic operation pattern. Operation is given by CODE and is one
28582 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28583 operation). Operation is performed on the content at MEM and on VALUE
28584 following the memory model MODEL_RTX. The content at MEM before and after
28585 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28586 success of the operation is returned in COND. Using a scratch register or
28587 an operand register for these determines what result is returned for that
28591 arm_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
28592 rtx value
, rtx model_rtx
, rtx cond
)
28594 enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
28595 machine_mode mode
= GET_MODE (mem
);
28596 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
28597 rtx_code_label
*label
;
28598 bool all_low_regs
, bind_old_new
;
28601 bool is_armv8_sync
= arm_arch8
&& is_mm_sync (model
);
28603 bool use_acquire
= TARGET_HAVE_LDACQ
28604 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28605 || is_mm_release (model
));
28607 bool use_release
= TARGET_HAVE_LDACQ
28608 && !(is_mm_relaxed (model
) || is_mm_consume (model
)
28609 || is_mm_acquire (model
));
28611 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28612 a full barrier is emitted after the store-release. */
28614 use_acquire
= false;
28616 /* Checks whether a barrier is needed and emits one accordingly. */
28617 if (!(use_acquire
|| use_release
))
28618 arm_pre_atomic_barrier (model
);
28620 label
= gen_label_rtx ();
28621 emit_label (label
);
28624 new_out
= gen_lowpart (wmode
, new_out
);
28626 old_out
= gen_lowpart (wmode
, old_out
);
28629 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
28631 arm_emit_load_exclusive (mode
, old_out
, mem
, use_acquire
);
28633 /* Does the operation require destination and first operand to use the same
28634 register? This is decided by register constraints of relevant insn
28635 patterns in thumb1.md. */
28636 gcc_assert (!new_out
|| REG_P (new_out
));
28637 all_low_regs
= REG_P (value
) && REGNO_REG_CLASS (REGNO (value
)) == LO_REGS
28638 && new_out
&& REGNO_REG_CLASS (REGNO (new_out
)) == LO_REGS
28639 && REGNO_REG_CLASS (REGNO (old_out
)) == LO_REGS
;
28644 && (code
!= PLUS
|| (!all_low_regs
&& !satisfies_constraint_L (value
))));
28646 /* We want to return the old value while putting the result of the operation
28647 in the same register as the old value so copy the old value over to the
28648 destination register and use that register for the operation. */
28649 if (old_out
&& bind_old_new
)
28651 emit_move_insn (new_out
, old_out
);
28662 x
= gen_rtx_AND (wmode
, old_out
, value
);
28663 emit_insn (gen_rtx_SET (new_out
, x
));
28664 x
= gen_rtx_NOT (wmode
, new_out
);
28665 emit_insn (gen_rtx_SET (new_out
, x
));
28669 if (CONST_INT_P (value
))
28671 value
= GEN_INT (-INTVAL (value
));
28677 if (mode
== DImode
)
28679 /* DImode plus/minus need to clobber flags. */
28680 /* The adddi3 and subdi3 patterns are incorrectly written so that
28681 they require matching operands, even when we could easily support
28682 three operands. Thankfully, this can be fixed up post-splitting,
28683 as the individual add+adc patterns do accept three operands and
28684 post-reload cprop can make these moves go away. */
28685 emit_move_insn (new_out
, old_out
);
28687 x
= gen_adddi3 (new_out
, new_out
, value
);
28689 x
= gen_subdi3 (new_out
, new_out
, value
);
28696 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
28697 emit_insn (gen_rtx_SET (new_out
, x
));
28701 arm_emit_store_exclusive (mode
, cond
, mem
, gen_lowpart (mode
, new_out
),
28704 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
28705 emit_unlikely_jump (gen_cbranchsi4 (x
, cond
, const0_rtx
, label
));
28707 /* Checks whether a barrier is needed and emits one accordingly. */
28709 || !(use_acquire
|| use_release
))
28710 arm_post_atomic_barrier (model
);
28713 #define MAX_VECT_LEN 16
28715 struct expand_vec_perm_d
28717 rtx target
, op0
, op1
;
28718 auto_vec_perm_indices perm
;
28719 machine_mode vmode
;
28724 /* Generate a variable permutation. */
28727 arm_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28729 machine_mode vmode
= GET_MODE (target
);
28730 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28732 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
28733 gcc_checking_assert (GET_MODE (op0
) == vmode
);
28734 gcc_checking_assert (GET_MODE (op1
) == vmode
);
28735 gcc_checking_assert (GET_MODE (sel
) == vmode
);
28736 gcc_checking_assert (TARGET_NEON
);
28740 if (vmode
== V8QImode
)
28741 emit_insn (gen_neon_vtbl1v8qi (target
, op0
, sel
));
28743 emit_insn (gen_neon_vtbl1v16qi (target
, op0
, sel
));
28749 if (vmode
== V8QImode
)
28751 pair
= gen_reg_rtx (V16QImode
);
28752 emit_insn (gen_neon_vcombinev8qi (pair
, op0
, op1
));
28753 pair
= gen_lowpart (TImode
, pair
);
28754 emit_insn (gen_neon_vtbl2v8qi (target
, pair
, sel
));
28758 pair
= gen_reg_rtx (OImode
);
28759 emit_insn (gen_neon_vcombinev16qi (pair
, op0
, op1
));
28760 emit_insn (gen_neon_vtbl2v16qi (target
, pair
, sel
));
28766 arm_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
28768 machine_mode vmode
= GET_MODE (target
);
28769 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
28770 bool one_vector_p
= rtx_equal_p (op0
, op1
);
28773 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28774 numbering of elements for big-endian, we must reverse the order. */
28775 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
28777 /* The VTBL instruction does not use a modulo index, so we must take care
28778 of that ourselves. */
28779 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28780 mask
= gen_const_vec_duplicate (vmode
, mask
);
28781 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
28783 arm_expand_vec_perm_1 (target
, op0
, op1
, sel
);
28786 /* Map lane ordering between architectural lane order, and GCC lane order,
28787 taking into account ABI. See comment above output_move_neon for details. */
28790 neon_endian_lane_map (machine_mode mode
, int lane
)
28792 if (BYTES_BIG_ENDIAN
)
28794 int nelems
= GET_MODE_NUNITS (mode
);
28795 /* Reverse lane order. */
28796 lane
= (nelems
- 1 - lane
);
28797 /* Reverse D register order, to match ABI. */
28798 if (GET_MODE_SIZE (mode
) == 16)
28799 lane
= lane
^ (nelems
/ 2);
28804 /* Some permutations index into pairs of vectors, this is a helper function
28805 to map indexes into those pairs of vectors. */
28808 neon_pair_endian_lane_map (machine_mode mode
, int lane
)
28810 int nelem
= GET_MODE_NUNITS (mode
);
28811 if (BYTES_BIG_ENDIAN
)
28813 neon_endian_lane_map (mode
, lane
& (nelem
- 1)) + (lane
& nelem
);
28817 /* Generate or test for an insn that supports a constant permutation. */
28819 /* Recognize patterns for the VUZP insns. */
28822 arm_evpc_neon_vuzp (struct expand_vec_perm_d
*d
)
28824 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
28825 rtx out0
, out1
, in0
, in1
;
28826 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28830 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28833 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28834 big endian pattern on 64 bit vectors, so we correct for that. */
28835 swap_nelt
= BYTES_BIG_ENDIAN
&& !d
->one_vector_p
28836 && GET_MODE_SIZE (d
->vmode
) == 8 ? nelt
: 0;
28838 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0)] ^ swap_nelt
;
28840 if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28842 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 1))
28846 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28848 for (i
= 0; i
< nelt
; i
++)
28851 (neon_pair_endian_lane_map (d
->vmode
, i
) * 2 + odd
) & mask
;
28852 if ((d
->perm
[i
] ^ swap_nelt
) != neon_pair_endian_lane_map (d
->vmode
, elt
))
28862 case E_V16QImode
: gen
= gen_neon_vuzpv16qi_internal
; break;
28863 case E_V8QImode
: gen
= gen_neon_vuzpv8qi_internal
; break;
28864 case E_V8HImode
: gen
= gen_neon_vuzpv8hi_internal
; break;
28865 case E_V4HImode
: gen
= gen_neon_vuzpv4hi_internal
; break;
28866 case E_V8HFmode
: gen
= gen_neon_vuzpv8hf_internal
; break;
28867 case E_V4HFmode
: gen
= gen_neon_vuzpv4hf_internal
; break;
28868 case E_V4SImode
: gen
= gen_neon_vuzpv4si_internal
; break;
28869 case E_V2SImode
: gen
= gen_neon_vuzpv2si_internal
; break;
28870 case E_V2SFmode
: gen
= gen_neon_vuzpv2sf_internal
; break;
28871 case E_V4SFmode
: gen
= gen_neon_vuzpv4sf_internal
; break;
28873 gcc_unreachable ();
28878 if (swap_nelt
!= 0)
28879 std::swap (in0
, in1
);
28882 out1
= gen_reg_rtx (d
->vmode
);
28884 std::swap (out0
, out1
);
28886 emit_insn (gen (out0
, in0
, in1
, out1
));
28890 /* Recognize patterns for the VZIP insns. */
28893 arm_evpc_neon_vzip (struct expand_vec_perm_d
*d
)
28895 unsigned int i
, high
, mask
, nelt
= d
->perm
.length ();
28896 rtx out0
, out1
, in0
, in1
;
28897 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
28901 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
28904 is_swapped
= BYTES_BIG_ENDIAN
;
28906 first_elem
= d
->perm
[neon_endian_lane_map (d
->vmode
, 0) ^ is_swapped
];
28909 if (first_elem
== neon_endian_lane_map (d
->vmode
, high
))
28911 else if (first_elem
== neon_endian_lane_map (d
->vmode
, 0))
28915 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
28917 for (i
= 0; i
< nelt
/ 2; i
++)
28920 neon_pair_endian_lane_map (d
->vmode
, i
+ high
) & mask
;
28921 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ is_swapped
)]
28925 neon_pair_endian_lane_map (d
->vmode
, i
+ nelt
+ high
) & mask
;
28926 if (d
->perm
[neon_pair_endian_lane_map (d
->vmode
, 2 * i
+ !is_swapped
)]
28937 case E_V16QImode
: gen
= gen_neon_vzipv16qi_internal
; break;
28938 case E_V8QImode
: gen
= gen_neon_vzipv8qi_internal
; break;
28939 case E_V8HImode
: gen
= gen_neon_vzipv8hi_internal
; break;
28940 case E_V4HImode
: gen
= gen_neon_vzipv4hi_internal
; break;
28941 case E_V8HFmode
: gen
= gen_neon_vzipv8hf_internal
; break;
28942 case E_V4HFmode
: gen
= gen_neon_vzipv4hf_internal
; break;
28943 case E_V4SImode
: gen
= gen_neon_vzipv4si_internal
; break;
28944 case E_V2SImode
: gen
= gen_neon_vzipv2si_internal
; break;
28945 case E_V2SFmode
: gen
= gen_neon_vzipv2sf_internal
; break;
28946 case E_V4SFmode
: gen
= gen_neon_vzipv4sf_internal
; break;
28948 gcc_unreachable ();
28954 std::swap (in0
, in1
);
28957 out1
= gen_reg_rtx (d
->vmode
);
28959 std::swap (out0
, out1
);
28961 emit_insn (gen (out0
, in0
, in1
, out1
));
28965 /* Recognize patterns for the VREV insns. */
28968 arm_evpc_neon_vrev (struct expand_vec_perm_d
*d
)
28970 unsigned int i
, j
, diff
, nelt
= d
->perm
.length ();
28971 rtx (*gen
)(rtx
, rtx
);
28973 if (!d
->one_vector_p
)
28982 case E_V16QImode
: gen
= gen_neon_vrev64v16qi
; break;
28983 case E_V8QImode
: gen
= gen_neon_vrev64v8qi
; break;
28991 case E_V16QImode
: gen
= gen_neon_vrev32v16qi
; break;
28992 case E_V8QImode
: gen
= gen_neon_vrev32v8qi
; break;
28993 case E_V8HImode
: gen
= gen_neon_vrev64v8hi
; break;
28994 case E_V4HImode
: gen
= gen_neon_vrev64v4hi
; break;
28995 case E_V8HFmode
: gen
= gen_neon_vrev64v8hf
; break;
28996 case E_V4HFmode
: gen
= gen_neon_vrev64v4hf
; break;
29004 case E_V16QImode
: gen
= gen_neon_vrev16v16qi
; break;
29005 case E_V8QImode
: gen
= gen_neon_vrev16v8qi
; break;
29006 case E_V8HImode
: gen
= gen_neon_vrev32v8hi
; break;
29007 case E_V4HImode
: gen
= gen_neon_vrev32v4hi
; break;
29008 case E_V4SImode
: gen
= gen_neon_vrev64v4si
; break;
29009 case E_V2SImode
: gen
= gen_neon_vrev64v2si
; break;
29010 case E_V4SFmode
: gen
= gen_neon_vrev64v4sf
; break;
29011 case E_V2SFmode
: gen
= gen_neon_vrev64v2sf
; break;
29020 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
29021 for (j
= 0; j
<= diff
; j
+= 1)
29023 /* This is guaranteed to be true as the value of diff
29024 is 7, 3, 1 and we should have enough elements in the
29025 queue to generate this. Getting a vector mask with a
29026 value of diff other than these values implies that
29027 something is wrong by the time we get here. */
29028 gcc_assert (i
+ j
< nelt
);
29029 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
29037 emit_insn (gen (d
->target
, d
->op0
));
29041 /* Recognize patterns for the VTRN insns. */
29044 arm_evpc_neon_vtrn (struct expand_vec_perm_d
*d
)
29046 unsigned int i
, odd
, mask
, nelt
= d
->perm
.length ();
29047 rtx out0
, out1
, in0
, in1
;
29048 rtx (*gen
)(rtx
, rtx
, rtx
, rtx
);
29050 if (GET_MODE_UNIT_SIZE (d
->vmode
) >= 8)
29053 /* Note that these are little-endian tests. Adjust for big-endian later. */
29054 if (d
->perm
[0] == 0)
29056 else if (d
->perm
[0] == 1)
29060 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
29062 for (i
= 0; i
< nelt
; i
+= 2)
29064 if (d
->perm
[i
] != i
+ odd
)
29066 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
29076 case E_V16QImode
: gen
= gen_neon_vtrnv16qi_internal
; break;
29077 case E_V8QImode
: gen
= gen_neon_vtrnv8qi_internal
; break;
29078 case E_V8HImode
: gen
= gen_neon_vtrnv8hi_internal
; break;
29079 case E_V4HImode
: gen
= gen_neon_vtrnv4hi_internal
; break;
29080 case E_V8HFmode
: gen
= gen_neon_vtrnv8hf_internal
; break;
29081 case E_V4HFmode
: gen
= gen_neon_vtrnv4hf_internal
; break;
29082 case E_V4SImode
: gen
= gen_neon_vtrnv4si_internal
; break;
29083 case E_V2SImode
: gen
= gen_neon_vtrnv2si_internal
; break;
29084 case E_V2SFmode
: gen
= gen_neon_vtrnv2sf_internal
; break;
29085 case E_V4SFmode
: gen
= gen_neon_vtrnv4sf_internal
; break;
29087 gcc_unreachable ();
29092 if (BYTES_BIG_ENDIAN
)
29094 std::swap (in0
, in1
);
29099 out1
= gen_reg_rtx (d
->vmode
);
29101 std::swap (out0
, out1
);
29103 emit_insn (gen (out0
, in0
, in1
, out1
));
29107 /* Recognize patterns for the VEXT insns. */
29110 arm_evpc_neon_vext (struct expand_vec_perm_d
*d
)
29112 unsigned int i
, nelt
= d
->perm
.length ();
29113 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
29116 unsigned int location
;
29118 unsigned int next
= d
->perm
[0] + 1;
29120 /* TODO: Handle GCC's numbering of elements for big-endian. */
29121 if (BYTES_BIG_ENDIAN
)
29124 /* Check if the extracted indexes are increasing by one. */
29125 for (i
= 1; i
< nelt
; next
++, i
++)
29127 /* If we hit the most significant element of the 2nd vector in
29128 the previous iteration, no need to test further. */
29129 if (next
== 2 * nelt
)
29132 /* If we are operating on only one vector: it could be a
29133 rotation. If there are only two elements of size < 64, let
29134 arm_evpc_neon_vrev catch it. */
29135 if (d
->one_vector_p
&& (next
== nelt
))
29137 if ((nelt
== 2) && (d
->vmode
!= V2DImode
))
29143 if (d
->perm
[i
] != next
)
29147 location
= d
->perm
[0];
29151 case E_V16QImode
: gen
= gen_neon_vextv16qi
; break;
29152 case E_V8QImode
: gen
= gen_neon_vextv8qi
; break;
29153 case E_V4HImode
: gen
= gen_neon_vextv4hi
; break;
29154 case E_V8HImode
: gen
= gen_neon_vextv8hi
; break;
29155 case E_V2SImode
: gen
= gen_neon_vextv2si
; break;
29156 case E_V4SImode
: gen
= gen_neon_vextv4si
; break;
29157 case E_V4HFmode
: gen
= gen_neon_vextv4hf
; break;
29158 case E_V8HFmode
: gen
= gen_neon_vextv8hf
; break;
29159 case E_V2SFmode
: gen
= gen_neon_vextv2sf
; break;
29160 case E_V4SFmode
: gen
= gen_neon_vextv4sf
; break;
29161 case E_V2DImode
: gen
= gen_neon_vextv2di
; break;
29170 offset
= GEN_INT (location
);
29171 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
29175 /* The NEON VTBL instruction is a fully variable permuation that's even
29176 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29177 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29178 can do slightly better by expanding this as a constant where we don't
29179 have to apply a mask. */
29182 arm_evpc_neon_vtbl (struct expand_vec_perm_d
*d
)
29184 rtx rperm
[MAX_VECT_LEN
], sel
;
29185 machine_mode vmode
= d
->vmode
;
29186 unsigned int i
, nelt
= d
->perm
.length ();
29188 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29189 numbering of elements for big-endian, we must reverse the order. */
29190 if (BYTES_BIG_ENDIAN
)
29196 /* Generic code will try constant permutation twice. Once with the
29197 original mode and again with the elements lowered to QImode.
29198 So wait and don't do the selector expansion ourselves. */
29199 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
29202 for (i
= 0; i
< nelt
; ++i
)
29203 rperm
[i
] = GEN_INT (d
->perm
[i
]);
29204 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
29205 sel
= force_reg (vmode
, sel
);
29207 arm_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
29212 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
29214 /* Check if the input mask matches vext before reordering the
29217 if (arm_evpc_neon_vext (d
))
29220 /* The pattern matching functions above are written to look for a small
29221 number to begin the sequence (0, 1, N/2). If we begin with an index
29222 from the second operand, we can swap the operands. */
29223 unsigned int nelt
= d
->perm
.length ();
29224 if (d
->perm
[0] >= nelt
)
29226 for (unsigned int i
= 0; i
< nelt
; ++i
)
29227 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
29229 std::swap (d
->op0
, d
->op1
);
29234 if (arm_evpc_neon_vuzp (d
))
29236 if (arm_evpc_neon_vzip (d
))
29238 if (arm_evpc_neon_vrev (d
))
29240 if (arm_evpc_neon_vtrn (d
))
29242 return arm_evpc_neon_vtbl (d
);
29247 /* Expand a vec_perm_const pattern. */
29250 arm_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
29252 struct expand_vec_perm_d d
;
29253 int i
, nelt
, which
;
29259 d
.vmode
= GET_MODE (target
);
29260 gcc_assert (VECTOR_MODE_P (d
.vmode
));
29261 d
.testing_p
= false;
29263 nelt
= GET_MODE_NUNITS (d
.vmode
);
29264 d
.perm
.reserve (nelt
);
29265 for (i
= which
= 0; i
< nelt
; ++i
)
29267 rtx e
= XVECEXP (sel
, 0, i
);
29268 int ei
= INTVAL (e
) & (2 * nelt
- 1);
29269 which
|= (ei
< nelt
? 1 : 2);
29270 d
.perm
.quick_push (ei
);
29279 d
.one_vector_p
= false;
29280 if (!rtx_equal_p (op0
, op1
))
29283 /* The elements of PERM do not suggest that only the first operand
29284 is used, but both operands are identical. Allow easier matching
29285 of the permutation by folding the permutation into the single
29289 for (i
= 0; i
< nelt
; ++i
)
29290 d
.perm
[i
] &= nelt
- 1;
29292 d
.one_vector_p
= true;
29297 d
.one_vector_p
= true;
29301 return arm_expand_vec_perm_const_1 (&d
);
29304 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29307 arm_vectorize_vec_perm_const_ok (machine_mode vmode
, vec_perm_indices sel
)
29309 struct expand_vec_perm_d d
;
29310 unsigned int i
, nelt
, which
;
29314 d
.testing_p
= true;
29315 d
.perm
.safe_splice (sel
);
29317 /* Categorize the set of elements in the selector. */
29318 nelt
= GET_MODE_NUNITS (d
.vmode
);
29319 for (i
= which
= 0; i
< nelt
; ++i
)
29321 unsigned int e
= d
.perm
[i
];
29322 gcc_assert (e
< 2 * nelt
);
29323 which
|= (e
< nelt
? 1 : 2);
29326 /* For all elements from second vector, fold the elements to first. */
29328 for (i
= 0; i
< nelt
; ++i
)
29331 /* Check whether the mask can be applied to the vector type. */
29332 d
.one_vector_p
= (which
!= 3);
29334 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
29335 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
29336 if (!d
.one_vector_p
)
29337 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
29340 ret
= arm_expand_vec_perm_const_1 (&d
);
29347 arm_autoinc_modes_ok_p (machine_mode mode
, enum arm_auto_incmodes code
)
29349 /* If we are soft float and we do not have ldrd
29350 then all auto increment forms are ok. */
29351 if (TARGET_SOFT_FLOAT
&& (TARGET_LDRD
|| GET_MODE_SIZE (mode
) <= 4))
29356 /* Post increment and Pre Decrement are supported for all
29357 instruction forms except for vector forms. */
29360 if (VECTOR_MODE_P (mode
))
29362 if (code
!= ARM_PRE_DEC
)
29372 /* Without LDRD and mode size greater than
29373 word size, there is no point in auto-incrementing
29374 because ldm and stm will not have these forms. */
29375 if (!TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4)
29378 /* Vector and floating point modes do not support
29379 these auto increment forms. */
29380 if (FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
29393 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29394 on ARM, since we know that shifts by negative amounts are no-ops.
29395 Additionally, the default expansion code is not available or suitable
29396 for post-reload insn splits (this can occur when the register allocator
29397 chooses not to do a shift in NEON).
29399 This function is used in both initial expand and post-reload splits, and
29400 handles all kinds of 64-bit shifts.
29402 Input requirements:
29403 - It is safe for the input and output to be the same register, but
29404 early-clobber rules apply for the shift amount and scratch registers.
29405 - Shift by register requires both scratch registers. In all other cases
29406 the scratch registers may be NULL.
29407 - Ashiftrt by a register also clobbers the CC register. */
29409 arm_emit_coreregs_64bit_shift (enum rtx_code code
, rtx out
, rtx in
,
29410 rtx amount
, rtx scratch1
, rtx scratch2
)
29412 rtx out_high
= gen_highpart (SImode
, out
);
29413 rtx out_low
= gen_lowpart (SImode
, out
);
29414 rtx in_high
= gen_highpart (SImode
, in
);
29415 rtx in_low
= gen_lowpart (SImode
, in
);
29418 in = the register pair containing the input value.
29419 out = the destination register pair.
29420 up = the high- or low-part of each pair.
29421 down = the opposite part to "up".
29422 In a shift, we can consider bits to shift from "up"-stream to
29423 "down"-stream, so in a left-shift "up" is the low-part and "down"
29424 is the high-part of each register pair. */
29426 rtx out_up
= code
== ASHIFT
? out_low
: out_high
;
29427 rtx out_down
= code
== ASHIFT
? out_high
: out_low
;
29428 rtx in_up
= code
== ASHIFT
? in_low
: in_high
;
29429 rtx in_down
= code
== ASHIFT
? in_high
: in_low
;
29431 gcc_assert (code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
);
29433 && (REG_P (out
) || GET_CODE (out
) == SUBREG
)
29434 && GET_MODE (out
) == DImode
);
29436 && (REG_P (in
) || GET_CODE (in
) == SUBREG
)
29437 && GET_MODE (in
) == DImode
);
29439 && (((REG_P (amount
) || GET_CODE (amount
) == SUBREG
)
29440 && GET_MODE (amount
) == SImode
)
29441 || CONST_INT_P (amount
)));
29442 gcc_assert (scratch1
== NULL
29443 || (GET_CODE (scratch1
) == SCRATCH
)
29444 || (GET_MODE (scratch1
) == SImode
29445 && REG_P (scratch1
)));
29446 gcc_assert (scratch2
== NULL
29447 || (GET_CODE (scratch2
) == SCRATCH
)
29448 || (GET_MODE (scratch2
) == SImode
29449 && REG_P (scratch2
)));
29450 gcc_assert (!REG_P (out
) || !REG_P (amount
)
29451 || !HARD_REGISTER_P (out
)
29452 || (REGNO (out
) != REGNO (amount
)
29453 && REGNO (out
) + 1 != REGNO (amount
)));
29455 /* Macros to make following code more readable. */
29456 #define SUB_32(DEST,SRC) \
29457 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29458 #define RSB_32(DEST,SRC) \
29459 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29460 #define SUB_S_32(DEST,SRC) \
29461 gen_addsi3_compare0 ((DEST), (SRC), \
29463 #define SET(DEST,SRC) \
29464 gen_rtx_SET ((DEST), (SRC))
29465 #define SHIFT(CODE,SRC,AMOUNT) \
29466 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29467 #define LSHIFT(CODE,SRC,AMOUNT) \
29468 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29469 SImode, (SRC), (AMOUNT))
29470 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29471 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29472 SImode, (SRC), (AMOUNT))
29474 gen_rtx_IOR (SImode, (A), (B))
29475 #define BRANCH(COND,LABEL) \
29476 gen_arm_cond_branch ((LABEL), \
29477 gen_rtx_ ## COND (CCmode, cc_reg, \
29481 /* Shifts by register and shifts by constant are handled separately. */
29482 if (CONST_INT_P (amount
))
29484 /* We have a shift-by-constant. */
29486 /* First, handle out-of-range shift amounts.
29487 In both cases we try to match the result an ARM instruction in a
29488 shift-by-register would give. This helps reduce execution
29489 differences between optimization levels, but it won't stop other
29490 parts of the compiler doing different things. This is "undefined
29491 behavior, in any case. */
29492 if (INTVAL (amount
) <= 0)
29493 emit_insn (gen_movdi (out
, in
));
29494 else if (INTVAL (amount
) >= 64)
29496 if (code
== ASHIFTRT
)
29498 rtx const31_rtx
= GEN_INT (31);
29499 emit_insn (SET (out_down
, SHIFT (code
, in_up
, const31_rtx
)));
29500 emit_insn (SET (out_up
, SHIFT (code
, in_up
, const31_rtx
)));
29503 emit_insn (gen_movdi (out
, const0_rtx
));
29506 /* Now handle valid shifts. */
29507 else if (INTVAL (amount
) < 32)
29509 /* Shifts by a constant less than 32. */
29510 rtx reverse_amount
= GEN_INT (32 - INTVAL (amount
));
29512 /* Clearing the out register in DImode first avoids lots
29513 of spilling and results in less stack usage.
29514 Later this redundant insn is completely removed.
29515 Do that only if "in" and "out" are different registers. */
29516 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29517 emit_insn (SET (out
, const0_rtx
));
29518 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29519 emit_insn (SET (out_down
,
29520 ORR (REV_LSHIFT (code
, in_up
, reverse_amount
),
29522 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29526 /* Shifts by a constant greater than 31. */
29527 rtx adj_amount
= GEN_INT (INTVAL (amount
) - 32);
29529 if (REG_P (out
) && REG_P (in
) && REGNO (out
) != REGNO (in
))
29530 emit_insn (SET (out
, const0_rtx
));
29531 emit_insn (SET (out_down
, SHIFT (code
, in_up
, adj_amount
)));
29532 if (code
== ASHIFTRT
)
29533 emit_insn (gen_ashrsi3 (out_up
, in_up
,
29536 emit_insn (SET (out_up
, const0_rtx
));
29541 /* We have a shift-by-register. */
29542 rtx cc_reg
= gen_rtx_REG (CC_NOOVmode
, CC_REGNUM
);
29544 /* This alternative requires the scratch registers. */
29545 gcc_assert (scratch1
&& REG_P (scratch1
));
29546 gcc_assert (scratch2
&& REG_P (scratch2
));
29548 /* We will need the values "amount-32" and "32-amount" later.
29549 Swapping them around now allows the later code to be more general. */
29553 emit_insn (SUB_32 (scratch1
, amount
));
29554 emit_insn (RSB_32 (scratch2
, amount
));
29557 emit_insn (RSB_32 (scratch1
, amount
));
29558 /* Also set CC = amount > 32. */
29559 emit_insn (SUB_S_32 (scratch2
, amount
));
29562 emit_insn (RSB_32 (scratch1
, amount
));
29563 emit_insn (SUB_32 (scratch2
, amount
));
29566 gcc_unreachable ();
29569 /* Emit code like this:
29572 out_down = in_down << amount;
29573 out_down = (in_up << (amount - 32)) | out_down;
29574 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29575 out_up = in_up << amount;
29578 out_down = in_down >> amount;
29579 out_down = (in_up << (32 - amount)) | out_down;
29581 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29582 out_up = in_up << amount;
29585 out_down = in_down >> amount;
29586 out_down = (in_up << (32 - amount)) | out_down;
29588 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29589 out_up = in_up << amount;
29591 The ARM and Thumb2 variants are the same but implemented slightly
29592 differently. If this were only called during expand we could just
29593 use the Thumb2 case and let combine do the right thing, but this
29594 can also be called from post-reload splitters. */
29596 emit_insn (SET (out_down
, LSHIFT (code
, in_down
, amount
)));
29598 if (!TARGET_THUMB2
)
29600 /* Emit code for ARM mode. */
29601 emit_insn (SET (out_down
,
29602 ORR (SHIFT (ASHIFT
, in_up
, scratch1
), out_down
)));
29603 if (code
== ASHIFTRT
)
29605 rtx_code_label
*done_label
= gen_label_rtx ();
29606 emit_jump_insn (BRANCH (LT
, done_label
));
29607 emit_insn (SET (out_down
, ORR (SHIFT (ASHIFTRT
, in_up
, scratch2
),
29609 emit_label (done_label
);
29612 emit_insn (SET (out_down
, ORR (SHIFT (LSHIFTRT
, in_up
, scratch2
),
29617 /* Emit code for Thumb2 mode.
29618 Thumb2 can't do shift and or in one insn. */
29619 emit_insn (SET (scratch1
, SHIFT (ASHIFT
, in_up
, scratch1
)));
29620 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch1
));
29622 if (code
== ASHIFTRT
)
29624 rtx_code_label
*done_label
= gen_label_rtx ();
29625 emit_jump_insn (BRANCH (LT
, done_label
));
29626 emit_insn (SET (scratch2
, SHIFT (ASHIFTRT
, in_up
, scratch2
)));
29627 emit_insn (SET (out_down
, ORR (out_down
, scratch2
)));
29628 emit_label (done_label
);
29632 emit_insn (SET (scratch2
, SHIFT (LSHIFTRT
, in_up
, scratch2
)));
29633 emit_insn (gen_iorsi3 (out_down
, out_down
, scratch2
));
29637 emit_insn (SET (out_up
, SHIFT (code
, in_up
, amount
)));
29651 /* Returns true if the pattern is a valid symbolic address, which is either a
29652 symbol_ref or (symbol_ref + addend).
29654 According to the ARM ELF ABI, the initial addend of REL-type relocations
29655 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29656 literal field of the instruction as a 16-bit signed value in the range
29657 -32768 <= A < 32768. */
29660 arm_valid_symbolic_address_p (rtx addr
)
29662 rtx xop0
, xop1
= NULL_RTX
;
29665 if (GET_CODE (tmp
) == SYMBOL_REF
|| GET_CODE (tmp
) == LABEL_REF
)
29668 /* (const (plus: symbol_ref const_int)) */
29669 if (GET_CODE (addr
) == CONST
)
29670 tmp
= XEXP (addr
, 0);
29672 if (GET_CODE (tmp
) == PLUS
)
29674 xop0
= XEXP (tmp
, 0);
29675 xop1
= XEXP (tmp
, 1);
29677 if (GET_CODE (xop0
) == SYMBOL_REF
&& CONST_INT_P (xop1
))
29678 return IN_RANGE (INTVAL (xop1
), -0x8000, 0x7fff);
29684 /* Returns true if a valid comparison operation and makes
29685 the operands in a form that is valid. */
29687 arm_validize_comparison (rtx
*comparison
, rtx
* op1
, rtx
* op2
)
29689 enum rtx_code code
= GET_CODE (*comparison
);
29691 machine_mode mode
= (GET_MODE (*op1
) == VOIDmode
)
29692 ? GET_MODE (*op2
) : GET_MODE (*op1
);
29694 gcc_assert (GET_MODE (*op1
) != VOIDmode
|| GET_MODE (*op2
) != VOIDmode
);
29696 if (code
== UNEQ
|| code
== LTGT
)
29699 code_int
= (int)code
;
29700 arm_canonicalize_comparison (&code_int
, op1
, op2
, 0);
29701 PUT_CODE (*comparison
, (enum rtx_code
)code_int
);
29706 if (!arm_add_operand (*op1
, mode
))
29707 *op1
= force_reg (mode
, *op1
);
29708 if (!arm_add_operand (*op2
, mode
))
29709 *op2
= force_reg (mode
, *op2
);
29713 if (!cmpdi_operand (*op1
, mode
))
29714 *op1
= force_reg (mode
, *op1
);
29715 if (!cmpdi_operand (*op2
, mode
))
29716 *op2
= force_reg (mode
, *op2
);
29720 if (!TARGET_VFP_FP16INST
)
29722 /* FP16 comparisons are done in SF mode. */
29724 *op1
= convert_to_mode (mode
, *op1
, 1);
29725 *op2
= convert_to_mode (mode
, *op2
, 1);
29726 /* Fall through. */
29729 if (!vfp_compare_operand (*op1
, mode
))
29730 *op1
= force_reg (mode
, *op1
);
29731 if (!vfp_compare_operand (*op2
, mode
))
29732 *op2
= force_reg (mode
, *op2
);
29742 /* Maximum number of instructions to set block of memory. */
29744 arm_block_set_max_insns (void)
29746 if (optimize_function_for_size_p (cfun
))
29749 return current_tune
->max_insns_inline_memset
;
29752 /* Return TRUE if it's profitable to set block of memory for
29753 non-vectorized case. VAL is the value to set the memory
29754 with. LENGTH is the number of bytes to set. ALIGN is the
29755 alignment of the destination memory in bytes. UNALIGNED_P
29756 is TRUE if we can only set the memory with instructions
29757 meeting alignment requirements. USE_STRD_P is TRUE if we
29758 can use strd to set the memory. */
29760 arm_block_set_non_vect_profit_p (rtx val
,
29761 unsigned HOST_WIDE_INT length
,
29762 unsigned HOST_WIDE_INT align
,
29763 bool unaligned_p
, bool use_strd_p
)
29766 /* For leftovers in bytes of 0-7, we can set the memory block using
29767 strb/strh/str with minimum instruction number. */
29768 const int leftover
[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29772 num
= arm_const_inline_cost (SET
, val
);
29773 num
+= length
/ align
+ length
% align
;
29775 else if (use_strd_p
)
29777 num
= arm_const_double_inline_cost (val
);
29778 num
+= (length
>> 3) + leftover
[length
& 7];
29782 num
= arm_const_inline_cost (SET
, val
);
29783 num
+= (length
>> 2) + leftover
[length
& 3];
29786 /* We may be able to combine last pair STRH/STRB into a single STR
29787 by shifting one byte back. */
29788 if (unaligned_access
&& length
> 3 && (length
& 3) == 3)
29791 return (num
<= arm_block_set_max_insns ());
29794 /* Return TRUE if it's profitable to set block of memory for
29795 vectorized case. LENGTH is the number of bytes to set.
29796 ALIGN is the alignment of destination memory in bytes.
29797 MODE is the vector mode used to set the memory. */
29799 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length
,
29800 unsigned HOST_WIDE_INT align
,
29804 bool unaligned_p
= ((align
& 3) != 0);
29805 unsigned int nelt
= GET_MODE_NUNITS (mode
);
29807 /* Instruction loading constant value. */
29809 /* Instructions storing the memory. */
29810 num
+= (length
+ nelt
- 1) / nelt
;
29811 /* Instructions adjusting the address expression. Only need to
29812 adjust address expression if it's 4 bytes aligned and bytes
29813 leftover can only be stored by mis-aligned store instruction. */
29814 if (!unaligned_p
&& (length
& 3) != 0)
29817 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29818 if (!unaligned_p
&& mode
== V16QImode
)
29821 return (num
<= arm_block_set_max_insns ());
29824 /* Set a block of memory using vectorization instructions for the
29825 unaligned case. We fill the first LENGTH bytes of the memory
29826 area starting from DSTBASE with byte constant VALUE. ALIGN is
29827 the alignment requirement of memory. Return TRUE if succeeded. */
29829 arm_block_set_unaligned_vect (rtx dstbase
,
29830 unsigned HOST_WIDE_INT length
,
29831 unsigned HOST_WIDE_INT value
,
29832 unsigned HOST_WIDE_INT align
)
29834 unsigned int i
, nelt_v16
, nelt_v8
, nelt_mode
;
29837 rtx (*gen_func
) (rtx
, rtx
);
29839 unsigned HOST_WIDE_INT v
= value
;
29840 unsigned int offset
= 0;
29841 gcc_assert ((align
& 0x3) != 0);
29842 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29843 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29844 if (length
>= nelt_v16
)
29847 gen_func
= gen_movmisalignv16qi
;
29852 gen_func
= gen_movmisalignv8qi
;
29854 nelt_mode
= GET_MODE_NUNITS (mode
);
29855 gcc_assert (length
>= nelt_mode
);
29856 /* Skip if it isn't profitable. */
29857 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29860 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29861 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29863 v
= sext_hwi (v
, BITS_PER_WORD
);
29865 reg
= gen_reg_rtx (mode
);
29866 val_vec
= gen_const_vec_duplicate (mode
, GEN_INT (v
));
29867 /* Emit instruction loading the constant value. */
29868 emit_move_insn (reg
, val_vec
);
29870 /* Handle nelt_mode bytes in a vector. */
29871 for (i
= 0; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29873 emit_insn ((*gen_func
) (mem
, reg
));
29874 if (i
+ 2 * nelt_mode
<= length
)
29876 emit_insn (gen_add2_insn (dst
, GEN_INT (nelt_mode
)));
29877 offset
+= nelt_mode
;
29878 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29882 /* If there are not less than nelt_v8 bytes leftover, we must be in
29884 gcc_assert ((i
+ nelt_v8
) > length
|| mode
== V16QImode
);
29886 /* Handle (8, 16) bytes leftover. */
29887 if (i
+ nelt_v8
< length
)
29889 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- i
)));
29890 offset
+= length
- i
;
29891 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29893 /* We are shifting bytes back, set the alignment accordingly. */
29894 if ((length
& 1) != 0 && align
>= 2)
29895 set_mem_align (mem
, BITS_PER_UNIT
);
29897 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29899 /* Handle (0, 8] bytes leftover. */
29900 else if (i
< length
&& i
+ nelt_v8
>= length
)
29902 if (mode
== V16QImode
)
29903 reg
= gen_lowpart (V8QImode
, reg
);
29905 emit_insn (gen_add2_insn (dst
, GEN_INT ((length
- i
)
29906 + (nelt_mode
- nelt_v8
))));
29907 offset
+= (length
- i
) + (nelt_mode
- nelt_v8
);
29908 mem
= adjust_automodify_address (dstbase
, V8QImode
, dst
, offset
);
29910 /* We are shifting bytes back, set the alignment accordingly. */
29911 if ((length
& 1) != 0 && align
>= 2)
29912 set_mem_align (mem
, BITS_PER_UNIT
);
29914 emit_insn (gen_movmisalignv8qi (mem
, reg
));
29920 /* Set a block of memory using vectorization instructions for the
29921 aligned case. We fill the first LENGTH bytes of the memory area
29922 starting from DSTBASE with byte constant VALUE. ALIGN is the
29923 alignment requirement of memory. Return TRUE if succeeded. */
29925 arm_block_set_aligned_vect (rtx dstbase
,
29926 unsigned HOST_WIDE_INT length
,
29927 unsigned HOST_WIDE_INT value
,
29928 unsigned HOST_WIDE_INT align
)
29930 unsigned int i
, nelt_v8
, nelt_v16
, nelt_mode
;
29931 rtx dst
, addr
, mem
;
29934 unsigned HOST_WIDE_INT v
= value
;
29935 unsigned int offset
= 0;
29937 gcc_assert ((align
& 0x3) == 0);
29938 nelt_v8
= GET_MODE_NUNITS (V8QImode
);
29939 nelt_v16
= GET_MODE_NUNITS (V16QImode
);
29940 if (length
>= nelt_v16
&& unaligned_access
&& !BYTES_BIG_ENDIAN
)
29945 nelt_mode
= GET_MODE_NUNITS (mode
);
29946 gcc_assert (length
>= nelt_mode
);
29947 /* Skip if it isn't profitable. */
29948 if (!arm_block_set_vect_profit_p (length
, align
, mode
))
29951 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
29953 v
= sext_hwi (v
, BITS_PER_WORD
);
29955 reg
= gen_reg_rtx (mode
);
29956 val_vec
= gen_const_vec_duplicate (mode
, GEN_INT (v
));
29957 /* Emit instruction loading the constant value. */
29958 emit_move_insn (reg
, val_vec
);
29961 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29962 if (mode
== V16QImode
)
29964 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29965 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29967 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29968 if (i
+ nelt_v8
< length
&& i
+ nelt_v16
> length
)
29970 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
29971 offset
+= length
- nelt_mode
;
29972 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
29973 /* We are shifting bytes back, set the alignment accordingly. */
29974 if ((length
& 0x3) == 0)
29975 set_mem_align (mem
, BITS_PER_UNIT
* 4);
29976 else if ((length
& 0x1) == 0)
29977 set_mem_align (mem
, BITS_PER_UNIT
* 2);
29979 set_mem_align (mem
, BITS_PER_UNIT
);
29981 emit_insn (gen_movmisalignv16qi (mem
, reg
));
29984 /* Fall through for bytes leftover. */
29986 nelt_mode
= GET_MODE_NUNITS (mode
);
29987 reg
= gen_lowpart (V8QImode
, reg
);
29990 /* Handle 8 bytes in a vector. */
29991 for (; (i
+ nelt_mode
<= length
); i
+= nelt_mode
)
29993 addr
= plus_constant (Pmode
, dst
, i
);
29994 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
+ i
);
29995 emit_move_insn (mem
, reg
);
29998 /* Handle single word leftover by shifting 4 bytes back. We can
29999 use aligned access for this case. */
30000 if (i
+ UNITS_PER_WORD
== length
)
30002 addr
= plus_constant (Pmode
, dst
, i
- UNITS_PER_WORD
);
30003 offset
+= i
- UNITS_PER_WORD
;
30004 mem
= adjust_automodify_address (dstbase
, mode
, addr
, offset
);
30005 /* We are shifting 4 bytes back, set the alignment accordingly. */
30006 if (align
> UNITS_PER_WORD
)
30007 set_mem_align (mem
, BITS_PER_UNIT
* UNITS_PER_WORD
);
30009 emit_move_insn (mem
, reg
);
30011 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30012 We have to use unaligned access for this case. */
30013 else if (i
< length
)
30015 emit_insn (gen_add2_insn (dst
, GEN_INT (length
- nelt_mode
)));
30016 offset
+= length
- nelt_mode
;
30017 mem
= adjust_automodify_address (dstbase
, mode
, dst
, offset
);
30018 /* We are shifting bytes back, set the alignment accordingly. */
30019 if ((length
& 1) == 0)
30020 set_mem_align (mem
, BITS_PER_UNIT
* 2);
30022 set_mem_align (mem
, BITS_PER_UNIT
);
30024 emit_insn (gen_movmisalignv8qi (mem
, reg
));
30030 /* Set a block of memory using plain strh/strb instructions, only
30031 using instructions allowed by ALIGN on processor. We fill the
30032 first LENGTH bytes of the memory area starting from DSTBASE
30033 with byte constant VALUE. ALIGN is the alignment requirement
30036 arm_block_set_unaligned_non_vect (rtx dstbase
,
30037 unsigned HOST_WIDE_INT length
,
30038 unsigned HOST_WIDE_INT value
,
30039 unsigned HOST_WIDE_INT align
)
30042 rtx dst
, addr
, mem
;
30043 rtx val_exp
, val_reg
, reg
;
30045 HOST_WIDE_INT v
= value
;
30047 gcc_assert (align
== 1 || align
== 2);
30050 v
|= (value
<< BITS_PER_UNIT
);
30052 v
= sext_hwi (v
, BITS_PER_WORD
);
30053 val_exp
= GEN_INT (v
);
30054 /* Skip if it isn't profitable. */
30055 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
30056 align
, true, false))
30059 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
30060 mode
= (align
== 2 ? HImode
: QImode
);
30061 val_reg
= force_reg (SImode
, val_exp
);
30062 reg
= gen_lowpart (mode
, val_reg
);
30064 for (i
= 0; (i
+ GET_MODE_SIZE (mode
) <= length
); i
+= GET_MODE_SIZE (mode
))
30066 addr
= plus_constant (Pmode
, dst
, i
);
30067 mem
= adjust_automodify_address (dstbase
, mode
, addr
, i
);
30068 emit_move_insn (mem
, reg
);
30071 /* Handle single byte leftover. */
30072 if (i
+ 1 == length
)
30074 reg
= gen_lowpart (QImode
, val_reg
);
30075 addr
= plus_constant (Pmode
, dst
, i
);
30076 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
30077 emit_move_insn (mem
, reg
);
30081 gcc_assert (i
== length
);
30085 /* Set a block of memory using plain strd/str/strh/strb instructions,
30086 to permit unaligned copies on processors which support unaligned
30087 semantics for those instructions. We fill the first LENGTH bytes
30088 of the memory area starting from DSTBASE with byte constant VALUE.
30089 ALIGN is the alignment requirement of memory. */
30091 arm_block_set_aligned_non_vect (rtx dstbase
,
30092 unsigned HOST_WIDE_INT length
,
30093 unsigned HOST_WIDE_INT value
,
30094 unsigned HOST_WIDE_INT align
)
30097 rtx dst
, addr
, mem
;
30098 rtx val_exp
, val_reg
, reg
;
30099 unsigned HOST_WIDE_INT v
;
30102 use_strd_p
= (length
>= 2 * UNITS_PER_WORD
&& (align
& 3) == 0
30103 && TARGET_LDRD
&& current_tune
->prefer_ldrd_strd
);
30105 v
= (value
| (value
<< 8) | (value
<< 16) | (value
<< 24));
30106 if (length
< UNITS_PER_WORD
)
30107 v
&= (0xFFFFFFFF >> (UNITS_PER_WORD
- length
) * BITS_PER_UNIT
);
30110 v
|= (v
<< BITS_PER_WORD
);
30112 v
= sext_hwi (v
, BITS_PER_WORD
);
30114 val_exp
= GEN_INT (v
);
30115 /* Skip if it isn't profitable. */
30116 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
30117 align
, false, use_strd_p
))
30122 /* Try without strd. */
30123 v
= (v
>> BITS_PER_WORD
);
30124 v
= sext_hwi (v
, BITS_PER_WORD
);
30125 val_exp
= GEN_INT (v
);
30126 use_strd_p
= false;
30127 if (!arm_block_set_non_vect_profit_p (val_exp
, length
,
30128 align
, false, use_strd_p
))
30133 dst
= copy_addr_to_reg (XEXP (dstbase
, 0));
30134 /* Handle double words using strd if possible. */
30137 val_reg
= force_reg (DImode
, val_exp
);
30139 for (; (i
+ 8 <= length
); i
+= 8)
30141 addr
= plus_constant (Pmode
, dst
, i
);
30142 mem
= adjust_automodify_address (dstbase
, DImode
, addr
, i
);
30143 emit_move_insn (mem
, reg
);
30147 val_reg
= force_reg (SImode
, val_exp
);
30149 /* Handle words. */
30150 reg
= (use_strd_p
? gen_lowpart (SImode
, val_reg
) : val_reg
);
30151 for (; (i
+ 4 <= length
); i
+= 4)
30153 addr
= plus_constant (Pmode
, dst
, i
);
30154 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
);
30155 if ((align
& 3) == 0)
30156 emit_move_insn (mem
, reg
);
30158 emit_insn (gen_unaligned_storesi (mem
, reg
));
30161 /* Merge last pair of STRH and STRB into a STR if possible. */
30162 if (unaligned_access
&& i
> 0 && (i
+ 3) == length
)
30164 addr
= plus_constant (Pmode
, dst
, i
- 1);
30165 mem
= adjust_automodify_address (dstbase
, SImode
, addr
, i
- 1);
30166 /* We are shifting one byte back, set the alignment accordingly. */
30167 if ((align
& 1) == 0)
30168 set_mem_align (mem
, BITS_PER_UNIT
);
30170 /* Most likely this is an unaligned access, and we can't tell at
30171 compilation time. */
30172 emit_insn (gen_unaligned_storesi (mem
, reg
));
30176 /* Handle half word leftover. */
30177 if (i
+ 2 <= length
)
30179 reg
= gen_lowpart (HImode
, val_reg
);
30180 addr
= plus_constant (Pmode
, dst
, i
);
30181 mem
= adjust_automodify_address (dstbase
, HImode
, addr
, i
);
30182 if ((align
& 1) == 0)
30183 emit_move_insn (mem
, reg
);
30185 emit_insn (gen_unaligned_storehi (mem
, reg
));
30190 /* Handle single byte leftover. */
30191 if (i
+ 1 == length
)
30193 reg
= gen_lowpart (QImode
, val_reg
);
30194 addr
= plus_constant (Pmode
, dst
, i
);
30195 mem
= adjust_automodify_address (dstbase
, QImode
, addr
, i
);
30196 emit_move_insn (mem
, reg
);
30202 /* Set a block of memory using vectorization instructions for both
30203 aligned and unaligned cases. We fill the first LENGTH bytes of
30204 the memory area starting from DSTBASE with byte constant VALUE.
30205 ALIGN is the alignment requirement of memory. */
30207 arm_block_set_vect (rtx dstbase
,
30208 unsigned HOST_WIDE_INT length
,
30209 unsigned HOST_WIDE_INT value
,
30210 unsigned HOST_WIDE_INT align
)
30212 /* Check whether we need to use unaligned store instruction. */
30213 if (((align
& 3) != 0 || (length
& 3) != 0)
30214 /* Check whether unaligned store instruction is available. */
30215 && (!unaligned_access
|| BYTES_BIG_ENDIAN
))
30218 if ((align
& 3) == 0)
30219 return arm_block_set_aligned_vect (dstbase
, length
, value
, align
);
30221 return arm_block_set_unaligned_vect (dstbase
, length
, value
, align
);
30224 /* Expand string store operation. Firstly we try to do that by using
30225 vectorization instructions, then try with ARM unaligned access and
30226 double-word store if profitable. OPERANDS[0] is the destination,
30227 OPERANDS[1] is the number of bytes, operands[2] is the value to
30228 initialize the memory, OPERANDS[3] is the known alignment of the
30231 arm_gen_setmem (rtx
*operands
)
30233 rtx dstbase
= operands
[0];
30234 unsigned HOST_WIDE_INT length
;
30235 unsigned HOST_WIDE_INT value
;
30236 unsigned HOST_WIDE_INT align
;
30238 if (!CONST_INT_P (operands
[2]) || !CONST_INT_P (operands
[1]))
30241 length
= UINTVAL (operands
[1]);
30245 value
= (UINTVAL (operands
[2]) & 0xFF);
30246 align
= UINTVAL (operands
[3]);
30247 if (TARGET_NEON
&& length
>= 8
30248 && current_tune
->string_ops_prefer_neon
30249 && arm_block_set_vect (dstbase
, length
, value
, align
))
30252 if (!unaligned_access
&& (align
& 3) != 0)
30253 return arm_block_set_unaligned_non_vect (dstbase
, length
, value
, align
);
30255 return arm_block_set_aligned_non_vect (dstbase
, length
, value
, align
);
30260 arm_macro_fusion_p (void)
30262 return current_tune
->fusible_ops
!= tune_params::FUSE_NOTHING
;
30265 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30266 for MOVW / MOVT macro fusion. */
30269 arm_sets_movw_movt_fusible_p (rtx prev_set
, rtx curr_set
)
30271 /* We are trying to fuse
30272 movw imm / movt imm
30273 instructions as a group that gets scheduled together. */
30275 rtx set_dest
= SET_DEST (curr_set
);
30277 if (GET_MODE (set_dest
) != SImode
)
30280 /* We are trying to match:
30281 prev (movw) == (set (reg r0) (const_int imm16))
30282 curr (movt) == (set (zero_extract (reg r0)
30285 (const_int imm16_1))
30287 prev (movw) == (set (reg r1)
30288 (high (symbol_ref ("SYM"))))
30289 curr (movt) == (set (reg r0)
30291 (symbol_ref ("SYM")))) */
30293 if (GET_CODE (set_dest
) == ZERO_EXTRACT
)
30295 if (CONST_INT_P (SET_SRC (curr_set
))
30296 && CONST_INT_P (SET_SRC (prev_set
))
30297 && REG_P (XEXP (set_dest
, 0))
30298 && REG_P (SET_DEST (prev_set
))
30299 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
30303 else if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
30304 && REG_P (SET_DEST (curr_set
))
30305 && REG_P (SET_DEST (prev_set
))
30306 && GET_CODE (SET_SRC (prev_set
)) == HIGH
30307 && REGNO (SET_DEST (curr_set
)) == REGNO (SET_DEST (prev_set
)))
30314 aarch_macro_fusion_pair_p (rtx_insn
* prev
, rtx_insn
* curr
)
30316 rtx prev_set
= single_set (prev
);
30317 rtx curr_set
= single_set (curr
);
30323 if (any_condjump_p (curr
))
30326 if (!arm_macro_fusion_p ())
30329 if (current_tune
->fusible_ops
& tune_params::FUSE_AES_AESMC
30330 && aarch_crypto_can_dual_issue (prev
, curr
))
30333 if (current_tune
->fusible_ops
& tune_params::FUSE_MOVW_MOVT
30334 && arm_sets_movw_movt_fusible_p (prev_set
, curr_set
))
30340 /* Return true iff the instruction fusion described by OP is enabled. */
30342 arm_fusion_enabled_p (tune_params::fuse_ops op
)
30344 return current_tune
->fusible_ops
& op
;
30347 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30348 scheduled for speculative execution. Reject the long-running division
30349 and square-root instructions. */
30352 arm_sched_can_speculate_insn (rtx_insn
*insn
)
30354 switch (get_attr_type (insn
))
30362 case TYPE_NEON_FP_SQRT_S
:
30363 case TYPE_NEON_FP_SQRT_D
:
30364 case TYPE_NEON_FP_SQRT_S_Q
:
30365 case TYPE_NEON_FP_SQRT_D_Q
:
30366 case TYPE_NEON_FP_DIV_S
:
30367 case TYPE_NEON_FP_DIV_D
:
30368 case TYPE_NEON_FP_DIV_S_Q
:
30369 case TYPE_NEON_FP_DIV_D_Q
:
30376 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30378 static unsigned HOST_WIDE_INT
30379 arm_asan_shadow_offset (void)
30381 return HOST_WIDE_INT_1U
<< 29;
30385 /* This is a temporary fix for PR60655. Ideally we need
30386 to handle most of these cases in the generic part but
30387 currently we reject minus (..) (sym_ref). We try to
30388 ameliorate the case with minus (sym_ref1) (sym_ref2)
30389 where they are in the same section. */
30392 arm_const_not_ok_for_debug_p (rtx p
)
30394 tree decl_op0
= NULL
;
30395 tree decl_op1
= NULL
;
30397 if (GET_CODE (p
) == UNSPEC
)
30399 if (GET_CODE (p
) == MINUS
)
30401 if (GET_CODE (XEXP (p
, 1)) == SYMBOL_REF
)
30403 decl_op1
= SYMBOL_REF_DECL (XEXP (p
, 1));
30405 && GET_CODE (XEXP (p
, 0)) == SYMBOL_REF
30406 && (decl_op0
= SYMBOL_REF_DECL (XEXP (p
, 0))))
30408 if ((VAR_P (decl_op1
)
30409 || TREE_CODE (decl_op1
) == CONST_DECL
)
30410 && (VAR_P (decl_op0
)
30411 || TREE_CODE (decl_op0
) == CONST_DECL
))
30412 return (get_variable_section (decl_op1
, false)
30413 != get_variable_section (decl_op0
, false));
30415 if (TREE_CODE (decl_op1
) == LABEL_DECL
30416 && TREE_CODE (decl_op0
) == LABEL_DECL
)
30417 return (DECL_CONTEXT (decl_op1
)
30418 != DECL_CONTEXT (decl_op0
));
30428 /* return TRUE if x is a reference to a value in a constant pool */
30430 arm_is_constant_pool_ref (rtx x
)
30433 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
30434 && CONSTANT_POOL_ADDRESS_P (XEXP (x
, 0)));
30437 /* Remember the last target of arm_set_current_function. */
30438 static GTY(()) tree arm_previous_fndecl
;
30440 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30443 save_restore_target_globals (tree new_tree
)
30445 /* If we have a previous state, use it. */
30446 if (TREE_TARGET_GLOBALS (new_tree
))
30447 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
30448 else if (new_tree
== target_option_default_node
)
30449 restore_target_globals (&default_target_globals
);
30452 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30453 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
30456 arm_option_params_internal ();
30459 /* Invalidate arm_previous_fndecl. */
30462 arm_reset_previous_fndecl (void)
30464 arm_previous_fndecl
= NULL_TREE
;
30467 /* Establish appropriate back-end context for processing the function
30468 FNDECL. The argument might be NULL to indicate processing at top
30469 level, outside of any function scope. */
30472 arm_set_current_function (tree fndecl
)
30474 if (!fndecl
|| fndecl
== arm_previous_fndecl
)
30477 tree old_tree
= (arm_previous_fndecl
30478 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl
)
30481 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30483 /* If current function has no attributes but previous one did,
30484 use the default node. */
30485 if (! new_tree
&& old_tree
)
30486 new_tree
= target_option_default_node
;
30488 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30489 the default have been handled by save_restore_target_globals from
30490 arm_pragma_target_parse. */
30491 if (old_tree
== new_tree
)
30494 arm_previous_fndecl
= fndecl
;
30496 /* First set the target options. */
30497 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
30499 save_restore_target_globals (new_tree
);
30502 /* Implement TARGET_OPTION_PRINT. */
30505 arm_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
30507 int flags
= ptr
->x_target_flags
;
30508 const char *fpu_name
;
30510 fpu_name
= (ptr
->x_arm_fpu_index
== TARGET_FPU_auto
30511 ? "auto" : all_fpus
[ptr
->x_arm_fpu_index
].name
);
30513 fprintf (file
, "%*sselected isa %s\n", indent
, "",
30514 TARGET_THUMB2_P (flags
) ? "thumb2" :
30515 TARGET_THUMB_P (flags
) ? "thumb1" :
30518 if (ptr
->x_arm_arch_string
)
30519 fprintf (file
, "%*sselected architecture %s\n", indent
, "",
30520 ptr
->x_arm_arch_string
);
30522 if (ptr
->x_arm_cpu_string
)
30523 fprintf (file
, "%*sselected CPU %s\n", indent
, "",
30524 ptr
->x_arm_cpu_string
);
30526 if (ptr
->x_arm_tune_string
)
30527 fprintf (file
, "%*sselected tune %s\n", indent
, "",
30528 ptr
->x_arm_tune_string
);
30530 fprintf (file
, "%*sselected fpu %s\n", indent
, "", fpu_name
);
30533 /* Hook to determine if one function can safely inline another. */
30536 arm_can_inline_p (tree caller
, tree callee
)
30538 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
30539 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
30540 bool can_inline
= true;
30542 struct cl_target_option
*caller_opts
30543 = TREE_TARGET_OPTION (caller_tree
? caller_tree
30544 : target_option_default_node
);
30546 struct cl_target_option
*callee_opts
30547 = TREE_TARGET_OPTION (callee_tree
? callee_tree
30548 : target_option_default_node
);
30550 if (callee_opts
== caller_opts
)
30553 /* Callee's ISA features should be a subset of the caller's. */
30554 struct arm_build_target caller_target
;
30555 struct arm_build_target callee_target
;
30556 caller_target
.isa
= sbitmap_alloc (isa_num_bits
);
30557 callee_target
.isa
= sbitmap_alloc (isa_num_bits
);
30559 arm_configure_build_target (&caller_target
, caller_opts
, &global_options_set
,
30561 arm_configure_build_target (&callee_target
, callee_opts
, &global_options_set
,
30563 if (!bitmap_subset_p (callee_target
.isa
, caller_target
.isa
))
30564 can_inline
= false;
30566 sbitmap_free (caller_target
.isa
);
30567 sbitmap_free (callee_target
.isa
);
30569 /* OK to inline between different modes.
30570 Function with mode specific instructions, e.g using asm,
30571 must be explicitly protected with noinline. */
30575 /* Hook to fix function's alignment affected by target attribute. */
30578 arm_relayout_function (tree fndecl
)
30580 if (DECL_USER_ALIGN (fndecl
))
30583 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
30586 callee_tree
= target_option_default_node
;
30588 struct cl_target_option
*opts
= TREE_TARGET_OPTION (callee_tree
);
30591 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts
->x_target_flags
)));
30594 /* Inner function to process the attribute((target(...))), take an argument and
30595 set the current options from the argument. If we have a list, recursively
30596 go over the list. */
30599 arm_valid_target_attribute_rec (tree args
, struct gcc_options
*opts
)
30601 if (TREE_CODE (args
) == TREE_LIST
)
30605 for (; args
; args
= TREE_CHAIN (args
))
30606 if (TREE_VALUE (args
)
30607 && !arm_valid_target_attribute_rec (TREE_VALUE (args
), opts
))
30612 else if (TREE_CODE (args
) != STRING_CST
)
30614 error ("attribute %<target%> argument not a string");
30618 char *argstr
= ASTRDUP (TREE_STRING_POINTER (args
));
30621 while ((q
= strtok (argstr
, ",")) != NULL
)
30623 while (ISSPACE (*q
)) ++q
;
30626 if (!strncmp (q
, "thumb", 5))
30627 opts
->x_target_flags
|= MASK_THUMB
;
30629 else if (!strncmp (q
, "arm", 3))
30630 opts
->x_target_flags
&= ~MASK_THUMB
;
30632 else if (!strncmp (q
, "fpu=", 4))
30635 if (! opt_enum_arg_to_value (OPT_mfpu_
, q
+4,
30636 &fpu_index
, CL_TARGET
))
30638 error ("invalid fpu for attribute(target(\"%s\"))", q
);
30641 if (fpu_index
== TARGET_FPU_auto
)
30643 /* This doesn't really make sense until we support
30644 general dynamic selection of the architecture and all
30646 sorry ("auto fpu selection not currently permitted here");
30649 opts
->x_arm_fpu_index
= (enum fpu_type
) fpu_index
;
30653 error ("attribute(target(\"%s\")) is unknown", q
);
30661 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30664 arm_valid_target_attribute_tree (tree args
, struct gcc_options
*opts
,
30665 struct gcc_options
*opts_set
)
30667 struct cl_target_option cl_opts
;
30669 if (!arm_valid_target_attribute_rec (args
, opts
))
30672 cl_target_option_save (&cl_opts
, opts
);
30673 arm_configure_build_target (&arm_active_target
, &cl_opts
, opts_set
, false);
30674 arm_option_check_internal (opts
);
30675 /* Do any overrides, such as global options arch=xxx. */
30676 arm_option_override_internal (opts
, opts_set
);
30678 return build_target_option_node (opts
);
30682 add_attribute (const char * mode
, tree
*attributes
)
30684 size_t len
= strlen (mode
);
30685 tree value
= build_string (len
, mode
);
30687 TREE_TYPE (value
) = build_array_type (char_type_node
,
30688 build_index_type (size_int (len
)));
30690 *attributes
= tree_cons (get_identifier ("target"),
30691 build_tree_list (NULL_TREE
, value
),
30695 /* For testing. Insert thumb or arm modes alternatively on functions. */
30698 arm_insert_attributes (tree fndecl
, tree
* attributes
)
30702 if (! TARGET_FLIP_THUMB
)
30705 if (TREE_CODE (fndecl
) != FUNCTION_DECL
|| DECL_EXTERNAL(fndecl
)
30706 || DECL_BUILT_IN (fndecl
) || DECL_ARTIFICIAL (fndecl
))
30709 /* Nested definitions must inherit mode. */
30710 if (current_function_decl
)
30712 mode
= TARGET_THUMB
? "thumb" : "arm";
30713 add_attribute (mode
, attributes
);
30717 /* If there is already a setting don't change it. */
30718 if (lookup_attribute ("target", *attributes
) != NULL
)
30721 mode
= thumb_flipper
? "thumb" : "arm";
30722 add_attribute (mode
, attributes
);
30724 thumb_flipper
= !thumb_flipper
;
30727 /* Hook to validate attribute((target("string"))). */
30730 arm_valid_target_attribute_p (tree fndecl
, tree
ARG_UNUSED (name
),
30731 tree args
, int ARG_UNUSED (flags
))
30734 struct gcc_options func_options
;
30735 tree cur_tree
, new_optimize
;
30736 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
30738 /* Get the optimization options of the current function. */
30739 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
30741 /* If the function changed the optimization levels as well as setting target
30742 options, start with the optimizations specified. */
30743 if (!func_optimize
)
30744 func_optimize
= optimization_default_node
;
30746 /* Init func_options. */
30747 memset (&func_options
, 0, sizeof (func_options
));
30748 init_options_struct (&func_options
, NULL
);
30749 lang_hooks
.init_options_struct (&func_options
);
30751 /* Initialize func_options to the defaults. */
30752 cl_optimization_restore (&func_options
,
30753 TREE_OPTIMIZATION (func_optimize
));
30755 cl_target_option_restore (&func_options
,
30756 TREE_TARGET_OPTION (target_option_default_node
));
30758 /* Set func_options flags with new target mode. */
30759 cur_tree
= arm_valid_target_attribute_tree (args
, &func_options
,
30760 &global_options_set
);
30762 if (cur_tree
== NULL_TREE
)
30765 new_optimize
= build_optimization_node (&func_options
);
30767 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = cur_tree
;
30769 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
30771 finalize_options_struct (&func_options
);
30776 /* Match an ISA feature bitmap to a named FPU. We always use the
30777 first entry that exactly matches the feature set, so that we
30778 effectively canonicalize the FPU name for the assembler. */
30780 arm_identify_fpu_from_isa (sbitmap isa
)
30782 auto_sbitmap
fpubits (isa_num_bits
);
30783 auto_sbitmap
cand_fpubits (isa_num_bits
);
30785 bitmap_and (fpubits
, isa
, isa_all_fpubits
);
30787 /* If there are no ISA feature bits relating to the FPU, we must be
30788 doing soft-float. */
30789 if (bitmap_empty_p (fpubits
))
30792 for (unsigned int i
= 0; i
< TARGET_FPU_auto
; i
++)
30794 arm_initialize_isa (cand_fpubits
, all_fpus
[i
].isa_bits
);
30795 if (bitmap_equal_p (fpubits
, cand_fpubits
))
30796 return all_fpus
[i
].name
;
30798 /* We must find an entry, or things have gone wrong. */
30799 gcc_unreachable ();
30803 arm_declare_function_name (FILE *stream
, const char *name
, tree decl
)
30806 fprintf (stream
, "\t.syntax unified\n");
30810 if (is_called_in_ARM_mode (decl
)
30811 || (TARGET_THUMB1
&& !TARGET_THUMB1_ONLY
30812 && cfun
->is_thunk
))
30813 fprintf (stream
, "\t.code 32\n");
30814 else if (TARGET_THUMB1
)
30815 fprintf (stream
, "\t.code\t16\n\t.thumb_func\n");
30817 fprintf (stream
, "\t.thumb\n\t.thumb_func\n");
30820 fprintf (stream
, "\t.arm\n");
30822 asm_fprintf (asm_out_file
, "\t.fpu %s\n",
30825 : arm_identify_fpu_from_isa (arm_active_target
.isa
)));
30827 if (TARGET_POKE_FUNCTION_NAME
)
30828 arm_poke_function_name (stream
, (const char *) name
);
30831 /* If MEM is in the form of [base+offset], extract the two parts
30832 of address and set to BASE and OFFSET, otherwise return false
30833 after clearing BASE and OFFSET. */
30836 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
30840 gcc_assert (MEM_P (mem
));
30842 addr
= XEXP (mem
, 0);
30844 /* Strip off const from addresses like (const (addr)). */
30845 if (GET_CODE (addr
) == CONST
)
30846 addr
= XEXP (addr
, 0);
30848 if (GET_CODE (addr
) == REG
)
30851 *offset
= const0_rtx
;
30855 if (GET_CODE (addr
) == PLUS
30856 && GET_CODE (XEXP (addr
, 0)) == REG
30857 && CONST_INT_P (XEXP (addr
, 1)))
30859 *base
= XEXP (addr
, 0);
30860 *offset
= XEXP (addr
, 1);
30865 *offset
= NULL_RTX
;
30870 /* If INSN is a load or store of address in the form of [base+offset],
30871 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
30872 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30873 otherwise return FALSE. */
30876 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
, bool *is_load
)
30880 gcc_assert (INSN_P (insn
));
30881 x
= PATTERN (insn
);
30882 if (GET_CODE (x
) != SET
)
30886 dest
= SET_DEST (x
);
30887 if (GET_CODE (src
) == REG
&& GET_CODE (dest
) == MEM
)
30890 extract_base_offset_in_addr (dest
, base
, offset
);
30892 else if (GET_CODE (src
) == MEM
&& GET_CODE (dest
) == REG
)
30895 extract_base_offset_in_addr (src
, base
, offset
);
30900 return (*base
!= NULL_RTX
&& *offset
!= NULL_RTX
);
30903 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30905 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
30906 and PRI are only calculated for these instructions. For other instruction,
30907 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
30908 instruction fusion can be supported by returning different priorities.
30910 It's important that irrelevant instructions get the largest FUSION_PRI. */
30913 arm_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
30914 int *fusion_pri
, int *pri
)
30920 gcc_assert (INSN_P (insn
));
30923 if (!fusion_load_store (insn
, &base
, &offset
, &is_load
))
30930 /* Load goes first. */
30932 *fusion_pri
= tmp
- 1;
30934 *fusion_pri
= tmp
- 2;
30938 /* INSN with smaller base register goes first. */
30939 tmp
-= ((REGNO (base
) & 0xff) << 20);
30941 /* INSN with smaller offset goes first. */
30942 off_val
= (int)(INTVAL (offset
));
30944 tmp
-= (off_val
& 0xfffff);
30946 tmp
+= ((- off_val
) & 0xfffff);
30953 /* Construct and return a PARALLEL RTX vector with elements numbering the
30954 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30955 the vector - from the perspective of the architecture. This does not
30956 line up with GCC's perspective on lane numbers, so we end up with
30957 different masks depending on our target endian-ness. The diagram
30958 below may help. We must draw the distinction when building masks
30959 which select one half of the vector. An instruction selecting
30960 architectural low-lanes for a big-endian target, must be described using
30961 a mask selecting GCC high-lanes.
30963 Big-Endian Little-Endian
30965 GCC 0 1 2 3 3 2 1 0
30966 | x | x | x | x | | x | x | x | x |
30967 Architecture 3 2 1 0 3 2 1 0
30969 Low Mask: { 2, 3 } { 0, 1 }
30970 High Mask: { 0, 1 } { 2, 3 }
30974 arm_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
30976 int nunits
= GET_MODE_NUNITS (mode
);
30977 rtvec v
= rtvec_alloc (nunits
/ 2);
30978 int high_base
= nunits
/ 2;
30984 if (BYTES_BIG_ENDIAN
)
30985 base
= high
? low_base
: high_base
;
30987 base
= high
? high_base
: low_base
;
30989 for (i
= 0; i
< nunits
/ 2; i
++)
30990 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
30992 t1
= gen_rtx_PARALLEL (mode
, v
);
30996 /* Check OP for validity as a PARALLEL RTX vector with elements
30997 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30998 from the perspective of the architecture. See the diagram above
30999 arm_simd_vect_par_cnst_half_p for more details. */
31002 arm_simd_check_vect_par_cnst_half_p (rtx op
, machine_mode mode
,
31005 rtx ideal
= arm_simd_vect_par_cnst_half (mode
, high
);
31006 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
31007 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
31010 if (!VECTOR_MODE_P (mode
))
31013 if (count_op
!= count_ideal
)
31016 for (i
= 0; i
< count_ideal
; i
++)
31018 rtx elt_op
= XVECEXP (op
, 0, i
);
31019 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
31021 if (!CONST_INT_P (elt_op
)
31022 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
31028 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31031 arm_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
31034 /* For now, we punt and not handle this for TARGET_THUMB1. */
31035 if (vcall_offset
&& TARGET_THUMB1
)
31038 /* Otherwise ok. */
31042 /* Generate RTL for a conditional branch with rtx comparison CODE in
31043 mode CC_MODE. The destination of the unlikely conditional branch
31047 arm_gen_unlikely_cbranch (enum rtx_code code
, machine_mode cc_mode
,
31051 x
= gen_rtx_fmt_ee (code
, VOIDmode
,
31052 gen_rtx_REG (cc_mode
, CC_REGNUM
),
31055 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
31056 gen_rtx_LABEL_REF (VOIDmode
, label_ref
),
31058 emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
/* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.

   For pure-code sections there is no letter code for this attribute, so
   output all the section flags numerically when this is needed.  */

static bool
arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
{
  /* Only sections carrying SHF_ARM_PURECODE need numeric flags; for
     anything else return false so the generic letter codes are used.
     The constants below are the ELF SHF_* values — NOTE(review): values
     cross-checked against the ELF gABI / ARM ELF supplement.  */
  if (flags & SECTION_ARM_PURECODE)
    {
      *num = 0x20000000;		/* SHF_ARM_PURECODE.  */

      if (!(flags & SECTION_DEBUG))
	*num |= 0x2;			/* SHF_ALLOC.  */
      if (flags & SECTION_EXCLUDE)
	*num |= 0x80000000;		/* SHF_EXCLUDE.  */
      if (flags & SECTION_WRITE)
	*num |= 0x1;			/* SHF_WRITE.  */
      if (flags & SECTION_CODE)
	*num |= 0x4;			/* SHF_EXECINSTR.  */
      if (flags & SECTION_MERGE)
	*num |= 0x10;			/* SHF_MERGE.  */
      if (flags & SECTION_STRINGS)
	*num |= 0x20;			/* SHF_STRINGS.  */
      if (flags & SECTION_TLS)
	*num |= 0x400;			/* SHF_TLS.  */
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	*num |= 0x200;			/* SHF_GROUP.  */

      return true;
    }

  return false;
}
31097 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31099 If pure-code is passed as an option, make sure all functions are in
31100 sections that have the SHF_ARM_PURECODE attribute. */
31103 arm_function_section (tree decl
, enum node_frequency freq
,
31104 bool startup
, bool exit
)
31106 const char * section_name
;
31109 if (!decl
|| TREE_CODE (decl
) != FUNCTION_DECL
)
31110 return default_function_section (decl
, freq
, startup
, exit
);
31112 if (!target_pure_code
)
31113 return default_function_section (decl
, freq
, startup
, exit
);
31116 section_name
= DECL_SECTION_NAME (decl
);
31118 /* If a function is not in a named section then it falls under the 'default'
31119 text section, also known as '.text'. We can preserve previous behavior as
31120 the default text section already has the SHF_ARM_PURECODE section
31124 section
*default_sec
= default_function_section (decl
, freq
, startup
,
31127 /* If default_sec is not null, then it must be a special section like for
31128 example .text.startup. We set the pure-code attribute and return the
31129 same section to preserve existing behavior. */
31131 default_sec
->common
.flags
|= SECTION_ARM_PURECODE
;
31132 return default_sec
;
31135 /* Otherwise look whether a section has already been created with
31137 sec
= get_named_section (decl
, section_name
, 0);
31139 /* If that is not the case passing NULL as the section's name to
31140 'get_named_section' will create a section with the declaration's
31142 sec
= get_named_section (decl
, NULL
, 0);
31144 /* Set the SHF_ARM_PURECODE attribute. */
31145 sec
->common
.flags
|= SECTION_ARM_PURECODE
;
31150 /* Implements the TARGET_SECTION_FLAGS hook.
31152 If DECL is a function declaration and pure-code is passed as an option
31153 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
31154 section's name and RELOC indicates whether the declarations initializer may
31155 contain runtime relocations. */
31157 static unsigned int
31158 arm_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
31160 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
31162 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
&& target_pure_code
)
31163 flags
|= SECTION_ARM_PURECODE
;
/* Generate call to __aeabi_[mode]divmod (op0, op1), storing the quotient
   through *QUOT_P and the remainder through *REM_P.  */

void
arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
			   rtx op0, rtx op1,
			   rtx *quot_p, rtx *rem_p)
{
  /* NOTE(review): when the core has hardware integer divide the expander
     is expected to use it for SImode instead of the libcall, hence this
     sanity check — confirm against the divmod expander.  */
  if (mode == SImode)
    gcc_assert (!TARGET_IDIV);

  /* The AEABI routine returns quotient and remainder packed into a single
     value twice as wide as MODE.  */
  scalar_int_mode libval_mode
    = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));

  rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
					libval_mode,
					op0, GET_MODE (op0),
					op1, GET_MODE (op1));

  /* Quotient occupies the low half of the returned value, remainder the
     high half (subreg byte offset GET_MODE_SIZE (mode)).  */
  rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
  rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
				       GET_MODE_SIZE (mode));

  gcc_assert (quotient);
  gcc_assert (remainder);

  *quot_p = quotient;
  *rem_p = remainder;
}
/* This function checks for the availability of the coprocessor builtin passed
   in BUILTIN for the current target.  Returns true if it is available and
   false otherwise.  If a BUILTIN is passed for which this function has not
   been implemented it will cause an exception.  */

bool
arm_coproc_builtin_available (enum unspecv builtin)
{
  /* None of these builtins are available in Thumb mode if the target only
     supports Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  switch (builtin)
    {
      /* Base coprocessor transfers: present from ARMv4 onwards.  */
      case VUNSPEC_CDP:
      case VUNSPEC_LDC:
      case VUNSPEC_LDCL:
      case VUNSPEC_STC:
      case VUNSPEC_STCL:
      case VUNSPEC_MCR:
      case VUNSPEC_MRC:
	if (arm_arch4)
	  return true;
	break;
      case VUNSPEC_CDP2:
      case VUNSPEC_LDC2:
      case VUNSPEC_LDC2L:
      case VUNSPEC_STC2:
      case VUNSPEC_STC2L:
      case VUNSPEC_MCR2:
      case VUNSPEC_MRC2:
	/* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
	   ARMv8-{A,M}.  */
	if (arm_arch5)
	  return true;
	break;
      case VUNSPEC_MCRR:
      case VUNSPEC_MRRC:
	/* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
	   ARMv8-{A,M}.  */
	if (arm_arch6 || arm_arch5te)
	  return true;
	break;
      case VUNSPEC_MCRR2:
      case VUNSPEC_MRRC2:
	if (arm_arch6)
	  return true;
	break;
      default:
	gcc_unreachable ();
    }
  return false;
}
/* This function returns true if OP is a valid memory operand for the ldc and
   stc coprocessor instructions and false otherwise.  */

bool
arm_coproc_ldc_stc_legitimate_address (rtx op)
{
  HOST_WIDE_INT range;
  /* Has to be a memory operand.  */
  if (!MEM_P (op))
    return false;

  op = XEXP (op, 0);

  /* We accept registers.  */
  if (REG_P (op))
    return true;

  switch GET_CODE (op)
    {
      case PLUS:
	{
	  /* Or registers with an offset.  */
	  if (!REG_P (XEXP (op, 0)))
	    return false;

	  op = XEXP (op, 1);

	  /* The offset must be an immediate though.  */
	  if (!CONST_INT_P (op))
	    return false;

	  range = INTVAL (op);

	  /* Within the range of [-1020,1020].  */
	  if (!IN_RANGE (range, -1020, 1020))
	    return false;

	  /* And a multiple of 4.  */
	  return (range % 4) == 0;
	}
      /* Pre/post increment and decrement addressing is also accepted as
	 long as the base is a plain register.  */
      case PRE_INC:
      case POST_INC:
      case PRE_DEC:
      case POST_DEC:
	return REG_P (XEXP (op, 0));
      default:
	gcc_unreachable ();
    }
  return false;
}
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.

   In VFPv1, VFP registers could only be accessed in the mode they were
   set, so subregs would be invalid there.  However, we don't support
   VFPv1 at the moment, and the restriction was lifted in VFPv2.

   In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
   VFP registers in little-endian order.  We can't describe that accurately to
   GCC, so avoid taking subregs of such values.

   The only exception is going from a 128-bit to a 64-bit type.  In that
   case the data layout happens to be consistent for big-endian, so we
   explicitly allow that case.  */

static bool
arm_can_change_mode_class (machine_mode from, machine_mode to,
			   reg_class_t rclass)
{
  if (TARGET_BIG_END
      /* 128-bit -> 64-bit is the one layout-compatible big-endian case.  */
      && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
      && (GET_MODE_SIZE (from) > UNITS_PER_WORD
	  || GET_MODE_SIZE (to) > UNITS_PER_WORD)
      && reg_classes_intersect_p (VFP_REGS, rclass))
    return false;

  return true;
}
31330 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31331 strcpy from constants will be faster. */
31333 static HOST_WIDE_INT
31334 arm_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
31336 unsigned int factor
= (TARGET_THUMB
|| ! arm_tune_xscale
? 1 : 2);
31337 if (TREE_CODE (exp
) == STRING_CST
&& !optimize_size
)
31338 return MAX (align
, BITS_PER_WORD
* factor
);
31343 namespace selftest
{
31345 /* Scan the static data tables generated by parsecpu.awk looking for
31346 potential issues with the data. We primarily check for
31347 inconsistencies in the option extensions at present (extensions
31348 that duplicate others but aren't marked as aliases). Furthermore,
31349 for correct canonicalization later options must never be a subset
31350 of an earlier option. Any extension should also only specify other
31351 feature bits and never an architecture bit. The architecture is inferred
31352 from the declaration of the extension. */
31354 arm_test_cpu_arch_data (void)
31356 const arch_option
*arch
;
31357 const cpu_option
*cpu
;
31358 auto_sbitmap
target_isa (isa_num_bits
);
31359 auto_sbitmap
isa1 (isa_num_bits
);
31360 auto_sbitmap
isa2 (isa_num_bits
);
31362 for (arch
= all_architectures
; arch
->common
.name
!= NULL
; ++arch
)
31364 const cpu_arch_extension
*ext1
, *ext2
;
31366 if (arch
->common
.extensions
== NULL
)
31369 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
31371 for (ext1
= arch
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
31376 arm_initialize_isa (isa1
, ext1
->isa_bits
);
31377 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
31379 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
31382 arm_initialize_isa (isa2
, ext2
->isa_bits
);
31383 /* If the option is a subset of the parent option, it doesn't
31384 add anything and so isn't useful. */
31385 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
31387 /* If the extension specifies any architectural bits then
31388 disallow it. Extensions should only specify feature bits. */
31389 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
31394 for (cpu
= all_cores
; cpu
->common
.name
!= NULL
; ++cpu
)
31396 const cpu_arch_extension
*ext1
, *ext2
;
31398 if (cpu
->common
.extensions
== NULL
)
31401 arm_initialize_isa (target_isa
, arch
->common
.isa_bits
);
31403 for (ext1
= cpu
->common
.extensions
; ext1
->name
!= NULL
; ++ext1
)
31408 arm_initialize_isa (isa1
, ext1
->isa_bits
);
31409 for (ext2
= ext1
+ 1; ext2
->name
!= NULL
; ++ext2
)
31411 if (ext2
->alias
|| ext1
->remove
!= ext2
->remove
)
31414 arm_initialize_isa (isa2
, ext2
->isa_bits
);
31415 /* If the option is a subset of the parent option, it doesn't
31416 add anything and so isn't useful. */
31417 ASSERT_TRUE (!bitmap_subset_p (isa2
, isa1
));
31419 /* If the extension specifies any architectural bits then
31420 disallow it. Extensions should only specify feature bits. */
31421 ASSERT_TRUE (!bitmap_intersect_p (isa2
, target_isa
));
/* Scan the static data tables generated by parsecpu.awk looking for
   potential issues with the data.  Here we check for consistency between the
   fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
   a feature bit that is not defined by any FPU flag.  */
static void
arm_test_fpu_data (void)
{
  auto_sbitmap isa_all_fpubits (isa_num_bits);
  auto_sbitmap fpubits (isa_num_bits);
  auto_sbitmap tmpset (isa_num_bits);

  static const enum isa_feature fpu_bitlist[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  arm_initialize_isa (isa_all_fpubits, fpu_bitlist);

  /* Strip each FPU's feature bits out of the "all FPU bits" set; whatever
     survives the loop is claimed by ISA_ALL_FPU_INTERNAL but provided by
     no FPU.  */
  for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
    {
      arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
      bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
      bitmap_clear (isa_all_fpubits);
      bitmap_copy (isa_all_fpubits, tmpset);
    }

  /* Report leftover bits before tripping the selftest assertion, so the
     log points at the data file to fix.  */
  if (!bitmap_empty_p (isa_all_fpubits))
    {
      fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERAL"
	       " group that are not defined by any FPU.\n"
	       "       Check your arm-cpus.in.\n");
      ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
    }
}
/* Entry point for the ARM target selftests; runs every check defined in
   this namespace.  */
static void
arm_run_selftests (void)
{
  arm_test_cpu_arch_data ();
  arm_test_fpu_data ();
}
31465 } /* Namespace selftest. */
31467 #undef TARGET_RUN_TARGET_SELFTESTS
31468 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31469 #endif /* CHECKING_P */
/* The global target hook vector for the ARM backend, filled in from the
   TARGET_* hook macros defined earlier in this file.  */
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-arm.h"